# Implementation of NN based Ranking Model on Slack Data

```/* @Author: Jagan Kaartik */```

In [1]:
import os
import re
import nltk

In [None]:
os.getcwd()

In [None]:
path = "../Data/Slack_Data/Dataset"

In [None]:
filelist = os.listdir(path)
filelist

In [None]:
filelist.pop(0)

In [None]:
newpath = "/Volumes/JK/AI-Search-Engine/src/Data/Slack_Data/Dataset"
os.chdir(newpath)

In [None]:
os.getcwd()

In [None]:
corpus = []
for i in filelist:
     with open("{}".format(i)) as f_input:
            corpus.append(f_input.read())

In [142]:
len(corpus)

6378

In [None]:
corpus[0]

## Pre-Processing Corpus Data 

In [None]:
newpath = "/Volumes/JK/AI-Search-Engine/src/Data/Slack_Data"
os.chdir(newpath)

In [None]:
# Remove \n in Corpus Txt

for c in range(0,len(corpus)):
    corpus[c] = re.sub('\n','',corpus[c])

In [None]:
corpus[0]

## Lemmatization and Removal of Stop Words from Corpus

In [None]:
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords 
stop_words = set(stopwords.words('english')) 
lemmatizer = WordNetLemmatizer() 

for c in range(0,len(corpus)):
    word_list = nltk.word_tokenize(corpus[c])
    filtered_sentence = [w for w in word_list if not w in stop_words]
    corpus[c] = ' '.join([lemmatizer.lemmatize(w) for w in filtered_sentence])

In [None]:
corpus[0]

### Writing Lemmatized Corpus to File for Efficient Retreival 

In [309]:
path = "/Volumes/JK/AI-Search-Engine/src/Data/Slack_Data/Lemmatized Corpus/"
os.chdir(path)

In [310]:
for c in range(0,len(corpus)):
    with open('doc{}.txt'.format(c+1), 'w') as f:
        f.write("%s\n" % corpus[c])

### Inverted - Index

In [312]:
from collections import defaultdict

In [313]:
class InvertedIndex: 
    def __init__(self, tokenizer):
        self.tokenizer = tokenizer
        self.index = defaultdict(list)
        self.documents = {}
        self.unique_id = 0
 
    def termLookup(self, word):
        word = word.lower()
        return [self.documents.get(id, None) for id in self.index.get(word)]
    
    def retIndex(self):           
        return self.index.items()
 
    def addTerm(self, document):
        for token in [t.lower() for t in nltk.word_tokenize(document)]:
            if self.unique_id not in self.index[token]:
                self.index[token].append(self.unique_id)
 
        self.documents[self.unique_id] = document
        self.unique_id += 1           

invObj = InvertedIndex(nltk.word_tokenize)

### Adding Corpus Data to Inverted Index

In [314]:
for c in range(0,100):
    invObj.addTerm(corpus[c])

In [315]:
res = invObj.termLookup("cancer")
print(len(res))

9


### View Inverted Index

In [316]:
invIndex = invObj.retIndex()

for k,v in invIndex:
    print("{} ---> {}".format(k,v)) 

link ---> [0, 1, 13, 16, 25, 29, 34, 37, 44, 48, 52, 53, 68, 71, 76, 79, 82, 83, 87, 90, 91, 94, 95]
national ---> [0, 1, 2, 3, 5, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 22, 23, 24, 25, 29, 30, 32, 33, 34, 35, 36, 37, 38, 39, 40, 43, 45, 46, 47, 51, 52, 53, 54, 55, 56, 58, 59, 63, 66, 69, 70, 71, 72, 74, 76, 77, 79, 81, 82, 83, 87, 88, 89, 90, 91, 93, 94, 95]
cancer ---> [0, 7, 14, 15, 23, 29, 47, 79, 80]
institute ---> [0, 2, 5, 7, 9, 14, 15, 16, 19, 23, 25, 29, 30, 37, 38, 47, 48, 49, 56, 67, 70, 72, 74, 76, 77, 79, 82, 83, 88, 91, 98]
center ---> [0, 2, 3, 7, 8, 10, 11, 14, 15, 16, 18, 22, 23, 25, 27, 29, 30, 33, 34, 36, 37, 38, 39, 40, 42, 44, 45, 46, 47, 48, 52, 53, 54, 55, 56, 58, 60, 61, 62, 63, 64, 67, 69, 70, 71, 72, 73, 74, 75, 76, 79, 81, 82, 89, 90, 91, 99]
research ---> [0, 2, 6, 7, 9, 10, 11, 14, 15, 16, 18, 23, 24, 25, 27, 30, 34, 37, 38, 39, 40, 42, 44, 47, 48, 52, 55, 56, 58, 59, 62, 68, 69, 70, 71, 72, 74, 75, 76, 79, 80, 82, 83, 87, 88, 89, 90, 91, 98]
contact 

tank.an ---> [2]
advantage ---> [2, 14, 16, 20, 23, 24, 37, 44, 52, 53, 87]
last ---> [2, 3, 4, 10, 11, 13, 15, 17, 18, 19, 20, 21, 22, 23, 25, 27, 30, 32, 33, 34, 36, 39, 42, 44, 47, 49, 51, 52, 53, 54, 55, 56, 63, 66, 78, 79, 82, 83, 85, 87, 90, 92, 94, 98, 99]
longer ---> [2, 7, 9, 16, 24, 29, 33, 37, 39, 42, 44, 47, 53, 55, 56, 58, 63, 64, 69, 75, 79, 82, 87]
engines.^ ---> [2]
after ---> [2, 25, 37, 53, 54, 56, 83, 87]
conducting ---> [2, 3, 7, 23, 37, 40, 52, 55, 69, 70, 71, 73, 87]
dekalb ---> [2]
georgia ---> [2, 9, 16, 43, 44, 76]
noted ---> [2, 7, 8, 25, 30, 32, 34, 37, 42, 48, 54, 56, 58, 62, 75, 79, 82, 87]
acquisition ---> [2, 16, 39, 88]
steep ---> [2, 16]
long-term ---> [2, 7, 15, 16, 17, 23, 39, 45, 53, 55, 58, 60, 64, 69, 72, 73, 74, 75, 79, 87]
saving ---> [2, 16, 40, 47, 53, 90]
outweighed ---> [2, 37, 79]
short-term ---> [2, 16, 23, 39, 48, 53, 58, 60, 70, 75, 79, 82, 91]
costs.^ ---> [2]
6.estimated ---> [2]
din ---> [2]
266,000 ---> [2]
among ---> [2, 5, 7, 8, 10,

use^a ---> [2]
autos ---> [2]
28.7 ---> [2]
33.1 ---> [2]
67.2 ---> [2]
21.9 ---> [2]
31.9 ---> [2]
4.5 ---> [2]
1.2 ---> [2, 6, 16, 69]
62.7 ---> [2]
32.5 ---> [2]
31.3 ---> [2]
29.9 ---> [2]
4.2 ---> [2, 69]
4.7 ---> [2, 48]
0.4 ---> [2]
19.2 ---> [2]
26.4 ---> [2]
29.5 ---> [2]
9.1 ---> [2]
0.1 ---> [2]
24.4 ---> [2]
19.3 ---> [2]
1.4 ---> [2, 7, 69]
4.6 ---> [2, 63]
2.7 ---> [2]
0.2 ---> [2]
19.8 ---> [2]
16.6 ---> [2, 7]
7.1 ---> [2]
0.6 ---> [2]
7.2 ---> [2]
8.0 ---> [2]
2.9 ---> [2]
1.3 ---> [2, 39, 53, 64, 69]
6.7 ---> [2]
0.5 ---> [2]
0.8 ---> [2, 69]
100.0 ---> [2]
93,310 ---> [2]
18,941 ---> [2]
^areported ---> [2]
survey.these ---> [2]
represent ---> [2, 5, 8, 15, 16, 23, 29, 37, 39, 40, 55, 58, 64, 75, 80, 81, 82, 83, 87, 89, 93, 96]
entire ---> [2, 5, 9, 19, 23, 39, 42, 53, 54, 55, 56, 58, 59, 63, 69, 83, 87, 89, 98, 99]
population ---> [2, 15, 16, 18, 19, 23, 25, 29, 30, 33, 39, 42, 47, 48, 54, 55, 56, 58, 64, 69, 71, 75, 79, 82, 87, 98, 99]
shown ---> [2, 11, 16, 19, 23

probation.new ---> [3]
birth ---> [3, 13, 32, 37, 47, 79, 87, 98]
missionary ---> [3, 33, 43]
charles ---> [3, 10, 16, 19, 37, 76, 80, 87]
fire.boxes ---> [3]
sitting ---> [3, 8, 29, 56, 59, 79]
wall ---> [3, 8, 10, 28, 29, 48, 51, 60, 79, 87]
spread ---> [3, 8, 9, 12, 16, 25, 34, 39, 52, 54, 64, 69, 83]
minimal ---> [3, 16, 48, 53, 66, 99]
church.two ---> [3]
charges.each ---> [3]
probation.57.our ---> [3]
lady ---> [3, 17, 30, 85]
queen ---> [3, 16]
heaven ---> [3, 8]
damaged.the ---> [3]
our ---> [3, 16, 19, 39, 44, 47, 49, 56, 58, 99]
proximity ---> [3, 16, 40, 87]
school.the ---> [3]
60,000.two ---> [3]
18.58.cypress ---> [3]
zachary ---> [3]
59.saint ---> [3]
paul ---> [3, 16, 25, 26, 37, 56, 58, 76, 87]
60.sweet ---> [3]
61.saint ---> [3]
benevolent ---> [3]
society ---> [3, 8, 9, 14, 15, 23, 33, 37, 47, 48, 54, 55, 56, 72, 76, 79, 82, 83, 86, 88, 91]
arsonist ---> [3, 33, 93]
brought ---> [3, 8, 25, 29, 33, 37, 44, 53, 54, 55, 56, 57, 62, 66, 69, 87, 89]
churches.in ---> [3]
st

retire ---> [4, 53, 89]
hold ---> [4, 7, 8, 15, 16, 17, 19, 23, 26, 32, 33, 37, 40, 52, 53, 54, 55, 60, 62, 63, 65, 78, 87, 92]
accountable ---> [4, 39, 89, 99]
profiteering ---> [4, 40]
ease ---> [4, 30, 39, 63, 69, 79]
burden ---> [4, 7, 8, 16, 37, 39, 48, 53, 62, 89]
bear ---> [4, 7, 37, 40, 56, 58, 76, 79, 87]
lion ---> [4, 76, 87]
harm ---> [4, 8, 24, 33, 37, 39, 47, 48, 52, 58, 69, 87]
inflicted ---> [4, 37, 55]
gouger ---> [4]
pillaged ---> [4]
yet.we ---> [4]
dozen ---> [4, 17, 30, 42, 44, 53, 54, 74, 87]
active ---> [4, 7, 8, 14, 16, 19, 23, 30, 33, 37, 39, 40, 48, 53, 55, 56, 58, 60, 63, 69, 75, 79, 89, 97]
lawsuit ---> [4, 33, 37, 55]
back ---> [4, 8, 9, 15, 18, 19, 25, 30, 32, 33, 37, 39, 40, 42, 43, 44, 49, 50, 52, 53, 54, 56, 59, 63, 65, 73, 74, 75, 76, 79, 80, 87, 96, 99]
picked ---> [4, 13, 16, 25, 87]
pocket ---> [4, 23, 28, 39]
forthcoming ---> [4, 87]
information.he ---> [4]
cooperating ---> [4, 14, 30, 43]
manipulation.under ---> [4]
allowed ---> [4, 8, 16, 18, 19, 

mangal ---> [5]
hindi ---> [5, 33]
audience ---> [5, 10, 41, 77]
looking ---> [5, 8, 25, 42, 44, 52, 59, 63, 79, 83, 87, 96, 97, 99]
crossover ---> [5]
world.so ---> [5]
panchatantra ---> [5]
tale ---> [5, 10]
culture-specific ---> [5]
says.global ---> [5]
vision ---> [5, 7, 9, 14, 16, 29, 44, 47, 56, 87, 88, 89]
say ---> [5, 8, 16, 17, 25, 30, 33, 37, 42, 44, 52, 53, 54, 55, 59, 64, 65, 78, 85, 87]
possibility ---> [5, 7, 9, 11, 16, 25, 30, 34, 37, 40, 44, 47, 51, 61, 83, 99]
developmental ---> [5, 7, 23, 68]
u.k. ---> [5, 25]
writer ---> [5, 6, 56, 87]
creative ---> [5, 10, 99]
co-owned ---> [5]
us.so ---> [5]
partner ---> [5, 7, 15, 16, 17, 18, 23, 39, 58, 64, 69, 73, 77, 83, 99]
creativity ---> [5, 7, 50, 56]
... ---> [5, 20, 23, 25, 37, 42, 45, 49, 52, 53, 63, 69, 85, 88, 90]
kind ---> [5, 8, 10, 16, 23, 24, 25, 30, 33, 35, 37, 40, 42, 44, 49, 53, 54, 55, 65, 66, 73, 79, 80, 87]
afghanistan ---> [5, 8, 17, 22, 85]
taliban ---> [5, 8, 17, 85]
years ---> [5, 25, 47, 99]
co-produced 

helen ---> [7, 25, 40, 56]
wegman ---> [7]
nhlbi/nih ---> [7]
andrew ---> [7, 14, 20, 24, 37, 89]
jones ---> [7, 56, 65, 78, 94]
ficnih ---> [7]
sandy ---> [7, 69]
warren ---> [7, 76]
randolph ---> [7]
krause ---> [7]
fic/nihopen ---> [7]
meeting* ---> [7]
call ---> [7, 8, 10, 15, 16, 18, 19, 24, 30, 32, 33, 37, 38, 39, 40, 42, 44, 46, 47, 52, 53, 54, 55, 58, 63, 69, 73, 74, 76, 87, 90, 94, 99]
orderdr ---> [7]
2001.he ---> [7]
welcomed ---> [7, 37]
guests.dr ---> [7]
meeting.he ---> [7]
shope ---> [7]
participated ---> [7, 9, 15, 24, 33, 52, 55, 63, 70]
awards ---> [7, 15, 39, 72, 91]
subcommittee ---> [7, 15, 37, 53, 58, 73, 89]
unable ---> [7, 16, 19, 25, 33, 37, 46, 52, 53, 55, 62, 87]
recovering ---> [7, 16, 23]
laryngeal ---> [7]
surgery ---> [7, 23, 29, 47, 63, 79, 82]
meeting.dr ---> [7]
congratulated ---> [7, 63]
drs ---> [7]
wilfert ---> [7]
graduation ---> [7]
respective ---> [7, 37, 43, 55]
daughter ---> [7, 8, 23, 55, 63, 79]
school.he ---> [7]
guest ---> [7, 10, 44, 94]
p

trying ---> [8, 19, 33, 42, 44, 45, 52, 55, 56, 58, 59, 64, 68, 75, 87]
build ---> [8, 16, 19, 29, 33, 53, 74, 83, 97]
understanding.leaders ---> [8]
muslims.if ---> [8]
willing ---> [8, 9, 19, 53, 56, 59, 79, 87, 99]
follow ---> [8, 23, 24, 25, 32, 37, 44, 45, 48, 53, 54, 58, 61, 63, 69, 79, 81, 85, 91, 94]
fundament ---> [8]
interreligious ---> [8]
sanaulla ---> [8]
kirmani ---> [8]
adjunct ---> [8]
professor ---> [8, 15, 23, 25, 47, 48, 50, 55, 56, 62, 87]
philosophy ---> [8, 56]
goucher ---> [8]
collegechristianity ---> [8]
love ---> [8, 44, 63, 87]
necessarily ---> [8, 19, 24, 29, 37, 44, 54, 59, 80, 83, 87, 89, 96, 99]
everybody ---> [8, 30, 54, 85, 87]
else ---> [8, 29, 30, 37, 52, 59, 87]
fundamental ---> [8, 9, 15, 17, 23, 30, 37, 40, 41, 53, 55, 56, 62, 69]
christianity.the ---> [8]
affair ---> [8, 37, 55, 89]
justice.god ---> [8]
implies ---> [8, 32, 37]
unity ---> [8, 50, 55]
vehemently ---> [8]
monotheistic ---> [8]
everything ---> [8, 9, 10, 16, 44, 54, 63, 85, 89]
god.th

mcgill ---> [13]
terrace ---> [13, 76]
n.w ---> [13, 38, 62, 76]
20009 ---> [13]
tel ---> [13, 25, 39]
483-1407 ---> [13]
orleans ---> [13, 16, 32, 37, 50, 76]
tampa.in ---> [13]
abduction ---> [13, 33]
entry/exit ---> [13]
points.these ---> [13]
requiring ---> [13, 16, 24, 29, 34, 37, 39, 40, 53, 55, 62, 78, 87]
permission ---> [13, 33, 37, 55]
guardian ---> [13]
present.having ---> [13]
facilitate ---> [13, 16, 23, 28, 39, 51, 53, 55, 69, 73, 83, 99]
entry/departure.u.s ---> [13]
transiting ---> [13]
fees.u.s ---> [13]
piloting ---> [13]
procedures.safety ---> [13]
warned ---> [13, 47, 58, 87, 89]
darien ---> [13]
province ---> [13, 22, 39, 69]
drawn ---> [13, 30, 55, 60, 62, 82, 87]
punta ---> [13]
carreto ---> [13]
comarca ---> [13]
blas ---> [13]
atlantic ---> [13, 76]
coast ---> [13, 52, 56, 60, 61, 63, 69, 87, 89]
yaviza ---> [13]
pina ---> [13]
.this ---> [13, 16, 25, 39, 42, 47, 59, 69, 82]
encompasses ---> [13, 31, 37, 69, 82]
privately-owned ---> [13]
resorts.while ---> [13]

children ---> [15, 17, 23, 34, 47, 55, 68, 76, 91, 94]
families.from ---> [15]
1986-1997 ---> [15]
hhs.dr ---> [15]
carmona ---> [15]
nominated ---> [15, 89, 96]
david ---> [15, 16, 20, 27, 40, 42, 47, 56, 65, 76, 78]
satcher ---> [15]
ended ---> [15, 30, 32, 33, 39, 43, 47, 53, 63, 79, 87, 99]
february.dr ---> [15]
sullivan ---> [15, 47]
coburn ---> [15]
co-chair ---> [15, 30, 43, 93]
presidential ---> [15, 37, 39, 40, 68, 89, 96]
hiv/aids.dr ---> [15]
jerome ---> [15]
hauer ---> [15]
preparedness.he ---> [15]
d.a ---> [15, 28]
heath ---> [15]
preparedness ---> [15, 19, 58, 73]
secretarys ---> [15]
preparedness.dr ---> [15]
jeffrey ---> [15]
koplan ---> [15]
resigned ---> [15]
emory ---> [15]
universitys ---> [15]
woodruff ---> [15]
center.dr ---> [15]
fleming ---> [15]
ncid ---> [15]
julie ---> [15]
gerberding ---> [15]
anddr ---> [15]
osterholm ---> [15]
minnesota.dr ---> [15]
roderic ---> [15]
i. ---> [15, 23, 27, 37, 56, 63, 88]
pettigrew ---> [15]
permanent ---> [15, 30, 33, 37, 

isolates ---> [16, 23]
environments.pilot ---> [16]
fermentation ---> [16]
7000-l ---> [16]
parameter ---> [16, 23, 70, 99]
development.phase ---> [16]
7,000l ---> [16]
toxicology ---> [16, 48]
elucidation ---> [16, 23]
ingredients.insecticidal ---> [16]
organic ---> [16, 25, 69]
sector._________________________________________________________________title ---> [16]
transgenesiscompany ---> [16]
kuehnle ---> [16]
agrosystems ---> [16]
3119 ---> [16]
woods ---> [16]
honolulu ---> [16]
96822principal ---> [16]
manfred ---> [16]
kuehnletopic ---> [16]
monthsgenetically ---> [16]
materials.the ---> [16, 37, 56]
shortcoming ---> [16, 33]
improvement.we ---> [16]
reproducibly ---> [16]
magnetic ---> [16, 44, 63]
particle ---> [16, 44, 60]
micromagnets ---> [16]
tissues.research ---> [16]
micromagnet ---> [16]
geometry/composition ---> [16]
payload ---> [16, 44, 63]
sequestering.identification ---> [16]
mersistems ---> [16]
plastid ---> [16]
gametes.anticipated ---> [16]
researchmagnetophores

mastitis ---> [16]
hazard ---> [16, 37, 39, 53, 58, 69, 70, 73, 99]
selects ---> [16]
antibiotic ---> [16, 83]
bacteria.we ---> [16]
anti-microbial ---> [16]
nisin ---> [16]
disease.nisin ---> [16]
bacterocin ---> [16]
safely ---> [16, 25, 36, 44, 52, 87]
gras ---> [16, 32, 50]
applications.preliminary ---> [16]
lactating ---> [16, 75, 87]
cows.this ---> [16]
therapy.our ---> [16]
characteristics.first ---> [16]
mastitis.second ---> [16]
concerns.third ---> [16]
resistance.anticipated ---> [16]
doses.the ---> [16]
dose.considering ---> [16]
unreasonable.thus ---> [16]
projected ---> [16, 39, 53]
alone.the ---> [16, 37]
large._________________________________________________________________title ---> [16]
viruscompany ---> [16]
wolf ---> [16, 18]
biotech ---> [16]
4711 ---> [16]
huntington ---> [16]
5w ---> [16]
lincoln ---> [16, 32, 69, 76]
68504principal ---> [16]
osvaldo ---> [16, 30]
lopeztopic ---> [16]
69,000/6 ---> [16]
monthsporcine ---> [16]
syndrome ---> [16, 23, 29, 80]
late-

alaska ---> [16, 18, 52, 56, 69, 76]
obnoxious ---> [16]
manner.the ---> [16]
grows ---> [16, 42, 69, 79]
kodiak ---> [16]
ketchikan ---> [16]
state.it ---> [16]
dominate ---> [16, 39, 55]
verified.each ---> [16]
dried ---> [16, 25, 28, 64, 82]
pound.some ---> [16]
pound.anticipated ---> [16]
materially ---> [16, 37, 62]
natives ---> [16]
dwell ---> [16]
corporations ---> [16]
ancsa ---> [16]
1976._________________________________________________________________title ---> [16]
clustercompany ---> [16]
diversified ---> [16, 23]
associates ---> [16, 56, 67, 92]
984 ---> [16]
romney ---> [16]
wv ---> [16, 76]
26757principal ---> [16]
harvey ---> [16, 69]
christietopic ---> [16]
liability ---> [16, 26, 39, 59, 70, 87, 90]
hhllc ---> [16]
value- ---> [16]
contiguous ---> [16, 69]
counties.the ---> [16]
mountainous ---> [16, 69]
predominantly ---> [16, 30, 33, 37, 48, 54, 55, 64, 79, 89]
depressed.participants ---> [16]
community-based ---> [16, 72]
locally ---> [16, 25, 39, 55, 69, 72, 79]


law.farida ---> [17]
activist ---> [17, 30, 33, 55, 56, 85]
spoke.she ---> [17]
silenced ---> [17]
restored ---> [17, 23, 52, 53]
said.also ---> [17]
dignitary ---> [17]
cabinet ---> [17, 30, 33, 37, 55, 89, 99]
hillary ---> [17]
rodham ---> [17]
york.bush ---> [17]
liberated ---> [17]
terror ---> [17, 30, 64]
abroad.this ---> [17]
afghans ---> [17]
lesson ---> [17, 23, 90]
past.we ---> [17]
houseoffice ---> [17]
secretarydecember ---> [17]
2001remarks ---> [17]
2001the ---> [17]
d.c.11:35 ---> [17]
estthe ---> [17]
thank ---> [17, 24, 25, 40, 41, 44, 52, 54, 63, 73]
all.for ---> [17]
brutal ---> [17, 42]
women.thanks ---> [17]
brave ---> [17, 30, 56]
end.yet ---> [17]
ended.we ---> [17]
bonn ---> [17, 85]
afghanistan.we ---> [17]
child.and ---> [17]
goal.i ---> [17]
steadiness ---> [17]
crisis.i ---> [17]
farida ---> [17]
courage.i ---> [17]
today.i ---> [17]
sima ---> [17]
wali ---> [17]
ceo ---> [17, 99]
advocate ---> [17, 33, 39, 40, 53, 54]
week.i ---> [17]
veneman ---> [17]
whitm

populations.5.sponsored ---> [23]
tissues.6.sponsored ---> [23]
manipulating ---> [23]
descendants.7.promotion ---> [23]
humans.8.sponsor ---> [23]
populations.this ---> [23]
a.niddk-sponsored ---> [23]
crafting ---> [23]
ro1 ---> [23]
sba ---> [23, 90]
'shadow ---> [23]
genomics/proteomics ---> [23]
arises ---> [23, 48, 62]
fullest ---> [23, 37, 43, 93]
government-sponsored ---> [23, 47]
endoderm ---> [23]
biologyoverview ---> [23]
endodermal ---> [23]
lung ---> [23, 36, 38, 47, 48, 79, 80]
founder ---> [23, 47]
cells.we ---> [23]
age.until ---> [23]
ectodermal ---> [23]
mesodermal ---> [23]
endoderm.however ---> [23]
frog ---> [23, 42]
mouse ---> [23, 25, 61, 80]
breakthrough ---> [23, 47]
systems.genes ---> [23]
manipulable ---> [23]
non-mammalian ---> [23]
mammals.the ---> [23]
evolutionary ---> [23, 60]
transplantation ---> [23, 29, 52]
ripe ---> [23, 37, 87]
aging.the ---> [23]
embryonic ---> [23, 91]
patterning ---> [23]
mammal ---> [23, 69, 87, 98]
differentiation ---> [23]
es 

856-6955 ---> [23]
peter.quesenberry ---> [23]
umassmed.edugutjeffrey ---> [23]
gordon ---> [23, 52, 56, 89]
pharmacology ---> [23, 48, 75]
660 ---> [23]
euclid ---> [23, 76]
63110 ---> [23]
314 ---> [23, 37, 53, 76]
362-7243 ---> [23]
362-7047 ---> [23]
jgordon ---> [23]
pharmsun.wustl.edudaniel ---> [23]
podolsky ---> [23]
02114 ---> [23]
726-7411 ---> [23]
724-2136 ---> [23]
podolsky.daniel ---> [23]
mgh.harvard.edurobert ---> [23]
coffey ---> [23]
jr. ---> [23, 26, 32, 37, 40, 56, 76, 92]
univ ---> [23, 52, 69]
sch ---> [23]
2201 ---> [23, 76]
37235 ---> [23]
343-1500 ---> [23]
343-1591 ---> [23]
robert.coffey ---> [23]
mcmail.vanderbilt.edurobert ---> [23]
343-4747 ---> [23]
robert.whitehead ---> [23]
mcmail.vanderbilt.eduliverrebecca ---> [23]
taub ---> [23]
705a ---> [23]
stellar-chance ---> [23]
laboratories/6100 ---> [23]
19104 ---> [23]
898-9131 ---> [23]
573-5892 ---> [23]
taubra ---> [23]
mail.med.upenn.edumarkus ---> [23]
grompe ---> [23]
bonegerard ---> [23]
karsenty --->

//www.vegsource.com/talk/lyman/messages/7548.html ---> [25]
chew ---> [25]
bmj ---> [25]
//www.bmj.com/cgi/eletters/319/7220/1312/b ---> [25]
el2 ---> [25]
//www.bmj.com/cgi/eletters/320/7226/8/b ---> [25]
el1 ---> [25]
sr ---> [25]
kraemer ---> [25]
gibbons ---> [25]
holman ---> [25]
belay ---> [25]
schonberger ---> [25]
//jama.ama-assn.org/issues/v285n6/ffull/jlt0214-2.html ---> [25]
truth.you ---> [25]
//www.testcowsnow.com ---> [25]
//www.vegsource.com/talk/madcow/index.html ---> [25]
//www.fortunecity.com/healthclub/cpr/349/part1cjd.htm ---> [25]
//disc.server.com/indices/167318.html ---> [25]
fsis ---> [25]
3/10/03 ---> [25]
//www.vegsource.com/talk/madcow/messages/9912605.html ---> [25]
3/08/03 ---> [25]
//www.vegsource.com/talk/madcow/messages/9912601.html ---> [25]
//www.vegsource.com/talk/madcow/messages/9912602.html ---> [25]
3/09/03 ---> [25]
//www.vegsource.com/talk/madcow/messages/9912604.html ---> [25]
cattlemen ---> [25]
wait ---> [25, 44, 46, 48, 59, 63]
1/17/03 ---> [

cataracts ---> [29]
detachment ---> [29]
blindness.back ---> [29]
impending ---> [29]
diabetes._________________________________________________________________back ---> [29]
california.conditions ---> [29]
schwarzenegger ---> [29]
go14 ---> [30, 83]
2002state ---> [30]
noon ---> [30]
briefing ---> [30, 63, 67, 73, 85]
cuba ---> [30, 60, 89]
azerbaijan/armenia ---> [30]
israel/palestinian ---> [30]
iceland ---> [30]
5720 ---> [30]
spokesman ---> [30, 53]
phil ---> [30]
reeker ---> [30]
briefed.following ---> [30]
1:15 ---> [30]
2002briefer ---> [30]
philip ---> [30, 56, 76, 91]
spokesmanpakistan ---> [30]
karachi ---> [30]
needs/activities ---> [30]
consulates ---> [30, 40]
musharrafiraq ---> [30]
expulsion ---> [30]
diplomat ---> [30]
missioncuba ---> [30]
castro ---> [30, 89]
speech/project ---> [30]
varela ---> [30]
cubanorth ---> [30]
korean ---> [30, 39, 49, 77, 89]
officials ---> [30, 37, 40]
asylum ---> [30, 33]
seekers ---> [30]
facilitiesazerbaijan/armenia ---> [30]
minsk --->

classes.many ---> [33]
families.unicef ---> [33]
enrolled ---> [33, 79, 82]
school.in ---> [33]
girls.illiteracy ---> [33]
6.d ---> [33]
exploited.the ---> [33]
adoptions.there ---> [33]
infants.poor ---> [33]
jeopardized ---> [33, 53]
earned ---> [33, 53, 55, 89, 90]
conformity ---> [33]
regulations.the ---> [33, 55, 68]
litigate ---> [33]
children.during ---> [33]
rings.persons ---> [33]
disabilitiesmore ---> [33]
war.families ---> [33]
disabilities.most ---> [33]
organizations.these ---> [33]
funded.there ---> [33]
filled ---> [33, 42, 87]
position.during ---> [33]
law.during ---> [33]
personnel.national/racial/ethnic ---> [33]
minoritiesaccording ---> [33]
380,000 ---> [33]
overpopulated ---> [33]
fighting.the ---> [33]
country.refugees ---> [33]
feared ---> [33, 55, 56]
completely.very ---> [33]
unskilled ---> [33, 55]
occupations.in ---> [33]
decline.the ---> [33]
professions.in ---> [33]
country.under ---> [33, 39]
1948.other ---> [33]
foreigner ---> [33, 39, 53, 55, 73]
offices

oversight.c ---> [37]
departmentcongressional ---> [37]
actions.however ---> [37]
palmer ---> [37]
flats ---> [37, 56]
obliged ---> [37]
malfeasance ---> [37]
elsewhere.a ---> [37]
spawned ---> [37]
policy.in ---> [37]
subordinate ---> [37, 62]
informally ---> [37, 53]
cases.in ---> [37, 54, 55]
memoranda ---> [37, 68]
correspondence ---> [37, 55, 82]
outlining ---> [37, 82]
prejudicial ---> [37]
publicity ---> [37]
informant ---> [37, 62]
chilling ---> [37]
recounted ---> [37]
ut ---> [37, 42, 76]
surely ---> [37, 53, 87]
grind ---> [37]
halt ---> [37]
wrongdoing ---> [37]
114 ---> [37, 53]
disable ---> [37]
bearing ---> [37, 53, 63, 64]
proceedings.instead ---> [37]
116 ---> [37, 52, 53, 58]
delaney ---> [37, 52]
entertained ---> [37, 62]
duly ---> [37]
conscientiously ---> [37]
conception ---> [37]
impartial ---> [37]
postpone ---> [37]
lapse ---> [37, 47]
instant ---> [37, 52, 62, 87]
case.in ---> [37, 62]
laxity.also ---> [37]
procured ---> [37]
118 ---> [37, 53, 76, 87]
rejecting

65121 ---> [39]
22.investment ---> [39]
65931 ---> [39]
23.program ---> [39]
87221 ---> [39]
24.cable ---> [39]
87222 ---> [39]
25.satellite ---> [39]
87223 ---> [39]
26.news ---> [39]
88100 ---> [39]
27.radioactive ---> [39]
90230 ---> [39]
investment.in ---> [39]
pro-forma ---> [39]
hours.applications ---> [39]
invoked.exceptions ---> [39]
joint-venture ---> [39]
sectors.most ---> [39]
longer.the ---> [39]
projects.the ---> [39, 70, 99]
rokgs ---> [39]
procurement ---> [39, 70]
remain.korea ---> [39]
accession ---> [39, 40]
wto ---> [39, 57]
agreement.in ---> [39]
favorably ---> [39]
pre-establishment ---> [39]
investment.restrictions ---> [39]
increased.currently ---> [39]
kepco ---> [39]
telecom ---> [39]
.foreign ---> [39, 55]
kt ---> [39]
dacom ---> [39]
privatize ---> [39]
state-owned ---> [39]
corporations.the ---> [39]
privatized ---> [39]
2000.korea ---> [39]
mofe ---> [39]
services.effective ---> [39]
578 ---> [39]
incentives.in ---> [39]
broadened ---> [39, 40, 47, 53]
divi

perham ---> [42]
cabin ---> [42, 63, 87]
lake.perham ---> [42]
ottertail ---> [42]
west-central ---> [42, 69]
state.when ---> [42]
8-11 ---> [42]
10-13 ---> [42]
toads.we ---> [42]
cooler ---> [42]
25-30 ---> [42]
fun ---> [42, 87]
anymore.when ---> [42]
swamp ---> [42]
frog.of ---> [42]
be.__________________________________________________________________9 ---> [42]
program.i ---> [42, 44]
sites.these ---> [42, 69, 87]
vegetative ---> [42, 69]
invading ---> [42]
hydroperiod ---> [42]
development.just ---> [42]
gyrula ---> [42]
pristine ---> [42, 69]
deepwater ---> [42, 52, 98]
cypress ---> [42]
sw ---> [42, 76, 79]
florida.these ---> [42]
overun ---> [42]
west-indian ---> [42]
grass ---> [42, 69, 99]
hymenachne ---> [42]
agressively ---> [42]
lilypads ---> [42]
nuphar ---> [42]
sp ---> [42, 82]
pickerel ---> [42]
pontedaria ---> [42]
lanceolata ---> [42]
year.i ---> [42]
hylids ---> [42]
ranids ---> [42]
diameter ---> [42, 44, 52, 60, 69]
5-20 ---> [42]
sighting ---> [42, 44, 87]
frog

palpitation ---> [48, 58, 75]
ventricular ---> [48]
fibrillation.the ---> [48]
death.it ---> [48]
disorganized ---> [48]
400-600 ---> [48]
reversed.5,6iii.action ---> [48]
vesselsa ---> [48]
ephedra/ephedrinethe ---> [48]
energy.these ---> [48]
brain.they ---> [48]
methamphetamine.7 ---> [48]
ephedra.ephedra ---> [48]
ephedrine ---> [48, 58, 75]
sympathomimetic ---> [48, 75]
amine.that ---> [48]
above.ephedra ---> [48]
alpha ---> [48, 82]
2-adrenergic ---> [48]
dopamine ---> [48]
.ephedra ---> [48]
synthesized ---> [48, 75]
actions.b ---> [48]
caffeinemost ---> [48]
guarana ---> [48, 75]
seed.caffeine ---> [48]
anti-vagal ---> [48]
antagonizing ---> [48]
vasoconstriction ---> [48]
dopamine.importantly ---> [48]
activates ---> [48]
activity.these ---> [48]
summate ---> [48]
caffeine.c ---> [48]
effectsthe ---> [48]
to:1 ---> [48]
pressure2 ---> [48]
rate3 ---> [48]
potassium ---> [48]
bloodthese ---> [48]
ephedra/caffeine ---> [48, 75]
deathiv.what ---> [48]
amine ---> [48]
caffeine.no 

78-211* ---> [53]
h.j ---> [53]
1711943 ---> [53]
78-235* ---> [53]
36871944 ---> [53]
78-495* ---> [53]
55641945 ---> [53]
79-214* ---> [53]
43091946 ---> [53]
1946 ---> [53, 89]
79-719* ---> [53]
70371947 ---> [53]
1947 ---> [53, 89]
80-379* ---> [53]
38181948 ---> [53]
80-492* ---> [53]
50521948 ---> [53]
80-642* ---> [53]
2961950 ---> [53]
81-734* ---> [53]
60001952 ---> [53]
82-590* ---> [53]
78001954 ---> [53]
83-761* ---> [53]
93661956 ---> [53]
84-880* ---> [53]
72251958 ---> [53]
135491960 ---> [53]
125801961 ---> [53]
60271964 ---> [53]
118651965 ---> [53]
66751966 ---> [53]
127521967 ---> [53]
120801969 ---> [53]
132701971 ---> [53]
46901972 ---> [53]
losses ---> [53]
153901972 ---> [53]
11973 ---> [53]
113331977 ---> [53]
95-216 ---> [53]
93461980 ---> [53]
32361980 ---> [53]
76701980 ---> [53]
52951981 ---> [53]
39821981 ---> [53]
43311983 ---> [53]
70931983 ---> [53]
19001984 ---> [53]
37551985 ---> [53]
3721985 ---> [53]
fy86 ---> [53]
s.con.res.321986 ---> [53]
53001987

247-d ---> [53]
0-d^ ---> [53]
leath ---> [53]
352 ---> [53]
155-r ---> [53]
1985.^ ---> [53]
package.among ---> [53]
surfaced ---> [53, 89]
taxes.ultimately ---> [53]
savings.ff ---> [53]
1986president ---> [53]
5300 ---> [53]
1986.during ---> [53]
year.p.l ---> [53]
1986.1.senate ---> [53]
2706 ---> [53, 76]
sept. ---> [53, 54, 58]
354 ---> [53]
71-r ---> [53]
355 ---> [53]
193-d ---> [53]
r-51 ---> [53]
d-19 ---> [53]
gg ---> [53]
1987h.r ---> [53]
3545 ---> [53]
reagan.several ---> [53]
security.p.l ---> [53]
inactive ---> [53]
reentitled ---> [53]
1988.1.house ---> [53]
fy1988 ---> [53]
h.con.res.93 ---> [53]
.earlier ---> [53]
security.two ---> [53]
reagan.in ---> [53]
lengthen ---> [53]
205-d ---> [53]
164-r ---> [53]
357 ---> [53]
provisions.a.on ---> [53]
kassebaum ---> [53]
358 ---> [53]
vote.3.conference ---> [53]
130-r ---> [53]
359 ---> [53]
hh ---> [53]
1988on ---> [53]
4333 ---> [53]
1988.in ---> [53]
security.among ---> [53]
nazis ---> [53]
security-covered ---> [53]
ph

locked ---> [55, 63]
communal ---> [55]
apartment ---> [55, 69]
van.many ---> [55]
age.juveniles ---> [55]
piecework ---> [55]
basis.child ---> [55]
family-operated ---> [55]
widespread.e ---> [55]
wage.however ---> [55]
397.88 ---> [55]
dinar ---> [55]
employees.compliance ---> [55]
suggest.for ---> [55]
bonus ---> [55]
salary.however ---> [55]
family.the ---> [55]
health.the ---> [55, 69, 79]
violators.the ---> [55]
courts.once ---> [55]
lodge ---> [55]
law.complaints ---> [55]
days.in ---> [55, 87, 89]
employee.under ---> [55]
jeopardy ---> [55]
firing.because ---> [55]
allowance ---> [55, 87]
allowances.western ---> [55]
former.women ---> [55]
day.however ---> [55]
men.in ---> [55]
announcing ---> [55, 87]
companies.according ---> [55]
mistreatment.however ---> [55]
origin.sponsors ---> [55]
cancel ---> [55, 74]
wrongful ---> [55]
dismissal.legislation ---> [55]
employers.however ---> [55]
workers.they ---> [55]
.between ---> [55]
.unverified ---> [55]
suicide.a ---> [55]
employmen

1964.13 ---> [56]
tolowa ---> [56]
california.the ---> [56, 61, 98]
narration ---> [56]
obsidian ---> [56]
chipping ---> [56]
weapons.odyssey ---> [56]
franz ---> [56]
americansorigin ---> [56]
ba'ts'oosee ---> [56]
3743 ---> [56]
1978.40 ---> [56]
series.apache ---> [56]
storyteller ---> [56, 76]
rudolph ---> [56]
kane ---> [56]
great-grandchildren.our ---> [56]
9793 ---> [56]
eagle.28 ---> [56]
dakota.many ---> [56]
interviewed.people ---> [56]
1397 ---> [56]
culp ---> [56]
asner.28 ---> [56]
bshows ---> [56]
karuk ---> [56]
crafts.pine ---> [56]
nuts ---> [56]
1226 ---> [56, 76]
series.paiute ---> [56]
washo ---> [56]
purposes.place ---> [56]
1391 ---> [56]
brekke ---> [56]
brekke.16 ---> [56]
reel.this ---> [56]
artifacts.probable ---> [56]
7785 ---> [56]
tocayos ---> [56]
wilson.60 ---> [56]
choosing ---> [56, 73, 79]
continuity ---> [56]
land.indian ---> [56]
grindstone ---> [56]
options.queen ---> [56]
6083 ---> [56]
reel.re-creates ---> [56]
paintings.return ---> [56]
raven ---

in.. ---> [59]
1\2 ---> [59]
copied.you ---> [59]
shop.. ---> [59]
.remember ---> [59]
winners.. ---> [59]
4,000,000 ---> [59]
zboch.now ---> [59]
rights.. ---> [59]
.think ---> [59]
minute.there ---> [59]
3000 ---> [59]
manual.. ---> [59]
.that ---> [59, 62]
45,000 ---> [59]
magazine.. ---> [59]
there.. ---> [59]
money.. ---> [59]
mail.. ---> [59]
wish.. ---> [59]
this.. ---> [59]
199.00 ---> [59]
.but ---> [59, 62, 79]
drawback.. ---> [59]
sold.it ---> [59]
people.. ---> [59, 80]
that.to ---> [59]
years.. ---> [59]
.only ---> [59]
199.00.. ---> [59]
late.. ---> [59]
un-cashed.. ---> [59]
written.the ---> [59]
cents.. ---> [59]
set.. ---> [59]
.push ---> [59]
million-dollar ---> [59]
.sincerely ---> [59]
.28.through ---> [59]
period.29.in ---> [59]
period.rather ---> [59]
thereafter.therefore ---> [59]
misleading.30.through ---> [59]
reproduce ---> [59, 69]
year.31.in ---> [59]
misleading.32.through ---> [59]
made.33.in ---> [59]
misleading.34.the ---> [59]
deceptive ---> [59, 66, 80]

smoothly ---> [63]
unfurled.today ---> [63]
walks.sts-61 ---> [63]
bay.a ---> [63]
glitch ---> [63]
imprecise ---> [63]
ku-band ---> [63]
controllers.thursday ---> [63]
9:30 ---> [63]
telescope.astronauts ---> [63]
thursday.shortly ---> [63]
pushed ---> [63, 87]
unfold ---> [63]
day.hoffman ---> [63]
a.m.cst.crew ---> [63]
mylar ---> [63]
degradation.shortly ---> [63]
perched ---> [63]
tribute ---> [63]
heads.what ---> [63]
reflection ---> [63]
orbits.musgrave ---> [63]
4:51 ---> [63]
a.m.cst.ground ---> [63]
refurbished ---> [63]
antennas ---> [63]
5:55 ---> [63]
a.m.cst.during ---> [63]
sade ---> [63]
far.crew ---> [63]
today.hubble ---> [63]
13-week ---> [63]
journey.hubble ---> [63]
1:08 ---> [63]
friday.all ---> [63]
93-12-09b ---> [63]
centerthursday ---> [63]
space-walking ---> [63]
friday.release ---> [63]
1:13 ---> [63]
7/21:46 ---> [63]
subsystems.in ---> [63]
sun.when ---> [63]
redeploy ---> [63]
torquers.about ---> [63]
arm.forty ---> [63]
umbilical ---> [63, 87]
system.nic

sierra ---> [69, 87]
cordillera ---> [69]
disjunction ---> [69]
rarity ---> [69]
endemism ---> [69]
limestone-derived ---> [69]
topographic ---> [69]
diversity.2.3 ---> [69]
resources.great ---> [69]
valleys.seven ---> [69]
nnhp ---> [69]
boundaries.many ---> [69]
grba-sensitive ---> [69]
documented.no ---> [69]
range.rare ---> [69]
sandwort ---> [69]
arenaria ---> [69]
congesta ---> [69]
wheelerensis ---> [69]
intermountain ---> [69]
species.endemic ---> [69]
subspecies ---> [69, 71]
range.listed ---> [69]
nnnps ---> [69]
watch-list ---> [69]
g5s1 ---> [69]
nnhp.presence ---> [69]
herbarium ---> [69]
subspecific ---> [69]
taxonomy ---> [69]
documented.alpine ---> [69]
subalpine ---> [69]
environments.threatened ---> [69]
grazing ---> [69, 87]
areas.tunnel ---> [69]
beardtongue ---> [69]
penstemon ---> [69]
concinnus ---> [69]
species.in ---> [69, 87, 98]
usfws ---> [69]
g3s2 ---> [69]
confirmed.gravelly ---> [69]
mid-elevation ---> [69]
alluvial ---> [69]
slope ---> [69, 87, 98]
sageb

dial ---> [73]
operators.once ---> [73]
perused ---> [73]
leaders.furthermore ---> [73]
on.the ---> [73]
hotline.this ---> [73]
24-hours ---> [73]
personnel.all ---> [73]
wallet ---> [73]
emergency.depending ---> [73]
designees ---> [73]
gained.without ---> [73]
disposal.callers ---> [73]
uninformed ---> [73]
crisis.finally ---> [73]
tying ---> [73]
scenarios.the ---> [73]
groundwork ---> [73, 99]
table-top ---> [73]
simulation.some ---> [73]
in-country ---> [73]
partners.too ---> [73]
advertising.due ---> [73]
diligence ---> [73]
unearth ---> [73]
hassle ---> [73]
lives.contracting ---> [73]
medevac ---> [73]
organization.in-country ---> [73]
dicey ---> [73]
services.considering ---> [73]
kidnap ---> [73]
ransom ---> [73, 76]
insurance.the ---> [73]
painless ---> [73]
speedy ---> [73]
confusing ---> [73]
time.kidnappings ---> [73]
amounts.all ---> [73]
readiness ---> [73]
eeurope.gif ---> [74]
11.5 ---> [74]
boiling ---> [74]
siberian ---> [74]
bilibino.the ---> [74]
legacythe ---> [7

5916 ---> [76]
64111muse ---> [76]
musicians ---> [76]
cresent ---> [76]
14214monadnock ---> [76]
munsonville ---> [76]
03457morning ---> [76]
3672 ---> [76]
19807motif ---> [76]
43210muddy ---> [76]
1244 ---> [76]
65102the ---> [76]
museletter ---> [76]
lanes ---> [76]
wrentham ---> [76]
02093-0323music ---> [76]
semfolk ---> [76]
arnold ---> [76]
bedford ---> [76]
02740musical ---> [76]
ashbrook ---> [76]
8310 ---> [76]
20907muskeg ---> [76]
778 ---> [76]
03755neffa ---> [76]
02140nhac ---> [76]
viet ---> [76]
vietnamese ---> [76]
44240name-ye-shayda ---> [76]
shayda ---> [76]
19336 ---> [76]
gaithersburg ---> [76, 79]
20879nashville ---> [76]
4781 ---> [76]
37216natchitoches ---> [76]
71457national ---> [76]
d.c.blues ---> [76]
77315 ---> [76]
d.c.20013-7715national ---> [76]
fiddlernational ---> [76]
54055 ---> [76]
39208nevada ---> [76]
germain ---> [76]
7969 ---> [76]
rodeo ---> [76]
89119new ---> [76]
02141new ---> [76]
brattleboro ---> [76]
05301the ---> [76]
jubilee ---> [76]


prostateif ---> [80]
holistic ---> [80]
remedy.for ---> [80]
hyperlink ---> [80]
testimonials ---> [80]
usersi ---> [80]
fatigue.the ---> [80]
wonderful.the ---> [80]
immediate.. ---> [80]
brother-in ---> [80]
essiac.i ---> [80]
gone.. ---> [80]
.my ---> [80]
nephew ---> [80, 89]
cancer.. ---> [80]
remission ---> [80]
cancer.under ---> [80]
chemotherapy.i ---> [80]
psa ---> [80]
widespread.. ---> [80]
it.well ---> [80]
remarkable.i ---> [80]
sixty-two ---> [80]
sixteen ---> [80]
back.. ---> [80]
cancer.i ---> [80]
session.every ---> [80]
so-o-o ---> [80]
hemo ---> [80]
fantastic ---> [80]
patient.. ---> [80]
canada.after ---> [80]
'essiac ---> [80]
cancer'.in ---> [80]
1950s ---> [80, 89]
brusch ---> [80]
massachusetts.after ---> [80]
.will ---> [80]
xxxxxxx ---> [80]
illness.for ---> [80]
gary ---> [80]
glum ---> [80]
die.they ---> [80]
carini ---> [80]
histoplasmosis.their ---> [80]
ten.the ---> [80]
with.he ---> [80]
ounce ---> [80]
day.by ---> [80]
died.dr ---> [80]
alive.they --->

education.some ---> [87]
shifting ---> [87]
help.ordinary ---> [87]
research.dfg ---> [87]
fund.non-hunting ---> [87]
enthusiast ---> [87]
wildlands ---> [87]
3211 ---> [87]
95816 ---> [87]
checkoff ---> [87]
fund.see ---> [87]
540.-second ---> [87]
pioneering ---> [87]
country.situations ---> [87]
ourselves.and ---> [87]
out.keep ---> [87]
night.put ---> [87]
top.light ---> [87]
property.use ---> [87]
them.-finally ---> [87]
safe.keep ---> [87]
areas.all ---> [87]
children.clark ---> [87]
prey.stand ---> [87]
tall.carry ---> [87]
stick.if ---> [87]
run.don ---> [87]
crouch.never ---> [87]
children.appear ---> [87]
can.throw ---> [87]
stones.shout ---> [87]
advise.fjelline ---> [87]
posture ---> [87]
mistook ---> [87]
minute.that ---> [87]
sites.her ---> [87]
sens ---> [87]
smell ---> [87]
see.she ---> [87]
humans.but ---> [87]
size.men ---> [87]
bigger.or ---> [87]
humans.you ---> [87]
yourself.no ---> [87]
blame ---> [87]
it.haven ---> [87]
free-lance ---> [87]
cool.dfg ---> [87]
com

33131 ---> [92]
1715 ---> [92]
21203.as ---> [92]
9:00 ---> [92, 94]
salon ---> [92]
marriott ---> [92]
airport.interested ---> [92]
hearing.persons ---> [92]
202/377-2862 ---> [92]
4.instead ---> [92]
1986.a ---> [92]
1529 ---> [92]
20230.dated ---> [92]
1986.john ---> [92]
doc.86-3291 ---> [92]
2-13-86 ---> [92]
8:45 ---> [92]
3510-ds-m ---> [92]
treasemblem.jpg ---> [93]
622-2960 ---> [93]
514-2007 ---> [93]
d.c.c ---> [93]
decline.task ---> [93]
vigilance ---> [93]
well-publicized ---> [93]
efforts.in ---> [93]
worship.in ---> [93]
140.these ---> [93]
1999.this ---> [93]
2000.as ---> [93]
36.2 ---> [93]
945 ---> [93]
investigated.the ---> [93]
horrific ---> [93]
fires ---> [93]
sear ---> [93]
law.for ---> [93]
431 ---> [93]
1999.sentencing ---> [93]
prison.this ---> [93]
created.ballinger ---> [93]
georgia.the ---> [93]
www.atf.treas.gov/pub ---> [93]
00-542 ---> [93]
train/subway ---> [94]
fare ---> [94]
4.00.use ---> [94]
stations.you ---> [94]
information.hilton ---> [94]
tysons

In [317]:
len(invIndex)

42609

### Lookup for Terms in Inverted Index

In [None]:
term = "cancer"
lookupResult = invObj.termLookup(term)

In [None]:
print("No of Documents with Term {} is {} ".format(term,len(lookupResult)))

### Saving Invertex Index to CSV

In [None]:
import csv
w = csv.writer(open("InvertedIndex.csv", "w"))
for key, val in invIndex:
    w.writerow([key, val])

### Size of Invertex Index with 100 corpus documents.

In [None]:
print(len(invIndex))

### Load Lemmatized Corpus

In [2]:
path = "/Volumes/JK/AI-Search-Engine/src/Data/Slack_Data/Lemmatized Corpus/"
os.chdir(path)

In [3]:
filelist = os.listdir(path)
filelist

['doc1.txt',
 'doc10.txt',
 'doc100.txt',
 'doc1000.txt',
 'doc1001.txt',
 'doc1002.txt',
 'doc1003.txt',
 'doc1004.txt',
 'doc1005.txt',
 'doc1006.txt',
 'doc1007.txt',
 'doc1008.txt',
 'doc1009.txt',
 'doc101.txt',
 'doc1010.txt',
 'doc1011.txt',
 'doc1012.txt',
 'doc1013.txt',
 'doc1014.txt',
 'doc1015.txt',
 'doc1016.txt',
 'doc1017.txt',
 'doc1018.txt',
 'doc1019.txt',
 'doc102.txt',
 'doc1020.txt',
 'doc1021.txt',
 'doc1022.txt',
 'doc1023.txt',
 'doc1024.txt',
 'doc1025.txt',
 'doc1026.txt',
 'doc1027.txt',
 'doc1028.txt',
 'doc1029.txt',
 'doc103.txt',
 'doc1030.txt',
 'doc1031.txt',
 'doc1032.txt',
 'doc1033.txt',
 'doc1034.txt',
 'doc1035.txt',
 'doc1036.txt',
 'doc1037.txt',
 'doc1038.txt',
 'doc1039.txt',
 'doc104.txt',
 'doc1040.txt',
 'doc1041.txt',
 'doc1042.txt',
 'doc1043.txt',
 'doc1044.txt',
 'doc1045.txt',
 'doc1046.txt',
 'doc1047.txt',
 'doc1048.txt',
 'doc1049.txt',
 'doc105.txt',
 'doc1050.txt',
 'doc1051.txt',
 'doc1052.txt',
 'doc1053.txt',
 'doc1054.txt',
 'd

In [4]:
corpus = []
for i in filelist:
     with open("{}".format(i)) as f_input:
            corpus.append(f_input.read())

In [6]:
#Lemmatized Corpus
corpus[0]

"Link National Cancer Institute Center Cancer Research Link Contact CCR Link CCR Homepage Link NCI home Link NIH home _______________ Search Comparative Oncology Program Home Introduction Specific Aims Time Line- For Implementation Background The CCR - Comparative Oncology Program compliment number new CCR initiative designed improve translational process.Program Announcement - 9/2003 Comparative Oncology refers study naturally developing cancer animal model human disease.A significant under-utilized group naturally occurring cancer develops pet animal , primarily cat dogs.These large animal cancer share many feature human cancer including tumor histology , genetics , response conventional therapy biological behavior.Examples model include : Osteosarcoma Non-Hodgkin 's Lymphoma Breast Cancer Head Neck Carcinoma Prostate Cancer Soft Tissue Sarcoma Melanoma Virally-Induced-LymphomaThrough design clinical trial include pet animal broader understanding treatment biology cancer attained.Man

### Convert Corpus to Vectors

In [7]:
from gensim.models import Word2Vec
from nltk.tokenize import sent_tokenize, word_tokenize 

In [26]:
def processCorpus(train):
    traincorp = []
    for i in train:
        traincorp.append(word_tokenize(i))
    return traincorp

In [35]:
train = corpus

In [36]:
traincorp = processCorpus(train)

In [37]:
modelW2V = Word2Vec(traincorp, min_count=1)

In [38]:
modelW2V.save('modelfull.bin')

In [39]:
path = "/Volumes/JK/AI-Search-Engine/src/Data/Slack_Data/"

* Refer : https://machinelearningmastery.com/develop-word-embeddings-python-gensim/ 
* Refer : https://www.geeksforgeeks.org/tokenize-text-using-nltk-python/
* Refer : https://stackoverflow.com/questions/55713132/how-to-tokenize-a-list-of-lists-in-python

### Model Based Similarity

##### Score of using our Model to compare Cancer with similar words such as Ovarian, Tumor

In [53]:
modelW2V.wv.similarity('cancer', 'tumor') 

0.8035345

In [52]:
modelW2V.wv.similarity('cancer','ovarian')

0.86470807

##### Score of using our Model to compare Cancer with dissimilar words such as Cloud

In [51]:
modelW2V.wv.similarity('cancer','cloud')

0.20746103

##### Finding top 10 most similar words to word Cancer in our dataset

In [49]:
v1 = "cancer"
modelW2V.wv.most_similar(positive=v1)

[('prostate', 0.9280345439910889),
 ('ovarian', 0.8647080659866333),
 ('cancer.The', 0.8155494928359985),
 ('tumor', 0.8035344481468201),
 ('breast', 0.792565107345581),
 ('cancers.The', 0.781282365322113),
 ('colorectal', 0.77799391746521),
 ('cancer.In', 0.7480220794677734),
 ('malignant', 0.7331823706626892),
 ('cancer.Dietary', 0.7293593883514404)]

## Doc2Vec Model for Finding out document similarity

In [64]:
from gensim.models.doc2vec import Doc2Vec, TaggedDocument
from nltk.tokenize import word_tokenize

In [65]:
tagged_data = [TaggedDocument(words=word_tokenize(_d.lower()), tags=[str(i)]) for i, _d in enumerate(corpus)]

### Traning the Model

In [1]:
max_epochs = 10
vec_size = 20 
alpha = 0.025

In [228]:
model = Doc2Vec(size=vec_size,alpha=alpha, min_alpha=0.00025,min_count=50,dm =1)



In [229]:
model.build_vocab(tagged_data)

In [230]:
for epoch in range(max_epochs):
    print('iteration {0}'.format(epoch))
    model.train(tagged_data,
                total_examples=model.corpus_count,
                epochs=model.iter)
    # decrease the learning rate
    model.alpha -= 0.0002
    # fix the learning rate, no decay
    model.min_alpha = model.alpha

iteration 0


  """


iteration 1
iteration 2
iteration 3
iteration 4
iteration 5
iteration 6
iteration 7
iteration 8
iteration 9


In [232]:
model.save("min50.model")

### Cosinie Similarity between Documents

In [233]:
from scipy import spatial

In [234]:
vec1 = model.infer_vector(corpus[1].split())
vec2 = model.infer_vector(corpus[3].split())

In [235]:
similairty = spatial.distance.cosine(vec1, vec2)
similairty

0.6142655909061432

In [239]:
new_sentence = "i love dogs".split(" ") 

In [240]:
model.docvecs.most_similar(positive=[model.infer_vector(new_sentence)],topn=5)

[('5235', 0.7422172427177429),
 ('4870', 0.7328481674194336),
 ('95', 0.7185875773429871),
 ('5868', 0.7118589878082275),
 ('1954', 0.6987151503562927)]

In [244]:
corpus[5868]

"DL-1 Rev . 12/95DOG IDENTIFICATION License No.Date Issues Dog Breed Dog Color ( ) Other ID Markings Dog 's Yr. Birth Last 2 Digits Dog 's Name Chk Code Expiration Date Code Code ( ) New York State Department Agriculture Markets Division Animal Industry 1 Winners Circle - Albany , New York 12235 518-457-2728DOG LICENSE Issuing County Code/TCV CodeRABIES CERTIFICATE REQUIRED Rabies Vaccine : Manufacturer __________________________ Serial Number __________________________LICENSE TYPEOne Year Vacc.Three Year Vacc.ORIGINAL RENEWAL TRANSFER OF OWNERSHIPDate Vaccinated ______________________ Veterinarian ______________________________Owner Identification ( Person harbor keep dog ) : Last First Middle InitialOWNER 'S PHONE NO.Area CodeMailing Address : House No.Street R.D . No . P.O . Box No.Phone No.CityStateZipCounty CodeCountyTown , City VillageTown , City , Vil . CodeTYPE OF LICENSE 1.2.3.Female , spayed Male , unneutered 6 month 4.6 mo . 6 month Male , neuteredState fee Spay/Neuter Fee 2

In [245]:
model = Doc2Vec.load("d2v.model")

In [246]:
query = "i love dogs".split(" ") 

In [250]:
model.docvecs.most_similar(positive=[model.infer_vector(query)],topn=5)

[('638', 0.7319365739822388),
 ('1041', 0.6784754991531372),
 ('2317', 0.6569496989250183),
 ('5235', 0.6252104043960571),
 ('2664', 0.6232786178588867)]

In [251]:
#Corpus Regarding Euthanesia, Animal Health, Veterinarian etc. These WORDS are Similar in semantic similarity with Dogs!
corpus[638]

"ISSN : 1052-5378 AWIC Animal Euthanasia Provided Animal Welfare Information Center United States Department Agriculture National Agricultural Library United States Department Agriculture National Agricultural Library 10301 Baltimore Blvd.Beltsville , Maryland 20705-2351SRB 93-06Special Reference Briefs SeriesAnimal EuthanasiaUpdated Special Reference Briefs Series no.SRB 98-01Michael D. Kreger Cynthia Petrie Smith Jennifer Lyons-CarterAnimal Welfare Information CenterJuly 1993National Agricultural Library Cataloging Record : Kreger , Michael D. Animal euthanasia . ( Special reference brief ; 93-06 ) 1.Euthanasia animal -- Bibliography.I . Smith , Cynthia Petrie.II.Lyons-Carter , Jennifer.III.Title.aS21.D27S64 no.93-06ANIMAL EUTHANASIATABLE OF CONTENTS* Introduction.. . . . . . . . . ... . . . . . . . . . . . .ii * General Veterinary Use . . . . . . . . . . . . . . . . . . ..1 * Teaching.. . . . . . . . . . . . . . . . . . . . . . . . ..7 * Human/Animal Bond.. . . . . . . . . . . . . .

#### Benefits of Semantic Similarity based Ranking is, documents related in semantic similarity with the query is returned. Using this to initialty filter results (documents) would be ideal.

In [261]:
query = "Healthcare Research".split(" ") 

In [263]:
model.docvecs.most_similar(positive=[model.infer_vector(query)],topn=5)

[('5508', 0.6469775438308716),
 ('5800', 0.6222918033599854),
 ('5692', 0.6181532144546509),
 ('6304', 0.6029208898544312),
 ('997', 0.5998862981796265)]

In [265]:
corpus[5508]

"Annual Report COMMONWEALTH OF VIRGINIA 2002 Table Contents Executive Summary ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... .ii State-wide Reach ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... .iii Future Direction ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... .. iv Activities Governor 's Office Substance Abuse Prevention ... ... ... ... ... 1 Statutory Authority ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... 1 GOSAP Organization Operation ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... 1 Funding ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... 2 GOSAP Philo

In [294]:
query = "safety".split(" ") 
model.docvecs.most_similar(positive=[model.infer_vector(query)],topn=5)

[('107', 0.7906770706176758),
 ('2530', 0.7669410109519958),
 ('3664', 0.7627139687538147),
 ('2932', 0.7612925171852112),
 ('4881', 0.7560254335403442)]

In [295]:
corpus[107]

"Skip common site navigation header United States Environmental Protection Agency Clean Air Markets - Environmental Issues Recent Additions | Contact Us | Print Version Search : __________ Run Search Begin Hierarchical Links EPA Home The reported damage typically occurs horizontal surface appears irregularly shaped , permanently etched areas.The damage best detected fluorescent lamp , easily observed dark colored vehicle , appears occur evaporation moisture droplet.In addition , evidence suggests damage occurs frequently freshly painted vehicles.Usually damage permanent ; occurred , solution repaint.The general consensus within auto industry damage caused form environmental fallout . `` Environmental fallout , '' term widely used auto coating industry , refers damage caused air pollution ( e.g. , acid rain ) , decaying insect , bird droppings , pollen , tree sap.The result laboratory experiment least one field study demonstrated acid rain scar automotive coatings.Furthermore , chemical

In [301]:
newmodel = Doc2Vec.load("d2vmin50.model")

[('4981', 0.6771769523620605),
 ('1759', 0.6741712093353271),
 ('4465', 0.6733023524284363),
 ('5965', 0.6625094413757324),
 ('3294', 0.6572967767715454)]

In [307]:
query = "dog".split(" ") 
newmodel.docvecs.most_similar(positive=[model.infer_vector(query)],topn=5)

[('748', 0.7187636494636536),
 ('1324', 0.7062160968780518),
 ('535', 0.7006916403770447),
 ('3253', 0.6962764859199524),
 ('544', 0.6899124383926392)]

In [308]:
corpus[748]

"810 KAR 1:001.Definitions.RELATES TO : KRS 230.210 230.360STATUTORY AUTHORITY : KRS 230.260NECESSITY , FUNCTION , AND CONFORMITY : To regulate condition thoroughbred racing shall conducted Kentucky.The function administrative regulation define term used commission 's administrative regulations.Section 1.Definitions . ( 1 ) `` Added money '' mean cash , exclusive trophy award , added association stake fee paid subscriber form total purse stake race . ( 2 ) `` Age '' mean number year since horse foaled , reckoned horse foaled January 1 year horse foaled . ( 3 ) `` Arrears '' mean sum due licensee reflected account horseman 's bookkeeper , including subscription , jockey fee , forfeiture , default incident administrative regulation . ( 4 ) `` Association '' mean person legal entity , required licensed KRS 230.300 conduct race meeting , used herein , association conducting race meeting rule applicable . ( 5 ) `` Authorized agent '' mean person currently licensed agent licensed owner princ

In [2]:
import os

In [3]:
os.getcwd()

'/Volumes/JK/AI-Search-Engine/src/Models'

In [4]:
path = "/Volumes/JK/AI-Search-Engine/src/Data/Slack_Data/Models/"

In [5]:
os.chdir(path)

In [6]:
from gensim.models.doc2vec import Doc2Vec, TaggedDocument


In [7]:
model = Doc2Vec.load("d2v.model")

In [8]:
model

<gensim.models.doc2vec.Doc2Vec at 0x110a9f048>