In [42]:
import nltk
import pandas as pd
import numpy as np
from nltk.corpus import stopwords
from nltk import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD
from sklearn.preprocessing import Normalizer 

### Training text for identifying lurking topics ( few IELTS passages)

In [1]:
f = open("train.txt",'r',encoding ="utf-8")
text = f.read()

### Text of interest ( IELTS passage under inspection )

In [44]:
text = text + open("test.txt",'r',encoding = "utf8").read()

In [45]:
sents = nltk.sent_tokenize(text)

In [46]:
vectorizer = TfidfVectorizer()

### Identifying non obvious stop words

In [47]:
vectorizer.stop_words = { x for x,y in nltk.FreqDist( nltk.word_tokenize(text)).most_common(250)  }

In [48]:
dtm = vectorizer.fit_transform(sents)

  sorted(inconsistent))


In [49]:
lsa = TruncatedSVD(50,algorithm = 'arpack')

In [50]:
dtm_lsa = lsa.fit_transform(dtm)

In [51]:
test_input = open("test.txt",'r',encoding = "utf8").read()

In [52]:
test_sents = nltk.sent_tokenize(test_input)

In [53]:
len(test_sents)

37

### Search text taken from  IELTS passage

In [54]:
question = "What did researchers identify as the ideal time to wake up in the morning?"
test_sents.append(question)

In [55]:
dtms_ = vectorizer.transform(test_sents)

In [56]:
dtm_lsa_ = lsa.transform(dtms_)

In [57]:
similarity_ = np.asarray(np.asmatrix
                         (dtm_lsa_)*np.asmatrix(dtm_lsa_).T )

In [58]:
similarity_ = Normalizer().fit_transform(similarity_)

In [59]:
answer = pd.DataFrame(pd.DataFrame(similarity_,index = test_sents,columns = test_sents).loc[:,question])

In [60]:
answer = answer.sort_values (by = [question],ascending = False).loc[answer[question]
                                        > 0.05*answer[question].max() ]

In [36]:
 answer1 = answer

###  Search results from my model

#### Extracting relevant text pertaining to the answer for the questions of IELTS taken from  

[IELTS website](https://takeielts.britishcouncil.org/prepare-test/practice-tests/reading-practice-test-1-academic/reading-passage-1)

####  1) What did researchers identify as the ideal time to wake up in the morning?

######  A) 6.04

######  B) 7.00

######  C) 7.22

###### D) 7.30

In [37]:
answer1

Unnamed: 0,What did researchers identify as the ideal time to wake up in the morning?
What did researchers identify as the ideal time to wake up in the morning?,0.295685
Morning is also great for breaking out the vitamins.,0.216872
"‘Night people’, for example, often describe how they find it very hard to operate during the morning, but become alert and focused by evening.",0.19709
"Once you’re up and ready to go, what then?",0.151168
"It is essential that, by the time you are ready to sleep, your body is rid of all traces.",0.126948
"The average urban resident, for example, rouses at the eye-blearing time of 6.04 a.m., which researchers believe to be far too early.",0.072402
"The optimum moment has been whittled down to 7.22 a.m.; muscle aches, headaches and moodiness were reported to be lowest by participants in the study who awoke then.",0.065938


### 2) In order to lose weight, we should

##### A) avoid eating breakfast

##### B) eat a low carbohydrate breakfast

##### C) exercise before breakfast

##### D) exercise after breakfast

In [61]:
answer2 = answer

In [62]:
answer2

Unnamed: 0,"In order to lose weight, we should"
"In order to lose weight, we should",0.59637
The recommended course of action is to follow an intense workout with a carbohydrate-rich breakfast; the other way round and weight loss results are not as pronounced.,0.142004
"Evenings are important for winding down before sleep; however, dietician Geraldine Georgeou warns that an after-five carbohydrate-fast is more cultural myth than chronobiological demand.",0.123844


### 3) The best time to stop drinking coffee is

##### A) mid-afternoon

##### B) 10 p.m.

##### C) only when feeling anxious

##### D) after dinner

In [86]:
answer3 = answer

In [87]:
answer3

Unnamed: 0,The best time to stop drinking coffee is
The best time to stop drinking coffee is,0.173887
"For improved absorption, Stone suggests pairing supplements with a food in which they are soluble and steering clear of caffeinated beverages.",0.147745
"Finally, Stone warns to take care with storage; high potency is best for absorption, and warmth and humidity are known to deplete the potency of a supplement.",0.145356
"Supplement absorption by the body is not temporal-dependent, but naturopath Pam Stone notes that the extra boost at breakfast helps us get energised for the day ahead.",0.136347
"After-dinner espressos are becoming more of a tradition – we have the Italians to thank for that – but to prepare for a good night’s sleep we are better off putting the brakes on caffeine consumption as early as 3 p.m. With a seven hour half-life, a cup of coffee containing 90 mg of caffeine taken at this hour could still leave 45 mg of caffeine in your nervous system at ten o’clock that evening.",0.097996
"Overloading your gut could lead to indigestion, though.",0.081734


#### Sources:

https://takeielts.britishcouncil.org/prepare-test/practice-tests/reading-practice-test-1-academic/reading-passage-1

https://opensourceconnections.com/blog/2016/03/29/semantic-search-with-latent-semantic-analysis/

https://www.datascienceassn.org/sites/default/files/users/user1/lsa_presentation_final.pdf