In [21]:
import spacy

In [22]:
nlp=spacy.load("en_core_web_sm")

In [23]:
txt="""New York City on Tuesday declared a public health emergency and ordered mandatory measles vaccinations amid an outbreak, becoming the latest national flash point over refusals to inoculate against dangerous diseases.

At least 285 people have contracted measles in the city since September, mostly in Brooklyn’s Williamsburg neighborhood. The order covers four Zip codes there, Mayor Bill de Blasio (D) said Tuesday.

The mandate orders all unvaccinated people in the area, including a concentration of Orthodox Jews, to receive inoculations, including for children as young as 6 months old. Anyone who resists could be fined up to $1,000."""

In [24]:
doc=nlp(txt)
txt

'New York City on Tuesday declared a public health emergency and ordered mandatory measles vaccinations amid an outbreak, becoming the latest national flash point over refusals to inoculate against dangerous diseases.\n\nAt least 285 people have contracted measles in the city since September, mostly in Brooklyn’s Williamsburg neighborhood. The order covers four Zip codes there, Mayor Bill de Blasio (D) said Tuesday.\n\nThe mandate orders all unvaccinated people in the area, including a concentration of Orthodox Jews, to receive inoculations, including for children as young as 6 months old. Anyone who resists could be fined up to $1,000.'

In [25]:
token=[]
for tk in doc:
    token.append(tk.text)
print(token)



['New', 'York', 'City', 'on', 'Tuesday', 'declared', 'a', 'public', 'health', 'emergency', 'and', 'ordered', 'mandatory', 'measles', 'vaccinations', 'amid', 'an', 'outbreak', ',', 'becoming', 'the', 'latest', 'national', 'flash', 'point', 'over', 'refusals', 'to', 'inoculate', 'against', 'dangerous', 'diseases', '.', '\n\n', 'At', 'least', '285', 'people', 'have', 'contracted', 'measles', 'in', 'the', 'city', 'since', 'September', ',', 'mostly', 'in', 'Brooklyn', '’s', 'Williamsburg', 'neighborhood', '.', 'The', 'order', 'covers', 'four', 'Zip', 'codes', 'there', ',', 'Mayor', 'Bill', 'de', 'Blasio', '(', 'D', ')', 'said', 'Tuesday', '.', '\n\n', 'The', 'mandate', 'orders', 'all', 'unvaccinated', 'people', 'in', 'the', 'area', ',', 'including', 'a', 'concentration', 'of', 'Orthodox', 'Jews', ',', 'to', 'receive', 'inoculations', ',', 'including', 'for', 'children', 'as', 'young', 'as', '6', 'months', 'old', '.', 'Anyone', 'who', 'resists', 'could', 'be', 'fined', 'up', 'to', '$', '1,00

Stop words removed 

In [26]:
from spacy.lang.en.stop_words import STOP_WORDS
def rmvst(t):
    newtxt=[]
    for word in t:
        lx=nlp.vocab[word]
        if lx.is_stop==True:
            newtxt.append('')
        else:
            newtxt.append(word)
    x=newtxt[:]
    newtxt.clear()
    return " ".join(x)

In [27]:
clean=rmvst(token)
print(clean)

New York City  Tuesday declared  public health emergency  ordered mandatory measles vaccinations amid  outbreak ,   latest national flash point  refusals  inoculate  dangerous diseases . 

   285 people  contracted measles   city  September ,   Brooklyn  Williamsburg neighborhood .  order covers  Zip codes  , Mayor Bill de Blasio ( D ) said Tuesday . 

  mandate orders  unvaccinated people   area , including  concentration  Orthodox Jews ,  receive inoculations , including  children  young  6 months old .   resists   fined   $ 1,000 .


In [28]:
import re
clean_filter=re.sub(r'[^a-zA-Z0-9 ]', r' ',clean)

In [29]:
print(clean_filter)

New York City  Tuesday declared  public health emergency  ordered mandatory measles vaccinations amid  outbreak     latest national flash point  refusals  inoculate  dangerous diseases        285 people  contracted measles   city  September     Brooklyn  Williamsburg neighborhood    order covers  Zip codes    Mayor Bill de Blasio   D   said Tuesday       mandate orders  unvaccinated people   area   including  concentration  Orthodox Jews    receive inoculations   including  children  young  6 months old     resists   fined     1 000  


NER

In [30]:
newtxt=nlp(clean_filter)
e=[]
for i in newtxt.ents:
    t=(i,i.label_,i.label)
    e.append(t)
e


[(New York City, 'GPE', 384),
 (Tuesday, 'DATE', 391),
 (285, 'CARDINAL', 397),
 (September, 'DATE', 391),
 (Brooklyn, 'GPE', 384),
 (Zip, 'PERSON', 380),
 (Bill de Blasio, 'PERSON', 380),
 (Tuesday, 'DATE', 391),
 (Orthodox, 'NORP', 381),
 (Jews, 'NORP', 381),
 (6 months old, 'DATE', 391),
 (1 000, 'CARDINAL', 397)]

Tokenization and lemmitization

In [31]:
tokens=[[token.text,token.lemma_] for token in newtxt]
tokens

[['New', 'New'],
 ['York', 'York'],
 ['City', 'City'],
 [' ', ' '],
 ['Tuesday', 'Tuesday'],
 ['declared', 'declare'],
 [' ', ' '],
 ['public', 'public'],
 ['health', 'health'],
 ['emergency', 'emergency'],
 [' ', ' '],
 ['ordered', 'order'],
 ['mandatory', 'mandatory'],
 ['measles', 'measles'],
 ['vaccinations', 'vaccination'],
 ['amid', 'amid'],
 [' ', ' '],
 ['outbreak', 'outbreak'],
 ['    ', '    '],
 ['latest', 'late'],
 ['national', 'national'],
 ['flash', 'flash'],
 ['point', 'point'],
 [' ', ' '],
 ['refusals', 'refusal'],
 [' ', ' '],
 ['inoculate', 'inoculate'],
 [' ', ' '],
 ['dangerous', 'dangerous'],
 ['diseases', 'disease'],
 ['       ', '       '],
 ['285', '285'],
 ['people', 'people'],
 [' ', ' '],
 ['contracted', 'contract'],
 ['measles', 'measle'],
 ['  ', '  '],
 ['city', 'city'],
 [' ', ' '],
 ['September', 'September'],
 ['    ', '    '],
 ['Brooklyn', 'Brooklyn'],
 [' ', ' '],
 ['Williamsburg', 'Williamsburg'],
 ['neighborhood', 'neighborhood'],
 ['   ', '   '],

In [None]:
!pip install -U sentence-transformers

In [None]:
from sentence_transformers import SentenceTransformer
# model=SentenceTransformer('all-MiniLM-L6-v2')
model=SentenceTransformer('distilbert-base-nli-mean-tokens')

sentences=['This framework generates embeddings for each input sentence',
    'Sentences are passed as a list of string.', 
    'The quick brown fox jumps over the lazy dog.']
sent_em=model.encode(sentences)
for sent,em in zip(sentences,sent_em):
    print("Sentence:", sent)
    print("Embedding:", em, end='\n\n')

In [2]:
import pandas as pd

In [35]:
!pip install faiss-cpu



In [30]:
import numpy as np
import torch
import os
import faiss
df=pd.read_csv('medium_Data.csv.xls')
data=df['Contents'].to_list()

actual=data[0:11]
actual[6]


'ChatGPT is blowing up. Twitter is inundated with screenshots of the app, coding sites like Stack Overflow are already banning answers produced with it, and over 1 million people have played with it. It’s a sensation.'

In [18]:
model=SentenceTransformer('distilbert-base-nli-mean-tokens')
encodemodel=model.encode(actual)

In [19]:
idx=faiss.IndexIDMap(faiss.IndexFlatIP(768))
idx.add_with_ids(encodemodel, np.array(range(0,len(actual))))
faiss.write_index(idx,'News')


In [20]:
idx=faiss.read_index('News')

In [21]:
def search(q):
    qv=model.encode([q])
    k=2
    top_k=idx.search(qv,k)
    return [actual[_id] for _id in top_k[1].tolist()[0]]


In [39]:
q="It’s a sensation"
results=search(q)

In [40]:
for r in results:
    print('\t',r,end='\n\n')

	 Google is completely changing the way that search engines operate.Search behaviours encompass much more than simply asking a question and looking for an answer. Often, people don’t have a specific question in mind, but are…

	 When Gottfried Wilhelm Leibniz discovered binary, he was ecstatic. In a letter to the Duke of Brunswick, he sketched his famous medallion depicting the creation of the universe: a tablet of binary numbers, commandments consisting solely of the integers from 0000 to 1111, wreathed in the motto Omnibus ex nihilo ducendis sufficit unum (“To produce everything out of nothing, one is sufficient”).The 0 was nothingness; the 1, God.Leibniz saw binary in the broken and unbroken lines of the I Ching,¹ in his own mathematics, and in the monads he glimpsed behind reality’s matrix, sifting through the aether like luminescent kanji. He believed binary to be the universal language, allowing any proposition or argument to be tested computationally for its correctness (0 or 1)