In [2]:
import csv
import pandas as pd
pd.options.display.max_colwidth = 200
import PySimpleGUI as sg
from tqdm import tqdm
import tensorflow_hub as hub
import numpy as np

def gui(data):
    """Open a small search window over ``data`` and display the best matches.

    The window has a text input, a 'Search' button (also triggered by the
    Return key) and a text area. Each search calls ``search(data, query)``
    and prints one entry per hit in the form::

        In <type> <score>: <title>
        <url>

    Parameters
    ----------
    data : pandas.DataFrame
        Frame passed unchanged to ``search``.

    Returns
    -------
    None
        The call blocks until the window is closed.
    """
    sg.theme('DARKBLUE4')
    layout = [[sg.Input(key='-IN-')],
              [sg.Button('Search', bind_return_key=True)],
              [sg.Text(size=(100, 30), key='-OUTPUT-')]]

    window = sg.Window('NLP1', layout)

    # try/finally guarantees the window is destroyed even when a search fails.
    try:
        while True:
            event, values = window.read()

            # A ``None`` event (window closed) or 'Exit' ends the loop.
            if event in (None, 'Exit'):
                break

            if event == 'Search':
                window.TKroot.title('LOADING')
                # The search below blocks the event loop; flush pending GUI
                # updates first so the 'LOADING' title is actually drawn.
                window.refresh()

                result = search(data, values['-IN-'])

                # f-string formatting tolerates non-string cells (e.g. a
                # missing URL read as NaN); scores are shown with two decimals
                # to keep floating-point noise out of the display.
                hits = result[["title", "score", "url", "type"]]
                outputs = '\n'.join(
                    f"In {stype} {value:.2f}: {title}\n{link}"
                    for title, value, link, stype
                    in hits.itertuples(index=False, name=None)
                )

                window['-OUTPUT-'].update(outputs)
                window.TKroot.title('NLP1')
    finally:
        window.close()
    
def _similarity_percent(vectors, query_vec):
    """Return the inner product of each vector with ``query_vec``, times 100, rounded to 2 decimals."""
    # float(...) turns the NumPy scalar into a plain Python float before
    # rounding, so the stored score is the rounded value itself.
    return [round(float(np.inner(vec, query_vec)[0][0]) * 100, 2) for vec in vectors]


def search(indata, tosearch, top_n=10, embed_fn=None):
    """Rank the rows of ``indata`` by semantic similarity to ``tosearch``.

    Every row is scored twice, once against its title embedding and once
    against its article embedding; the final score is the larger of the two
    and ``type`` records which one it came from (``"title"`` wins ties).

    Parameters
    ----------
    indata : pandas.DataFrame
        Must contain the columns ``title``, ``article``, ``url``,
        ``embed_title`` and ``embed_article``. It is not modified.
    tosearch : str
        Query text.
    top_n : int, default 10
        Number of best-scoring rows to return.
    embed_fn : callable, optional
        Called as ``embed_fn([tosearch])`` to encode the query. Defaults to
        the notebook-level ``embed`` model.

    Returns
    -------
    pandas.DataFrame
        Up to ``top_n`` rows with the columns ``title``, ``article``,
        ``score``, ``url`` and ``type``, best score first.
    """
    # Look up the global model lazily so a custom encoder can be supplied.
    encoder = embed if embed_fn is None else embed_fn
    query_vec = encoder([tosearch])

    title_scores = _similarity_percent(indata["embed_title"], query_vec)
    article_scores = _similarity_percent(indata["embed_article"], query_vec)

    # Copy only the columns that are returned; the embeddings stay untouched.
    ranked = indata[["title", "article", "url"]].copy()
    ranked["score"] = [max(t, a) for t, a in zip(title_scores, article_scores)]
    ranked["type"] = [
        "title" if t >= a else "article"
        for t, a in zip(title_scores, article_scores)
    ]

    # A stable sort keeps equally scored rows in their original order, which
    # makes the ranking deterministic.
    ranked = ranked.sort_values(by="score", ascending=False, kind="mergesort")

    return ranked[["title", "article", "score", "url", "type"]].head(top_n)

### Reading data

In [4]:
# Load the scraped articles; the file uses "¬" as its field separator.
data = pd.read_csv(
    "SCRAPED.csv",
    sep="¬",
    encoding="UTF-8",
    engine="python",
    na_filter=True,
)

# Coerce both text columns to plain, whitespace-stripped strings
# (every cell goes through str(), whatever its original type).
data["article"] = data["article"].map(lambda cell: str(cell).strip())
data["title"] = data["title"].map(lambda cell: str(cell).strip())

### Vectorizing title and article

In [3]:
# Load the Universal Sentence Encoder (version 4) from TensorFlow Hub.
# If the download is not possible (e.g. network problems), a copy of the
# model stored on the local machine can be loaded instead:
#embed = hub.KerasLayer(
#    'C:\\Users\\user\\Desktop\\VU\\5 pusmetis\\Natūralios kalbos apdorojimas\\Uzduotys\\model'
#)
embed = hub.load("https://tfhub.dev/google/universal-sentence-encoder/4")

In [5]:
# Encode every title on its own (one model call per row) and keep the
# resulting embeddings alongside the text.
embed_title = [embed([headline]) for headline in tqdm(data.title.values)]
data["embed_title"] = embed_title

100%|███████████████████████████████████████████████████████████████████████████| 24826/24826 [01:02<00:00, 394.53it/s]


In [6]:
# Encode every article body on its own (one model call per row) and keep
# the resulting embeddings alongside the text.
embed_article = [embed([body]) for body in tqdm(data.article.values)]
data["embed_article"] = embed_article

100%|███████████████████████████████████████████████████████████████████████████| 24826/24826 [01:41<00:00, 245.37it/s]


In [7]:
# Preview the first three rows, including the embedding columns added above.
data.head(3)

Unnamed: 0,title,article,url,embed_title,embed_article
0,Europe midday: Shares down as investors fret over inflation; Asos slumps,(Sharecast News) - European stocks were slid lower at midday on Monday as investors fretted about persistent inflationary pressure while rising commodity prices lifted mining and energy shares. Th...,https://www.lse.co.uk/news/europe-midday-shares-down-as-investors-fret-over-inflation-asos-slumps-w6fggdhxlu8aic7.html,"((tf.Tensor(0.010141957, shape=(), dtype=float32), tf.Tensor(0.056435425, shape=(), dtype=float32), tf.Tensor(0.059885427, shape=(), dtype=float32), tf.Tensor(0.066110164, shape=(), dtype=float32)...","((tf.Tensor(0.026592027, shape=(), dtype=float32), tf.Tensor(-0.061802354, shape=(), dtype=float32), tf.Tensor(0.011562209, shape=(), dtype=float32), tf.Tensor(0.031157665, shape=(), dtype=float32..."
1,Tungsten West looks to cash-in on 'green transition' with AIM float,"(Sharecast News) - Tungsten West is looking to float on the Alternative Investment Market via an institutional placing and subscription. The tungsten and tin miner, which is focussed on recommenci...",https://www.lse.co.uk/news/tungsten-west-looks-to-cash-in-on-green-transition-with-aim-float-bd5i4uuqyiw2pvu.html,"((tf.Tensor(0.016608588, shape=(), dtype=float32), tf.Tensor(-0.0030601071, shape=(), dtype=float32), tf.Tensor(0.035051487, shape=(), dtype=float32), tf.Tensor(0.005302702, shape=(), dtype=float3...","((tf.Tensor(-0.03884086, shape=(), dtype=float32), tf.Tensor(-0.06557003, shape=(), dtype=float32), tf.Tensor(0.008121927, shape=(), dtype=float32), tf.Tensor(-0.06400168, shape=(), dtype=float32)..."
2,IN BRIEF: TMT Acquisition rises on first day on London Main Market,"TMT Acquisition PLC - seeking to buy technology, media and telecom businesses - Starts trading on London Main Market on Monday, having placed 25.0 million new shares at 20 pence to raise GBP5 mill...",https://www.lse.co.uk/news/in-brief-tmt-acquisition-rises-on-first-day-on-london-main-market-tzuabhcz4ewzjcd.html,"((tf.Tensor(0.0019928652, shape=(), dtype=float32), tf.Tensor(-0.059186246, shape=(), dtype=float32), tf.Tensor(0.01634468, shape=(), dtype=float32), tf.Tensor(-0.0006566181, shape=(), dtype=float...","((tf.Tensor(-0.03423629, shape=(), dtype=float32), tf.Tensor(-0.06901643, shape=(), dtype=float32), tf.Tensor(0.003115381, shape=(), dtype=float32), tf.Tensor(0.058315556, shape=(), dtype=float32)..."


### Search without GUI

In [8]:
# Example query run directly, without the GUI; the cell output is the
# ranked frame returned by search().
search(data,"covid vaccine")

Unnamed: 0,title,article,score,url,type
6984,LIVE MARKETS-U.S. vaccination worries,* STOXX 600 flat * U.S. futures rise * Wait and see before Jackson Hole Aug 27 - Welcome to the home for real-time coverage of markets brought to you by Reuters reporters. You can share your thoug...,63.88,https://www.lse.co.uk/news/live-markets-us-vaccination-worries-dz2lc1ktrbpjwhx.html,title
15216,"Fully vaccinated people do not yet require a booster vaccine shot, FDA and CDC say","(Sharecast News) - People that are fully vaccinated with Pfizer's Covid-19 vaccine do not yet require a so-called 'booster', America's top drug and health officials said. Overnight, the Food and D...",60.84,https://www.lse.co.uk/news/fully-vaccinated-people-do-not-yet-require-a-booster-vaccine-shot-fda-and-cdc-say-1qw4rrakqxu4i54.html,title
15413,"Fully vaccinated people do not yet require a booster vaccine shot, FDA and CDC say","(Sharecast News) - People that are fully vaccinated with Pfizer's Covid-19 vaccine do not yet require a so-called 'booster', America's top drug and health officials said. Overnight, the Food and D...",60.84,https://www.lse.co.uk/news/fully-vaccinated-people-do-not-yet-require-a-booster-vaccine-shot-fda-and-cdc-say-7e4b8bhdjfhttjw.html,title
2431,Safe to give COVID-19 shot and flu vaccine at the same time - UK study,"LONDON, Sept 30 (Reuters) - It is safe for people to receive a COVID-19 vaccine and a flu shot at the same time and it does not negatively impact the immune response produced by either, a British ...",54.16,https://www.lse.co.uk/news/safe-to-give-covid-19-shot-and-flu-vaccine-at-the-same-time-uk-study-j1e94ih4w91xzzc.html,title
20911,UPDATE 1-Italy halts AstraZeneca vaccine for under-60s,"(Adds details, background) By Gavin Jones ROME, June 11 (Reuters) - The Italian government said on Friday it was restricting the use of the AstraZeneca COVID-19 vaccine to people over the age of 6...",53.87,https://www.lse.co.uk/news/update-1-italy-halts-astrazeneca-vaccine-for-under-60s--alyptxbic4ea1r6.html,title
4604,Johnson & Johnson seeks approval for Covid-19 jab booster,(Sharecast News) - Johnson & Johnson is seeking regulatory approval in the US for a booster shot for its Covid-19 jab. The US company said it had submitted data to the US Food and Drug Administrat...,53.76,https://www.lse.co.uk/news/johnson-amp-johnson-seeks-approval-for-covid-19-jab-booster-24y37rfzj7j6ses.html,article
23539,LIVE MARKETS-50% of the world vaccinated by end-2021,* European shares down 0.4% * FTSE down as British economy further reopens * Shares in Diploma jump on positive FY outlook Welcome to the home for real-time coverage of markets brought to you by R...,53.62,https://www.lse.co.uk/news/live-markets-50-of-the-world-vaccinated-by-end-2021-cxg1irpb2t61w81.html,title
10293,"No need for COVID booster jabs for now, vaccine supplies short - WHO","GENEVA, Aug 18 (Reuters) - Current data does not indicate that COVID-19 booster shots are needed, World Health Organisation (WHO) chief scientist Soumya Swaminathan said on Wednesday. WHO adviser ...",53.59,https://www.lse.co.uk/news/no-need-for-covid-booster-jabs-for-now-vaccine-supplies-short-who-2l7vi4a44i61ctz.html,title
377,"UK medicine regulator says Pfizer, AstraZeneca COVID-19 shots may be used as boosters","LONDON, Sept 9 (Reuters) - Britain's medical regulator on Thursday gave the go-ahead for Pfizer and AstraZeneca's COVID-19 vaccine to be used in any future booster programme, but said any decision...",53.32,https://www.lse.co.uk/news/uk-medicine-regulator-says-pfizer-astrazeneca-covid-19-shots-may-be-used-as-boosters-tv8uf1ewz9ejguf.html,article
14479,Johnson and Johnson Covid-19 shot effective against Delta variant,"(Sharecast News) - Johnson&Johnson's Covid-19 vaccine provides protections against all variants, including the increasingly predominant Delta variant. On Thursday evening, the drug giant said that...",52.74,https://www.lse.co.uk/news/johnson-and-johnson-covid-19-shot-effective-against-delta-variant-kauluda4ragx4l1.html,article


## Additional exercise

### GUI

In [9]:
# Open the search window; gui() loops on window events, so this cell keeps
# running until the window is closed.
gui(data)