# 1 - Parts of Speech Tagging

In [1]:
import spacy
import pandas as pd

In [2]:
nlp = spacy.load('en_core_web_sm')

In [3]:
# our text is from jane austin's 'emma'
# we have removed punctuation, lowercased but left in stop words
emma_ja = "emma woodhouse handsome clever and rich with a comfortable home and happy disposition seemed to unite some of the best blessings of existence and had lived nearly twentyone years in the world with very little to distress or vex her she was the youngest of the two daughters of a most affectionate indulgent father and had in consequence of her sisters marriage been mistress of his house from a very early period her mother had died too long ago for her to have more than an indistinct remembrance of her caresses and her place had been supplied by an excellent woman as governess who had fallen little short of a mother in affection sixteen years had miss taylor been in mr woodhouses family less as a governess than a friend very fond of both daughters but particularly of emma between them it was more the intimacy of sisters even before miss taylor had ceased to hold the nominal office of governess the mildness of her temper had hardly allowed her to impose any restraint and the shadow of authority being now long passed away they had been living together as friend and friend very mutually attached and emma doing just what she liked highly esteeming miss taylors judgment but directed chiefly by her own"
print(emma_ja)

emma woodhouse handsome clever and rich with a comfortable home and happy disposition seemed to unite some of the best blessings of existence and had lived nearly twentyone years in the world with very little to distress or vex her she was the youngest of the two daughters of a most affectionate indulgent father and had in consequence of her sisters marriage been mistress of his house from a very early period her mother had died too long ago for her to have more than an indistinct remembrance of her caresses and her place had been supplied by an excellent woman as governess who had fallen little short of a mother in affection sixteen years had miss taylor been in mr woodhouses family less as a governess than a friend very fond of both daughters but particularly of emma between them it was more the intimacy of sisters even before miss taylor had ceased to hold the nominal office of governess the mildness of her temper had hardly allowed her to impose any restraint and the shadow of auth

In [22]:
# create a spacy doc from our text - this will generate tokens and their assosciated pos tags
spacy_doc = nlp(emma_ja)
spacy_doc

emma woodhouse handsome clever and rich with a comfortable home and happy disposition seemed to unite some of the best blessings of existence and had lived nearly twentyone years in the world with very little to distress or vex her she was the youngest of the two daughters of a most affectionate indulgent father and had in consequence of her sisters marriage been mistress of his house from a very early period her mother had died too long ago for her to have more than an indistinct remembrance of her caresses and her place had been supplied by an excellent woman as governess who had fallen little short of a mother in affection sixteen years had miss taylor been in mr woodhouses family less as a governess than a friend very fond of both daughters but particularly of emma between them it was more the intimacy of sisters even before miss taylor had ceased to hold the nominal office of governess the mildness of her temper had hardly allowed her to impose any restraint and the shadow of auth

In [21]:
# extract the tokens and pos tags into a dataframe
pos_df = pd.DataFrame(columns=['token', 'pos_tag'])
pos_df.head()

Unnamed: 0,token,pos_tag


In [17]:
for token in spacy_doc:
    pos_df = pd.concat([pos_df, pd.DataFrame.from_records([{'token': token.text, 'pos_tag':token.pos}])], ignore_index=True)

In [18]:
pos_df.head(15)

Unnamed: 0,token,pos_tag
0,emma,PROPN
1,woodhouse,PROPN
2,handsome,ADJ
3,clever,ADJ
4,and,CCONJ
5,rich,ADJ
6,with,ADP
7,a,DET
8,comfortable,ADJ
9,home,NOUN


In [19]:
# token frequency count
pos_df_counts = pos_df.groupby(['token', 'pos_tag']).size().reset_index(name='counts').sort_values(by='counts', ascending=False)
pos_df_counts.head(10)

Unnamed: 0,token,pos_tag,counts
177,of,ADP,14
176,of,85,14
98,had,87,9
99,had,AUX,9
108,her,95,9
109,her,PRON,9
13,and,CCONJ,8
223,the,DET,8
222,the,90,8
12,and,89,8


In [20]:

# counts of pos_tags
pos_df_poscounts = pos_df_counts.groupby(['pos_tag'])['token'].count().sort_values(ascending=False)
pos_df_poscounts.head(10)

pos_tag
92      35
NOUN    35
VERB    19
100     19
ADV     18
84      18
ADJ     18
86      18
95       9
PRON     9
Name: token, dtype: int64

# 2 - Named Entity Recognition

In [38]:
import spacy
from spacy import displacy
from spacy import tokenizer
import re
nlp = spacy.load('en_core_web_sm')

In [39]:
google_text = "Google was founded on September 4, 1998, by computer scientists Larry Page and Sergey Brin while they were PhD students at Stanford University in California. Together they own about 14% of its publicly listed shares and control 56% of its stockholder voting power through super-voting stock. The company went public via an initial public offering (IPO) in 2004. In 2015, Google was reorganized as a wholly owned subsidiary of Alphabet Inc. Google is Alphabet's largest subsidiary and is a holding company for Alphabet's internet properties and interests. Sundar Pichai was appointed CEO of Google on October 24, 2015, replacing Larry Page, who became the CEO of Alphabet. On December 3, 2019, Pichai also became the CEO of Alphabet."
print(google_text)

Google was founded on September 4, 1998, by computer scientists Larry Page and Sergey Brin while they were PhD students at Stanford University in California. Together they own about 14% of its publicly listed shares and control 56% of its stockholder voting power through super-voting stock. The company went public via an initial public offering (IPO) in 2004. In 2015, Google was reorganized as a wholly owned subsidiary of Alphabet Inc. Google is Alphabet's largest subsidiary and is a holding company for Alphabet's internet properties and interests. Sundar Pichai was appointed CEO of Google on October 24, 2015, replacing Larry Page, who became the CEO of Alphabet. On December 3, 2019, Pichai also became the CEO of Alphabet.


In [40]:
spacy_doc = nlp(google_text)

In [41]:
for word in spacy_doc.ents:
    print(word.text, word.label_)

Google ORG
September 4, 1998 DATE
Larry Page PERSON
Sergey Brin PERSON
PhD WORK_OF_ART
Stanford University ORG
California GPE
about 14% PERCENT
56% PERCENT
IPO ORG
2004 DATE
2015 DATE
Google ORG
Alphabet Inc. ORG
Alphabet ORG
Alphabet ORG
Sundar Pichai PERSON
Google ORG
October 24, 2015 DATE
Larry Page PERSON
Alphabet GPE
December 3, 2019 DATE
Pichai PERSON
Alphabet GPE


In [42]:
displacy.render(spacy_doc,style="ent",jupyter=True)

ImportError: cannot import name 'display' from 'IPython.core.display' (/opt/anaconda3/envs/nlp_course_env/lib/python3.11/site-packages/IPython/core/display.py)

# 3 - Practical

In [45]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import re
import pandas as pd
import spacy
import matplotlib.pyplot as plt

In [47]:
bbc_data = pd.read_csv('assets/bbc_news.csv')
bbc_data.head()

Unnamed: 0.1,Unnamed: 0,index,title,pubDate,guid,link,description
0,0,6684,Can I refuse to work?,"Wed, 10 Aug 2022 15:46:18 GMT",https://www.bbc.co.uk/news/business-62147992,https://www.bbc.co.uk/news/business-62147992?a...,With much of the UK enduring another period of...
1,1,9267,'Liz Truss the Brief?' World reacts to UK poli...,"Mon, 17 Oct 2022 11:35:12 GMT",https://www.bbc.co.uk/news/world-63285480,https://www.bbc.co.uk/news/world-63285480?at_m...,The UK's political chaos has been watched arou...
2,2,7387,Rationing energy is nothing new for off-grid c...,"Wed, 31 Aug 2022 05:20:18 GMT",https://www.bbc.co.uk/news/uk-scotland-highlan...,https://www.bbc.co.uk/news/uk-scotland-highlan...,Scoraig in the north west Highlands has long h...
3,3,767,The hunt for superyachts of sanctioned Russian...,"Tue, 22 Mar 2022 14:37:01 GMT",https://www.bbc.co.uk/news/60739336,https://www.bbc.co.uk/news/60739336?at_medium=...,"Wealthy Russians sanctioned by the US, EU and ..."
4,4,3712,Platinum Jubilee: 70 years of the Queen in 70 ...,"Wed, 01 Jun 2022 23:17:33 GMT",https://www.bbc.co.uk/news/uk-61660128,https://www.bbc.co.uk/news/uk-61660128?at_medi...,A quick look back at the Queen's 70 years on t...


In [48]:
bbc_data.head()

Unnamed: 0.1,Unnamed: 0,index,title,pubDate,guid,link,description
0,0,6684,Can I refuse to work?,"Wed, 10 Aug 2022 15:46:18 GMT",https://www.bbc.co.uk/news/business-62147992,https://www.bbc.co.uk/news/business-62147992?a...,With much of the UK enduring another period of...
1,1,9267,'Liz Truss the Brief?' World reacts to UK poli...,"Mon, 17 Oct 2022 11:35:12 GMT",https://www.bbc.co.uk/news/world-63285480,https://www.bbc.co.uk/news/world-63285480?at_m...,The UK's political chaos has been watched arou...
2,2,7387,Rationing energy is nothing new for off-grid c...,"Wed, 31 Aug 2022 05:20:18 GMT",https://www.bbc.co.uk/news/uk-scotland-highlan...,https://www.bbc.co.uk/news/uk-scotland-highlan...,Scoraig in the north west Highlands has long h...
3,3,767,The hunt for superyachts of sanctioned Russian...,"Tue, 22 Mar 2022 14:37:01 GMT",https://www.bbc.co.uk/news/60739336,https://www.bbc.co.uk/news/60739336?at_medium=...,"Wealthy Russians sanctioned by the US, EU and ..."
4,4,3712,Platinum Jubilee: 70 years of the Queen in 70 ...,"Wed, 01 Jun 2022 23:17:33 GMT",https://www.bbc.co.uk/news/uk-61660128,https://www.bbc.co.uk/news/uk-61660128?at_medi...,A quick look back at the Queen's 70 years on t...


In [50]:
titles = pd.DataFrame(bbc_data['title'])
titles

Unnamed: 0,title
0,Can I refuse to work?
1,'Liz Truss the Brief?' World reacts to UK poli...
2,Rationing energy is nothing new for off-grid c...
3,The hunt for superyachts of sanctioned Russian...
4,Platinum Jubilee: 70 years of the Queen in 70 ...
...,...
995,Dominic Raab: Third senior civil servant gives...
996,Highlights: Radacanu beats Uytvanck
997,In pictures: Mountain bikers descend snowy peak
998,"Companies must help cut living costs, says new..."


## Preprocessing data

In [52]:
#Clean data
titles['lowercase'] = titles['title'].str.lower()
titles

Unnamed: 0,title,lowercase
0,Can I refuse to work?,can i refuse to work?
1,'Liz Truss the Brief?' World reacts to UK poli...,'liz truss the brief?' world reacts to uk poli...
2,Rationing energy is nothing new for off-grid c...,rationing energy is nothing new for off-grid c...
3,The hunt for superyachts of sanctioned Russian...,the hunt for superyachts of sanctioned russian...
4,Platinum Jubilee: 70 years of the Queen in 70 ...,platinum jubilee: 70 years of the queen in 70 ...
...,...,...
995,Dominic Raab: Third senior civil servant gives...,dominic raab: third senior civil servant gives...
996,Highlights: Radacanu beats Uytvanck,highlights: radacanu beats uytvanck
997,In pictures: Mountain bikers descend snowy peak,in pictures: mountain bikers descend snowy peak
998,"Companies must help cut living costs, says new...","companies must help cut living costs, says new..."


In [54]:
# Remove stop words
en_stopwords = stopwords.words('english')
titles['no_stopwords'] = titles['lowercase'].apply(lambda x : ' '.join([word for word in x.split() if word not in en_stopwords]))
titles

Unnamed: 0,title,lowercase,no_stopwords
0,Can I refuse to work?,can i refuse to work?,refuse work?
1,'Liz Truss the Brief?' World reacts to UK poli...,'liz truss the brief?' world reacts to uk poli...,'liz truss brief?' world reacts uk political t...
2,Rationing energy is nothing new for off-grid c...,rationing energy is nothing new for off-grid c...,rationing energy nothing new off-grid community
3,The hunt for superyachts of sanctioned Russian...,the hunt for superyachts of sanctioned russian...,hunt superyachts sanctioned russian oligarchs
4,Platinum Jubilee: 70 years of the Queen in 70 ...,platinum jubilee: 70 years of the queen in 70 ...,platinum jubilee: 70 years queen 70 seconds
...,...,...,...
995,Dominic Raab: Third senior civil servant gives...,dominic raab: third senior civil servant gives...,dominic raab: third senior civil servant gives...
996,Highlights: Radacanu beats Uytvanck,highlights: radacanu beats uytvanck,highlights: radacanu beats uytvanck
997,In pictures: Mountain bikers descend snowy peak,in pictures: mountain bikers descend snowy peak,pictures: mountain bikers descend snowy peak
998,"Companies must help cut living costs, says new...","companies must help cut living costs, says new...","companies must help cut living costs, says new..."


In [55]:
# punctation removal
titles['no_stopwords_no_punct'] = titles.apply(lambda x: re.sub(r"([^\w\s])", "", x['no_stopwords']), axis=1)
titles

Unnamed: 0,title,lowercase,no_stopwords,no_stopwords_no_punct
0,Can I refuse to work?,can i refuse to work?,refuse work?,refuse work
1,'Liz Truss the Brief?' World reacts to UK poli...,'liz truss the brief?' world reacts to uk poli...,'liz truss brief?' world reacts uk political t...,liz truss brief world reacts uk political turmoil
2,Rationing energy is nothing new for off-grid c...,rationing energy is nothing new for off-grid c...,rationing energy nothing new off-grid community,rationing energy nothing new offgrid community
3,The hunt for superyachts of sanctioned Russian...,the hunt for superyachts of sanctioned russian...,hunt superyachts sanctioned russian oligarchs,hunt superyachts sanctioned russian oligarchs
4,Platinum Jubilee: 70 years of the Queen in 70 ...,platinum jubilee: 70 years of the queen in 70 ...,platinum jubilee: 70 years queen 70 seconds,platinum jubilee 70 years queen 70 seconds
...,...,...,...,...
995,Dominic Raab: Third senior civil servant gives...,dominic raab: third senior civil servant gives...,dominic raab: third senior civil servant gives...,dominic raab third senior civil servant gives ...
996,Highlights: Radacanu beats Uytvanck,highlights: radacanu beats uytvanck,highlights: radacanu beats uytvanck,highlights radacanu beats uytvanck
997,In pictures: Mountain bikers descend snowy peak,in pictures: mountain bikers descend snowy peak,pictures: mountain bikers descend snowy peak,pictures mountain bikers descend snowy peak
998,"Companies must help cut living costs, says new...","companies must help cut living costs, says new...","companies must help cut living costs, says new...",companies must help cut living costs says new ...


In [56]:
# tokenize
titles['tokens_raw'] = titles.apply(lambda x: word_tokenize(x['title']), axis=1)
titles['tokens_clean'] = titles.apply(lambda x: word_tokenize(x['no_stopwords_no_punct']), axis=1)
titles.head()

Unnamed: 0,title,lowercase,no_stopwords,no_stopwords_no_punct,tokens_raw,tokens_clean
0,Can I refuse to work?,can i refuse to work?,refuse work?,refuse work,"[Can, I, refuse, to, work, ?]","[refuse, work]"
1,'Liz Truss the Brief?' World reacts to UK poli...,'liz truss the brief?' world reacts to uk poli...,'liz truss brief?' world reacts uk political t...,liz truss brief world reacts uk political turmoil,"['Liz, Truss, the, Brief, ?, ', World, reacts,...","[liz, truss, brief, world, reacts, uk, politic..."
2,Rationing energy is nothing new for off-grid c...,rationing energy is nothing new for off-grid c...,rationing energy nothing new off-grid community,rationing energy nothing new offgrid community,"[Rationing, energy, is, nothing, new, for, off...","[rationing, energy, nothing, new, offgrid, com..."
3,The hunt for superyachts of sanctioned Russian...,the hunt for superyachts of sanctioned russian...,hunt superyachts sanctioned russian oligarchs,hunt superyachts sanctioned russian oligarchs,"[The, hunt, for, superyachts, of, sanctioned, ...","[hunt, superyachts, sanctioned, russian, oliga..."
4,Platinum Jubilee: 70 years of the Queen in 70 ...,platinum jubilee: 70 years of the queen in 70 ...,platinum jubilee: 70 years queen 70 seconds,platinum jubilee 70 years queen 70 seconds,"[Platinum, Jubilee, :, 70, years, of, the, Que...","[platinum, jubilee, 70, years, queen, 70, seco..."


In [59]:
# lemmatize
lemmatizer = WordNetLemmatizer()
titles['tokens_clean_lemmatized'] = titles['tokens_clean'].apply(lambda x : [lemmatizer.lemmatize(tokens) for tokens in x])
titles

Unnamed: 0,title,lowercase,no_stopwords,no_stopwords_no_punct,tokens_raw,tokens_clean,tokens_clean_lemmatized
0,Can I refuse to work?,can i refuse to work?,refuse work?,refuse work,"[Can, I, refuse, to, work, ?]","[refuse, work]","[refuse, work]"
1,'Liz Truss the Brief?' World reacts to UK poli...,'liz truss the brief?' world reacts to uk poli...,'liz truss brief?' world reacts uk political t...,liz truss brief world reacts uk political turmoil,"['Liz, Truss, the, Brief, ?, ', World, reacts,...","[liz, truss, brief, world, reacts, uk, politic...","[liz, truss, brief, world, reacts, uk, politic..."
2,Rationing energy is nothing new for off-grid c...,rationing energy is nothing new for off-grid c...,rationing energy nothing new off-grid community,rationing energy nothing new offgrid community,"[Rationing, energy, is, nothing, new, for, off...","[rationing, energy, nothing, new, offgrid, com...","[rationing, energy, nothing, new, offgrid, com..."
3,The hunt for superyachts of sanctioned Russian...,the hunt for superyachts of sanctioned russian...,hunt superyachts sanctioned russian oligarchs,hunt superyachts sanctioned russian oligarchs,"[The, hunt, for, superyachts, of, sanctioned, ...","[hunt, superyachts, sanctioned, russian, oliga...","[hunt, superyachts, sanctioned, russian, oliga..."
4,Platinum Jubilee: 70 years of the Queen in 70 ...,platinum jubilee: 70 years of the queen in 70 ...,platinum jubilee: 70 years queen 70 seconds,platinum jubilee 70 years queen 70 seconds,"[Platinum, Jubilee, :, 70, years, of, the, Que...","[platinum, jubilee, 70, years, queen, 70, seco...","[platinum, jubilee, 70, year, queen, 70, second]"
...,...,...,...,...,...,...,...
995,Dominic Raab: Third senior civil servant gives...,dominic raab: third senior civil servant gives...,dominic raab: third senior civil servant gives...,dominic raab third senior civil servant gives ...,"[Dominic, Raab, :, Third, senior, civil, serva...","[dominic, raab, third, senior, civil, servant,...","[dominic, raab, third, senior, civil, servant,..."
996,Highlights: Radacanu beats Uytvanck,highlights: radacanu beats uytvanck,highlights: radacanu beats uytvanck,highlights radacanu beats uytvanck,"[Highlights, :, Radacanu, beats, Uytvanck]","[highlights, radacanu, beats, uytvanck]","[highlight, radacanu, beat, uytvanck]"
997,In pictures: Mountain bikers descend snowy peak,in pictures: mountain bikers descend snowy peak,pictures: mountain bikers descend snowy peak,pictures mountain bikers descend snowy peak,"[In, pictures, :, Mountain, bikers, descend, s...","[pictures, mountain, bikers, descend, snowy, p...","[picture, mountain, bikers, descend, snowy, peak]"
998,"Companies must help cut living costs, says new...","companies must help cut living costs, says new...","companies must help cut living costs, says new...",companies must help cut living costs says new ...,"[Companies, must, help, cut, living, costs, ,,...","[companies, must, help, cut, living, costs, sa...","[company, must, help, cut, living, cost, say, ..."


In [60]:
# create lists for just our tokens
tokens_raw_list = sum(titles['tokens_raw'], []) #unpack our lists into a single list
tokens_clean_list = sum(titles['tokens_clean_lemmatized'], [])

tokens_clean_list

## POS Tagging

In [62]:
nlp = spacy.load('en_core_web_sm')

In [64]:
spacy_doc = nlp(' '.join(tokens_raw_list))

In [65]:
pos_df = pd.DataFrame(columns=['token', 'pos_tag'])

In [67]:
for token in spacy_doc:
    pos_df = pd.concat([pos_df, pd.DataFrame.from_records(
        [{'token':token.text, 'pos_tag':token.pos_}]
    )], ignore_index=True)
pos_df

Unnamed: 0,token,pos_tag
0,Can,AUX
1,I,PRON
2,refuse,VERB
3,to,PART
4,work,VERB
...,...,...
11742,sale,NOUN
11743,scams,NOUN
11744,",",PUNCT
11745,consumers,NOUN


In [68]:
pos_df_counts = pos_df.groupby(['token', 'pos_tag']).size().reset_index(name='counts').sort_values(by='counts', ascending=False)
pos_df_counts

Unnamed: 0,token,pos_tag,counts
95,:,PUNCT,543
8,',PUNCT,300
2897,in,ADP,187
4082,to,PART,175
3268,of,ADP,172
...,...,...,...
2304,crumbling,VERB,1
2305,crunch,PROPN,1
827,Jarrod,PROPN,1
826,Japanese,ADJ,1


In [70]:
nouns = pos_df_counts[pos_df_counts.pos_tag == 'NOUN']
nouns

The history saving thread hit an unexpected error (OperationalError('attempt to write a readonly database')).History will not be written to the database.


Unnamed: 0,token,pos_tag,counts
4267,war,NOUN,35
3552,record,NOUN,15
3416,police,NOUN,14
4356,year,NOUN,14
4316,win,NOUN,14
...,...,...,...
2294,criticism,NOUN,1
2296,crocodile,NOUN,1
2297,crop,NOUN,1
2300,crown,NOUN,1


## NER (Named Entity Recognition)

## NER (Named Entity Recognition)

In [72]:
# extract the tokens and entity tags into a dataframe
ner_df = pd.DataFrame(columns=['token', 'ner_tag'])

for token in spacy_doc.ents:
    if pd.isna(token.label_) is False:
        ner_df = pd.concat([ner_df, pd.DataFrame.from_records(
            [{'token': token.text, 'ner_tag': token.label_}])], ignore_index=True)

In [73]:
ner_df.head()

Unnamed: 0,token,ner_tag
0,Liz Truss,PERSON
1,UK,GPE
2,Rationing,PRODUCT
3,superyachts,CARDINAL
4,Russian,NORP


In [74]:
ner_df.value_counts()

token                 ner_tag
Ukraine               GPE        47
UK                    GPE        36
England               GPE        32
Russian               NORP       20
US                    GPE        19
                                 ..
Georgia Taylor-Brown  PERSON      1
Geraint Thomas        PERSON      1
Ghislaine Maxwell     PERSON      1
Gianluigi Lentini     PERSON      1
Lionesses Zelensky    PERSON      1
Name: count, Length: 1114, dtype: int64

In [75]:
# token frequency count
ner_df_counts = ner_df.groupby(['token','ner_tag']).size().reset_index(name='counts').sort_values(by='counts', ascending=False)
ner_df_counts.head(10)

Unnamed: 0,token,ner_tag,counts
965,Ukraine,GPE,47
955,UK,GPE,36
329,England,GPE,32
819,Russian,NORP,20
957,US,GPE,19
1031,World Cup 2022,EVENT,18
1058,first,ORDINAL,13
918,The Papers,WORK_OF_ART,13
378,France,GPE,12
226,China,GPE,11


In [79]:
# token frequency easier code (VBs version)
new_df_counts_vb = ner_df.value_counts(['token', 'ner_tag']).reset_index(name='counts')
new_df_counts_vb.head(10)

Unnamed: 0,token,ner_tag,counts
0,Ukraine,GPE,47
1,UK,GPE,36
2,England,GPE,32
3,Russian,NORP,20
4,US,GPE,19
5,World Cup 2022,EVENT,18
6,first,ORDINAL,13
7,The Papers,WORK_OF_ART,13
8,France,GPE,12
9,China,GPE,11


In [76]:
# most common people
people = ner_df_counts[ner_df_counts.ner_tag == "PERSON"][0:10]
people

Unnamed: 0,token,ner_tag,counts
257,Covid,PERSON,9
760,Queen,PERSON,8
757,Putin,PERSON,8
169,Boris Johnson,PERSON,6
563,Liz Truss,PERSON,6
788,Rishi Sunak,PERSON,5
581,Macron,PERSON,4
762,Quiz,PERSON,4
515,Jurgen Klopp,PERSON,4
325,Emma Raducanu,PERSON,4
