## Introduction to the TF-IDF matrix
- Creating a TF-IDF matrix
- Extracting keywords using TF-IDF

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Toy corpus: three short documents that differ only in their final word.
corpus = [
    "This is tweet one",
    "This is tweet two",
    "This is tweet three",
]

In [3]:
from sklearn.feature_extraction import text

In [4]:
# `input` tells the vectorizer how to interpret what it is handed and must be one
# of 'filename', 'file' or 'content' (the default) -- it is not where the
# documents go, and recent scikit-learn versions reject any other value.
# The documents themselves are passed to fit_transform.
tfidf = text.TfidfVectorizer()
matrix = tfidf.fit_transform(corpus)
# fit_transform returns a sparse matrix; densify it only for display, which is
# fine here because the toy matrix is tiny (3 documents x 6 terms).
matrix.toarray()

array([[0.41285857, 0.69903033, 0.41285857, 0.        , 0.41285857,
        0.        ],
       [0.41285857, 0.        , 0.41285857, 0.        , 0.41285857,
        0.69903033],
       [0.41285857, 0.        , 0.41285857, 0.69903033, 0.41285857,
        0.        ]])

In [5]:
# Vocabulary learned by the vectorizer; position i names column i of the TF-IDF matrix.
tfidf.get_feature_names_out()

array(['is', 'one', 'this', 'three', 'tweet', 'two'], dtype=object)

In [6]:
# Label every TF-IDF column with its vocabulary term so the weights are readable.
pd.DataFrame(
    data=matrix.toarray(),
    columns=tfidf.get_feature_names_out(),
)

Unnamed: 0,is,one,this,three,tweet,two
0,0.412859,0.69903,0.412859,0.0,0.412859,0.0
1,0.412859,0.0,0.412859,0.0,0.412859,0.69903
2,0.412859,0.0,0.412859,0.69903,0.412859,0.0


#### Extracting keywords using TF-IDF weights

In [7]:
# Load the talk transcripts from a path relative to the notebook.
# The preview in the next cell shows a `transcript` and a `url` column.
path = "./data/transcripts.csv"
data=pd.read_csv(path)

In [8]:
# Preview the first two rows to check the columns that were loaded.
data.head(2)

Unnamed: 0,transcript,url
0,Good morning. How are you?(Laughter)It's been ...,https://www.ted.com/talks/ken_robinson_says_sc...
1,"Thank you so much, Chris. And it's truly a gre...",https://www.ted.com/talks/al_gore_on_averting_...


In [9]:
# The transcripts are supplied later via fit_transform; `input` only accepts
# 'filename', 'file' or 'content' (the default), so it must not receive the data.
# stop_words='english' drops common English words and max_features=5000 keeps
# only the 5000 most frequent terms across the corpus.
tfidf = text.TfidfVectorizer(stop_words='english', max_features=5000)

In [10]:
# Learn the vocabulary and IDF weights from all transcripts and build the
# document-term TF-IDF matrix (one row per transcript).
matrix=tfidf.fit_transform(data['transcript'].tolist())

In [11]:
# (number of transcripts, number of terms); the term count is capped by max_features.
matrix.shape

(2467, 5000)

In [12]:
# How do I extract the keywords? Pick the top 5 words from each transcript by TF-IDF value.
#
# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2;
# get_feature_names_out() (already used earlier in this notebook) returns a NumPy
# array that can be fancy-indexed directly. The vocabulary is the same for every
# row, so look it up once instead of on every iteration.
feature_names = tfidf.get_feature_names_out()

kw = []
for row in matrix.toarray():
    # argsort is ascending: take the last five positions and reverse them so
    # the highest-weighted term comes first.
    top_idx = row.argsort()[-5:][::-1]
    kw.append(",".join(feature_names[top_idx]))



In [13]:
data.head(2)  # the url column is used below to extract the title of each talk

Unnamed: 0,transcript,url
0,Good morning. How are you?(Laughter)It's been ...,https://www.ted.com/talks/ken_robinson_says_sc...
1,"Thank you so much, Chris. And it's truly a gre...",https://www.ted.com/talks/al_gore_on_averting_...


In [14]:
# Inspect one raw URL: the stored value ends with a newline, so it needs stripping.
data['url'][0]

'https://www.ted.com/talks/ken_robinson_says_schools_kill_creativity\n'

In [15]:
# Strip the trailing newline, then split on "/"; the last element is the talk's slug.
data['url'][0].strip().split("/")

['https:',
 '',
 'www.ted.com',
 'talks',
 'ken_robinson_says_schools_kill_creativity']

In [16]:
# Attach the comma-joined top keywords to their transcripts (kw has one entry per row).
# NOTE(review): the column name 'kwyords' looks like a misspelling of 'keywords';
# it is left as-is here because renaming it would change the frame's schema.
data['kwyords']=kw
data.head(2)

Unnamed: 0,transcript,url,kwyords
0,Good morning. How are you?(Laughter)It's been ...,https://www.ted.com/talks/ken_robinson_says_sc...,"education,laughter,said,think,dance"
1,"Thank you so much, Chris. And it's truly a gre...",https://www.ted.com/talks/al_gore_on_averting_...,"carbon,laughter,emissions,restaurant,trucks"


In [17]:
# Derive the talk title (slug) from the URL: keep everything after the last "/"
# and strip surrounding whitespace, because the stored URLs end with a newline.
# The vectorised .str accessor replaces the Python-level lambda and propagates
# missing URLs as NaN instead of raising.
data['title'] = data['url'].str.rsplit("/", n=1).str[-1].str.strip()

In [18]:
# Compare the extracted keywords with the talk titles for the first ten talks.
data.head(10)

Unnamed: 0,transcript,url,kwyords,title
0,Good morning. How are you?(Laughter)It's been ...,https://www.ted.com/talks/ken_robinson_says_sc...,"education,laughter,said,think,dance",ken_robinson_says_schools_kill_creativity
1,"Thank you so much, Chris. And it's truly a gre...",https://www.ted.com/talks/al_gore_on_averting_...,"carbon,laughter,emissions,restaurant,trucks",al_gore_on_averting_climate_crisis
2,"(Music: ""The Sound of Silence,"" Simon & Garfun...",https://www.ted.com/talks/david_pogue_says_sim...,"microsoft,software,laughter,features,apple",david_pogue_says_simplicity_sells
3,If you're here today — and I'm very happy that...,https://www.ted.com/talks/majora_carter_s_tale...,"bronx,south,environmental,community,city",majora_carter_s_tale_of_urban_renewal
4,"About 10 years ago, I took on the task to teac...",https://www.ted.com/talks/hans_rosling_shows_t...,"countries,data,africa,world,income",hans_rosling_shows_the_best_stats_you_ve_ever_...
5,Thank you. I have to tell you I'm both challen...,https://www.ted.com/talks/tony_robbins_asks_wh...,"emotion,people,know,needs,decisions",tony_robbins_asks_why_we_do_what_we_do
6,"On September 10, the morning of my seventh bir...",https://www.ted.com/talks/julia_sweeney_on_let...,"said,santa,god,laughter,birthday",julia_sweeney_on_letting_go_of_god
7,I'm going to present three projects in rapid f...,https://www.ted.com/talks/joshua_prince_ramus_...,"building,theater,library,sort,diagram",joshua_prince_ramus_on_seattle_s_library
8,It's wonderful to be back. I love this wonderf...,https://www.ted.com/talks/dan_dennett_s_respon...,"religions,sheep,god,clever,book",dan_dennett_s_response_to_rick_warren
9,"I'm often asked, ""What surprised you about the...",https://www.ted.com/talks/rick_warren_on_a_lif...,"god,influence,worldview,believe,wired",rick_warren_on_a_life_of_purpose
