In [1]:
import numpy as np
import pandas as pd
data = pd.read_csv('ted_talks.csv')
data.head()

Unnamed: 0,transcript,url
0,Good morning. How are you?(Laughter)It's been ...,https://www.ted.com/talks/ken_robinson_says_sc...
1,"Thank you so much, Chris. And it's truly a gre...",https://www.ted.com/talks/al_gore_on_averting_...
2,"(Music: ""The Sound of Silence,"" Simon & Garfun...",https://www.ted.com/talks/david_pogue_says_sim...
3,If you're here today — and I'm very happy that...,https://www.ted.com/talks/majora_carter_s_tale...
4,"About 10 years ago, I took on the task to teac...",https://www.ted.com/talks/hans_rosling_shows_t...


In [2]:
data['title'] = data['url'].map(lambda x:x.split('/')[-1])
data.head()

Unnamed: 0,transcript,url,title
0,Good morning. How are you?(Laughter)It's been ...,https://www.ted.com/talks/ken_robinson_says_sc...,ken_robinson_says_schools_kill_creativity\n
1,"Thank you so much, Chris. And it's truly a gre...",https://www.ted.com/talks/al_gore_on_averting_...,al_gore_on_averting_climate_crisis\n
2,"(Music: ""The Sound of Silence,"" Simon & Garfun...",https://www.ted.com/talks/david_pogue_says_sim...,david_pogue_says_simplicity_sells\n
3,If you're here today — and I'm very happy that...,https://www.ted.com/talks/majora_carter_s_tale...,majora_carter_s_tale_of_urban_renewal\n
4,"About 10 years ago, I took on the task to teac...",https://www.ted.com/talks/hans_rosling_shows_t...,hans_rosling_shows_the_best_stats_you_ve_ever_...


In [3]:
from sklearn.feature_extraction import text
ted_talks = data['transcript'].tolist()

In [4]:
bi_tfidf = text.TfidfVectorizer(input=ted_talks,stop_words='english',ngram_range=(1,2))

In [5]:
bi_matrix = bi_tfidf.fit_transform(ted_talks)

In [6]:
uni_tfidf = text.TfidfVectorizer(input=ted_talks, stop_words='english')

In [7]:
uni_matrix = uni_tfidf.fit_transform(ted_talks)

In [8]:
from sklearn.metrics.pairwise import cosine_similarity
bi_sim = cosine_similarity(bi_matrix)
uni_sim = cosine_similarity(uni_matrix)


In [9]:
def recommend_ted_talks(x):
    return '.'.join(data['title'].loc[x.argsort()[-5:-1]])

In [10]:
data['ted_talks_uni'] = [recommend_ted_talks(x) for x in uni_sim]
data['ted_talk_bi'] = [recommend_ted_talks(x) for x in bi_sim]

In [11]:
print(data['ted_talks_uni'].str.replace('_',' ').str.upper().str.strip().str.split('\n')[1])

['RORY BREMNER S ONE MAN WORLD SUMMIT', '.ALICE BOWS LARKIN WE RE TOO LATE TO PREVENT CLIMATE CHANGE HERE S HOW WE ADAPT', '.TED HALSTEAD A CLIMATE SOLUTION WHERE ALL SIDES CAN WIN', '.AL GORE S NEW THINKING ON THE CLIMATE CRISIS']


In [12]:
data.head()

Unnamed: 0,transcript,url,title,ted_talks_uni,ted_talk_bi
0,Good morning. How are you?(Laughter)It's been ...,https://www.ted.com/talks/ken_robinson_says_sc...,ken_robinson_says_schools_kill_creativity\n,rory_bremner_s_one_man_world_summit\n.sakena_y...,sakena_yacoobi_how_i_stopped_the_taliban_from_...
1,"Thank you so much, Chris. And it's truly a gre...",https://www.ted.com/talks/al_gore_on_averting_...,al_gore_on_averting_climate_crisis\n,rory_bremner_s_one_man_world_summit\n.alice_bo...,alice_bows_larkin_we_re_too_late_to_prevent_cl...
2,"(Music: ""The Sound of Silence,"" Simon & Garfun...",https://www.ted.com/talks/david_pogue_says_sim...,david_pogue_says_simplicity_sells\n,rory_bremner_s_one_man_world_summit\n.david_ca...,rory_bremner_s_one_man_world_summit\n.david_ca...
3,If you're here today — and I'm very happy that...,https://www.ted.com/talks/majora_carter_s_tale...,majora_carter_s_tale_of_urban_renewal\n,dena_simmons_how_students_of_color_confront_im...,janette_sadik_khan_new_york_s_streets_not_so_m...
4,"About 10 years ago, I took on the task to teac...",https://www.ted.com/talks/hans_rosling_shows_t...,hans_rosling_shows_the_best_stats_you_ve_ever_...,hans_rosling_religions_and_babies\n.hans_rosli...,hans_rosling_on_global_population_growth\n.han...


In [13]:
data['ted_talks_uni']

0       rory_bremner_s_one_man_world_summit\n.sakena_y...
1       rory_bremner_s_one_man_world_summit\n.alice_bo...
2       rory_bremner_s_one_man_world_summit\n.david_ca...
3       dena_simmons_how_students_of_color_confront_im...
4       hans_rosling_religions_and_babies\n.hans_rosli...
                              ...                        
2462    simon_anholt_which_country_does_the_most_good_...
2463    penelope_boston\n.joel_levine\n.nathalie_cabro...
2464    cynthia_breazeal_the_rise_of_personal_robots\n...
2465    the_inspiration_of_second_life\n.kary_mullis_o...
2466    amanda_burden_how_public_spaces_make_cities_wo...
Name: ted_talks_uni, Length: 2467, dtype: object