## 1) Loading dataset

In [3]:
import pandas as pd
df = pd.read_csv('ted_data.csv')
df.head()

Unnamed: 0,title,author,date,views,likes,link
0,Climate action needs new frontline leadership,Ozawa Bineshi Albert,December 2021,404000,12000,https://ted.com/talks/ozawa_bineshi_albert_cli...
1,The dark history of the overthrow of Hawaii,Sydney Iaukea,February 2022,214000,6400,https://ted.com/talks/sydney_iaukea_the_dark_h...
2,How play can spark new ideas for your business,Martin Reeves,September 2021,412000,12000,https://ted.com/talks/martin_reeves_how_play_c...
3,Why is China appointing judges to combat clima...,James K. Thornton,October 2021,427000,12000,https://ted.com/talks/james_k_thornton_why_is_...
4,Cement's carbon problem — and 2 ways to fix it,Mahendra Singhi,October 2021,2400,72,https://ted.com/talks/mahendra_singhi_cement_s...


## 2) Data Preprocessing

In [4]:
df.isnull().sum()           # Checking for null values

title     0
author    1
date      0
views     0
likes     0
link      0
dtype: int64

In [5]:
df = df.dropna()           # Dropping author null value

In [6]:
df.isnull().sum()

title     0
author    0
date      0
views     0
likes     0
link      0
dtype: int64

In [7]:
df.info()    # Checking for data types

<class 'pandas.core.frame.DataFrame'>
Int64Index: 5439 entries, 0 to 5439
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   title   5439 non-null   object
 1   author  5439 non-null   object
 2   date    5439 non-null   object
 3   views   5439 non-null   int64 
 4   likes   5439 non-null   int64 
 5   link    5439 non-null   object
dtypes: int64(2), object(4)
memory usage: 297.4+ KB


In [8]:
df.describe()      # Checking for stats of column

Unnamed: 0,views,likes
count,5439.0,5439.0
mean,2061955.0,62619.12
std,3567316.0,107653.3
min,1200.0,37.0
25%,671000.0,20000.0
50%,1300000.0,41000.0
75%,2100000.0,65000.0
max,72000000.0,2100000.0


## 3) Data Analysis

### 1) Finding the most popular Ted talk speaker 

#### 1.1) In terms of number of talks

In [9]:
speaker_counts = df['author'].value_counts()
top_speaker = speaker_counts.head(5)
for i, (speaker, count) in enumerate(top_speaker.iteritems(),1):
    print(f"Speaker{i}: {speaker}")
    print("No of Talks: ", count)
    print(50*'-')  

Speaker1: Alex Gendler
No of Talks:  45
--------------------------------------------------
Speaker2: Iseult Gillespie
No of Talks:  33
--------------------------------------------------
Speaker3: Matt Walker
No of Talks:  18
--------------------------------------------------
Speaker4: Alex Rosenthal
No of Talks:  15
--------------------------------------------------
Speaker5: Elizabeth Cox
No of Talks:  13
--------------------------------------------------


#### 1.2) In terms of no of likes

In [10]:
speaker_likes = df.groupby('author')['likes'].sum()
top_speaker = speaker_likes.nlargest(5)
for i, (speaker, likes) in enumerate(top_speaker.iteritems(),1):
    print(f"Speaker{i}: {speaker}")
    print("Total Likes: ", likes)
    print(50*'-')  

Speaker1: Alex Gendler
Total Likes:  5691000
--------------------------------------------------
Speaker2: Sir Ken Robinson
Total Likes:  2833600
--------------------------------------------------
Speaker3: Bill Gates
Total Likes:  2349000
--------------------------------------------------
Speaker4: Simon Sinek
Total Likes:  2246000
--------------------------------------------------
Speaker5: Brené Brown
Total Likes:  2204000
--------------------------------------------------


#### 1.3) In terms of views

In [11]:
speaker_views = df.groupby('author')['views'].sum()
top_speaker = speaker_views.nlargest(5)
for i, (speaker, views) in enumerate(top_speaker.iteritems(),1):
    print(f"Speaker{i}: {speaker}")
    print("Total Likes: ", views)
    print(50*'-')  

Speaker1: Alex Gendler
Total Likes:  187196000
--------------------------------------------------
Speaker2: Sir Ken Robinson
Total Likes:  95654000
--------------------------------------------------
Speaker3: Bill Gates
Total Likes:  77800000
--------------------------------------------------
Speaker4: Simon Sinek
Total Likes:  74800000
--------------------------------------------------
Speaker5: Brené Brown
Total Likes:  72000000
--------------------------------------------------


### 2) Month wise Analysis of Ted Talk Frequency

In [17]:
df['date'] = pd.to_datetime(df['date'], format='%B %Y')  # Extracting month column from date column, where %B %Y means same 
df['month'] = df['date'].dt.strftime('%B')               # date (month) format like in dataset       

In [18]:
talk_per_month = df['month'].value_counts().sort_index()
for month, count in talk_per_month.iteritems():
    print(f" {month} : {count} talks")

 April : 576 talks
 August : 200 talks
 December : 334 talks
 February : 725 talks
 January : 147 talks
 July : 446 talks
 June : 493 talks
 March : 580 talks
 May : 322 talks
 November : 682 talks
 October : 585 talks
 September : 349 talks


### 3) Year wise Analysis of Ted Talk Frequency

In [19]:
df['date'] = pd.to_datetime(df['date'], format='%B %Y')  # Extracting month column from date column, where  %B %Y means same 
df['year'] = df['date'].dt.year                          # date (year) format like in dataset       

In [20]:
talk_per_year = df['year'].value_counts().sort_index()
for year, count in talk_per_year.iteritems():
    print(f"  Year {year} : {count} talks")

  Year 1970 : 2 talks
  Year 1972 : 1 talks
  Year 1983 : 1 talks
  Year 1984 : 1 talks
  Year 1990 : 1 talks
  Year 1991 : 1 talks
  Year 1994 : 1 talks
  Year 1998 : 6 talks
  Year 2001 : 5 talks
  Year 2002 : 26 talks
  Year 2003 : 34 talks
  Year 2004 : 33 talks
  Year 2005 : 65 talks
  Year 2006 : 49 talks
  Year 2007 : 113 talks
  Year 2008 : 84 talks
  Year 2009 : 233 talks
  Year 2010 : 267 talks
  Year 2011 : 271 talks
  Year 2012 : 302 talks
  Year 2013 : 388 talks
  Year 2014 : 357 talks
  Year 2015 : 376 talks
  Year 2016 : 399 talks
  Year 2017 : 495 talks
  Year 2018 : 473 talks
  Year 2019 : 544 talks
  Year 2020 : 501 talks
  Year 2021 : 390 talks
  Year 2022 : 20 talks


### 4) Finding Ted talks of your favourite author

In [21]:
your_fvrt_author = input('Enter your Favourite author: ').lower()
print('The following are the TED Talks of your favourite author: ')
print('-'*50)
for i in df.values:
    if your_fvrt_author == i[1].lower():
        print('Title : ', i[0])
        print('Author: ', i[1])
        print('Date  : ', i[2])
        print('Views : ', i[3])
        print('Likes : ', i[4])
        print('link  : ', i[5])
        print('-'*50)

Enter your Favourite author: bill gates
The following are the TED Talks of your favourite author: 
--------------------------------------------------
Title :  The innovations we need to avoid a climate disaster
Author:  Bill Gates
Date  :  2021-03-01 00:00:00
Views :  1700000
Likes :  53000
link  :  https://ted.com/talks/bill_gates_the_innovations_we_need_to_avoid_a_climate_disaster
--------------------------------------------------
Title :  How the pandemic will shape the near future
Author:  Bill Gates
Date  :  2020-06-01 00:00:00
Views :  4600000
Likes :  138000
link  :  https://ted.com/talks/bill_gates_how_the_pandemic_will_shape_the_near_future
--------------------------------------------------
Title :  How we must respond to the coronavirus pandemic
Author:  Bill Gates
Date  :  2020-03-01 00:00:00
Views :  8600000
Likes :  259000
link  :  https://ted.com/talks/bill_gates_how_we_must_respond_to_the_coronavirus_pandemic
--------------------------------------------------
Title :  Th

### 5) Finding Ted talks with best view to like ratio

In [22]:
ted_talk ={}

for i in df.values:
    ted_talk[i[0]] = round(i[3]/i[4], 2)

In [23]:
best_talk = ted_talk.items()
top_talk  = pd.DataFrame(best_talk, columns = ['Title', 'View to Likes ratio'])
top_talk.sort_values(by = 'View to Likes ratio', ascending = False).head(5)

Unnamed: 0,Title,View to Likes ratio
955,A camera that can see around corners,36.4
905,What's the point(e) of ballet?,36.4
837,How to see more and care less: The art of Geor...,36.4
26,Can you outsmart the fallacy that divided a na...,36.3
1016,The function and fashion of eyeglasses,36.3


### 6) Finding Ted Talks based on tags

In [24]:
tag = input('Enter a Tag: ').lower()
print('The following are the TED Talks based pn your tag: ')
print(50*'-')  
for i in df.values:
    if tag in i[0].lower():
        print('Title : ', i[0])
        print('Author: ', i[1])
        print('Date  : ', i[2])
        print('Views : ', i[3])
        print('Likes : ', i[4])
        print('link  : ', i[5])
        print(50*'-')  

Enter a Tag: climate
The following are the TED Talks based pn your tag: 
--------------------------------------------------
Title :  Climate action needs new frontline leadership
Author:  Ozawa Bineshi Albert
Date  :  2021-12-01 00:00:00
Views :  404000
Likes :  12000
link  :  https://ted.com/talks/ozawa_bineshi_albert_climate_action_needs_new_frontline_leadership
--------------------------------------------------
Title :  Why is China appointing judges to combat climate change?
Author:  James K. Thornton
Date  :  2021-10-01 00:00:00
Views :  427000
Likes :  12000
link  :  https://ted.com/talks/james_k_thornton_why_is_china_appointing_judges_to_combat_climate_change
--------------------------------------------------
Title :  The ocean's ingenious climate solutions
Author:  Susan Ruffo
Date  :  2021-10-01 00:00:00
Views :  522000
Likes :  15000
link  :  https://ted.com/talks/susan_ruffo_the_ocean_s_ingenious_climate_solutions
--------------------------------------------------
Title :  H