### 1. Loading the dataset

In [23]:
import numpy as np
import pandas as pd
df = pd.read_csv('/kaggle/input/ted-talks/data.csv')

df.head()

Unnamed: 0,title,author,date,views,likes,link
0,Climate action needs new frontline leadership,Ozawa Bineshi Albert,December 2021,404000,12000,https://ted.com/talks/ozawa_bineshi_albert_cli...
1,The dark history of the overthrow of Hawaii,Sydney Iaukea,February 2022,214000,6400,https://ted.com/talks/sydney_iaukea_the_dark_h...
2,How play can spark new ideas for your business,Martin Reeves,September 2021,412000,12000,https://ted.com/talks/martin_reeves_how_play_c...
3,Why is China appointing judges to combat clima...,James K. Thornton,October 2021,427000,12000,https://ted.com/talks/james_k_thornton_why_is_...
4,Cement's carbon problem — and 2 ways to fix it,Mahendra Singhi,October 2021,2400,72,https://ted.com/talks/mahendra_singhi_cement_s...


### 2. Dealing with Null & Duplicae Values
##### 2.1 Checking null values

In [24]:
df.isnull().sum()

title     0
author    1
date      0
views     0
likes     0
link      0
dtype: int64

##### 2.2 Removing null values

In [25]:
df.dropna(inplace = True)

##### 2.3 Checking duplicate values

In [26]:
print(len(df) - len(df.drop_duplicates()))

0


### 3. Exploratory Data Analysis

##### 3.1 Explore Views Column

In [27]:
arr = np.array(df['views'])

print('----- Views Analysis -----')
print('Minimun : ', np.min(arr))
print('Maximum : ', np.max(arr))
print('Total   : ', np.sum(arr))
print('Average : ', int(np.mean(arr)))
print('--------------------------')

----- Views Analysis -----
Minimun :  1200
Maximum :  72000000
Total   :  11214972679
Average :  2061954
--------------------------


##### 3.2 Explore Likes Column

In [31]:
arr = np.array(df['likes'])

print('----- Likes Analysis -----')
print('Minimun : ', np.min(arr))
print('Maximum : ', np.max(arr))
print('Total   : ', np.sum(arr))
print('Average : ', int(np.mean(arr)))
print('--------------------------')

----- Likes Analysis -----
Minimun :  37
Maximum :  2100000
Total   :  340585420
Average :  62619
--------------------------


##### 3.3 Explore Titles Column

In [40]:
print(len(df) - len(df['title'].unique()))

0


##### 3.4 Explore Authors Column

In [47]:
print('Number of authors : ', len(df['author'].unique()))

Number of authors :  4443


### 4. Data Cleaning & Feature Engineering
##### 4.1 Creating new columns from data | month & year

In [79]:
month = []
year = []

for i in df['date']:
    month.append(i.split(' ')[0])
    year.append(int(i.split(' ')[1]))

df['month'] = month
df['year'] = year
df.head()

Unnamed: 0,title,author,date,views,likes,link,month,year
0,Climate action needs new frontline leadership,Ozawa Bineshi Albert,December 2021,404000,12000,ozawa_bineshi_albert_climate_action_needs_new_...,December,2021
1,The dark history of the overthrow of Hawaii,Sydney Iaukea,February 2022,214000,6400,sydney_iaukea_the_dark_history_of_the_overthro...,February,2022
2,How play can spark new ideas for your business,Martin Reeves,September 2021,412000,12000,martin_reeves_how_play_can_spark_new_ideas_for...,September,2021
3,Why is China appointing judges to combat clima...,James K. Thornton,October 2021,427000,12000,james_k_thornton_why_is_china_appointing_judge...,October,2021
4,Cement's carbon problem — and 2 ways to fix it,Mahendra Singhi,October 2021,2400,72,mahendra_singhi_cement_s_carbon_problem_and_2_...,October,2021


##### 4.2 Cleaning link column

In [80]:
df['link'] = [i.split('/')[-1] for i in df['link']]
df.head()

Unnamed: 0,title,author,date,views,likes,link,month,year
0,Climate action needs new frontline leadership,Ozawa Bineshi Albert,December 2021,404000,12000,ozawa_bineshi_albert_climate_action_needs_new_...,December,2021
1,The dark history of the overthrow of Hawaii,Sydney Iaukea,February 2022,214000,6400,sydney_iaukea_the_dark_history_of_the_overthro...,February,2022
2,How play can spark new ideas for your business,Martin Reeves,September 2021,412000,12000,martin_reeves_how_play_can_spark_new_ideas_for...,September,2021
3,Why is China appointing judges to combat clima...,James K. Thornton,October 2021,427000,12000,james_k_thornton_why_is_china_appointing_judge...,October,2021
4,Cement's carbon problem — and 2 ways to fix it,Mahendra Singhi,October 2021,2400,72,mahendra_singhi_cement_s_carbon_problem_and_2_...,October,2021


##### 4.3 Creating views_to_like ratio column

In [86]:
df['view_to_like_ratio'] = round(df['views']/df['likes'],2)
df.head()

Unnamed: 0,title,author,date,views,likes,link,month,year,view_to_like_ratio
0,Climate action needs new frontline leadership,Ozawa Bineshi Albert,December 2021,404000,12000,ozawa_bineshi_albert_climate_action_needs_new_...,December,2021,33.67
1,The dark history of the overthrow of Hawaii,Sydney Iaukea,February 2022,214000,6400,sydney_iaukea_the_dark_history_of_the_overthro...,February,2022,33.44
2,How play can spark new ideas for your business,Martin Reeves,September 2021,412000,12000,martin_reeves_how_play_can_spark_new_ideas_for...,September,2021,34.33
3,Why is China appointing judges to combat clima...,James K. Thornton,October 2021,427000,12000,james_k_thornton_why_is_china_appointing_judge...,October,2021,35.58
4,Cement's carbon problem — and 2 ways to fix it,Mahendra Singhi,October 2021,2400,72,mahendra_singhi_cement_s_carbon_problem_and_2_...,October,2021,33.33


In [90]:
df['author'].nunique()

4443

----- Views Analysis -----
Minimun :  532
Maximum :  72000000
Total   :  11214973211
Average :  2061575
--------------------------


Unnamed: 0,title,author,date,views,likes,link,view_to_like_ratio
0,Climate action needs new frontline leadership,Ozawa Bineshi Albert,December 2021,404000,12000,https://ted.com/talks/ozawa_bineshi_albert_cli...,33.666667
1,The dark history of the overthrow of Hawaii,Sydney Iaukea,February 2022,214000,6400,https://ted.com/talks/sydney_iaukea_the_dark_h...,33.4375
2,How play can spark new ideas for your business,Martin Reeves,September 2021,412000,12000,https://ted.com/talks/martin_reeves_how_play_c...,34.333333
3,Why is China appointing judges to combat clima...,James K. Thornton,October 2021,427000,12000,https://ted.com/talks/james_k_thornton_why_is_...,35.583333
4,Cement's carbon problem — and 2 ways to fix it,Mahendra Singhi,October 2021,2400,72,https://ted.com/talks/mahendra_singhi_cement_s...,33.333333


In [78]:
arr = np.array(df['view_to_like_ratio'])

print('----- Ratio Analysis -----')
print('Minimun : ', np.min(arr))
print('Maximum : ', np.max(arr))
print('Total   : ', np.sum(arr))
print('Average : ', int(np.mean(arr)))
print('--------------------------')

----- Ratio Analysis -----
Minimun :  30.3951367781155
Maximum :  36.4
Total   :  180146.4432242789
Average :  33
--------------------------
