In [1]:
# Import Dependencies
import pandas as pd

In [2]:
# Location of the TED Talks dataset, relative to the notebook's working directory
csv_path = "Resources/ted_talks.csv"

# Load the CSV into a DataFrame
ted_df = pd.read_csv(csv_path)

# Preview the first five rows
ted_df.head(n=5)

Unnamed: 0,comments,description,duration,event,languages,main_speaker,name,title,views
0,4553,Sir Ken Robinson makes an entertaining and pro...,1164,TED2006,60,Ken Robinson,Ken Robinson: Do schools kill creativity?,Do schools kill creativity?,47227110
1,265,With the same humor and humanity he exuded in ...,977,TED2006,43,Al Gore,Al Gore: Averting the climate crisis,Averting the climate crisis,3200520
2,124,New York Times columnist David Pogue takes aim...,1286,TED2006,26,David Pogue,David Pogue: Simplicity sells,Simplicity sells,1636292
3,200,"In an emotionally charged talk, MacArthur-winn...",1116,TED2006,35,Majora Carter,Majora Carter: Greening the ghetto,Greening the ghetto,1697550
4,593,You've never seen data presented like this. Wi...,1190,TED2006,48,Hans Rosling,Hans Rosling: The best stats you've ever seen,The best stats you've ever seen,12005869


In [3]:
# Print the smallest and the largest view count of any TED Talk, one per line
print(ted_df["views"].min(), ted_df["views"].max(), sep="\n")

50443
47227110


In [4]:
# Summary statistics (count, mean, std, min, quartiles, max) of the view counts;
# used to decide on sensible bin edges for grouping the talks below
ted_df["views"].describe()


count    2.550000e+03
mean     1.698297e+06
std      2.498479e+06
min      5.044300e+04
25%      7.557928e+05
50%      1.124524e+06
75%      1.700760e+06
max      4.722711e+07
Name: views, dtype: float64

In [5]:
# Bin edges for TED Talk view counts: steps of 4 million from 0 up to 40 million,
# followed by one wider final bin that ends at 50 million
bins = list(range(0, 40_000_001, 4_000_000)) + [50_000_000]

# One label per bin (always one fewer label than there are edges)
group_labels = (
    ["< 4 million"]
    + [f"{low}-{low + 4} million" for low in range(4, 40, 4)]
    + [">40 million"]
)

In [6]:
# Preview the interval each talk's view count falls into (the result is only displayed, not stored)
pd.cut(x=ted_df["views"], bins=bins)

0       (40000000, 50000000]
1               (0, 4000000]
2               (0, 4000000]
3               (0, 4000000]
4       (12000000, 16000000]
                ...         
2545            (0, 4000000]
2546            (0, 4000000]
2547            (0, 4000000]
2548            (0, 4000000]
2549            (0, 4000000]
Name: views, Length: 2550, dtype: category
Categories (11, interval[int64]): [(0, 4000000] < (4000000, 8000000] < (8000000, 12000000] < (12000000, 16000000] ... (28000000, 32000000] < (32000000, 36000000] < (36000000, 40000000] < (40000000, 50000000]]

In [7]:
# Bin every talk by its view count and keep the matching label in a new "View Group" column
ted_df["View Group"] = pd.cut(x=ted_df["views"], bins=bins, labels=group_labels)
ted_df

Unnamed: 0,comments,description,duration,event,languages,main_speaker,name,title,views,View Group
0,4553,Sir Ken Robinson makes an entertaining and pro...,1164,TED2006,60,Ken Robinson,Ken Robinson: Do schools kill creativity?,Do schools kill creativity?,47227110,>40 million
1,265,With the same humor and humanity he exuded in ...,977,TED2006,43,Al Gore,Al Gore: Averting the climate crisis,Averting the climate crisis,3200520,< 4 million
2,124,New York Times columnist David Pogue takes aim...,1286,TED2006,26,David Pogue,David Pogue: Simplicity sells,Simplicity sells,1636292,< 4 million
3,200,"In an emotionally charged talk, MacArthur-winn...",1116,TED2006,35,Majora Carter,Majora Carter: Greening the ghetto,Greening the ghetto,1697550,< 4 million
4,593,You've never seen data presented like this. Wi...,1190,TED2006,48,Hans Rosling,Hans Rosling: The best stats you've ever seen,The best stats you've ever seen,12005869,12-16 million
...,...,...,...,...,...,...,...,...,...,...
2545,17,"Between 2008 and 2016, the United States depor...",476,TED2017,4,Duarte Geraldino,Duarte Geraldino: What we're missing in the de...,What we're missing in the debate about immigra...,450430,< 4 million
2546,6,How can you study Mars without a spaceship? He...,290,TED2017,3,Armando Azua-Bustos,Armando Azua-Bustos: The most Martian place on...,The most Martian place on Earth,417470,< 4 million
2547,10,Science fiction visions of the future show us ...,651,TED2017,1,Radhika Nagpal,Radhika Nagpal: What intelligent machines can ...,What intelligent machines can learn from a sch...,375647,< 4 million
2548,32,In an unmissable talk about race and politics ...,1100,TEDxMileHigh,1,Theo E.J. Wilson,Theo E.J. Wilson: A black man goes undercover ...,A black man goes undercover in the alt-right,419309,< 4 million


In [8]:
# Create a GroupBy object based upon "View Group".
# "View Group" is a categorical column, so `observed` is passed explicitly:
# observed=False keeps bins that contain no talks in every aggregate, and it
# avoids relying on the pandas default, which newer releases warn about and
# are changing.
ted_viewers = ted_df.groupby(ted_df["View Group"], observed=False)

In [13]:
# Find how many talks fall into each bin by counting the non-null "name"
# values per view group
ted_viewers["name"].count()

View Group
< 4 million      2383
4-8 million       114
8-12 million       26
12-16 million      11
16-20 million       7
20-24 million       5
24-28 million       0
28-32 million       1
32-36 million       1
36-40 million       0
>40 million         2
Name: name, dtype: int64

In [14]:
# Get the average comments, duration and languages for each view group.
# The columns are selected with a list (double brackets): selecting several
# columns with a bare tuple of names is deprecated and raises an error in
# pandas 2.x.
ted_viewers[["comments", "duration", "languages"]].mean()

  


Unnamed: 0_level_0,comments,duration,languages
View Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
< 4 million,163.496853,821.427612,26.549727
4-8 million,476.333333,900.850877,37.035088
8-12 million,594.807692,835.692308,38.653846
12-16 million,708.0,917.181818,42.545455
16-20 million,895.142857,900.428571,43.0
20-24 million,870.0,918.6,42.0
24-28 million,,,
28-32 million,1927.0,1219.0,52.0
32-36 million,1930.0,1084.0,45.0
36-40 million,,,


In [12]:
# Summary statistics (count, mean, std, min, quartiles, max) of each numeric
# column, computed separately for every view group
ted_viewers.describe()

Unnamed: 0_level_0,comments,comments,comments,comments,comments,comments,comments,comments,duration,duration,...,languages,languages,views,views,views,views,views,views,views,views
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,...,75%,max,count,mean,std,min,25%,50%,75%,max
View Group,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
< 4 million,2383.0,163.496853,198.599276,2.0,60.0,109.0,201.0,3356.0,2383.0,821.427612,...,32.0,69.0,2383.0,1239538.0,735097.2,50443.0,733694.0,1073549.0,1541201.0,3982352.0
4-8 million,114.0,476.333333,630.681871,46.0,230.25,355.5,540.0,6404.0,114.0,900.850877,...,40.75,65.0,114.0,5253622.0,1117892.0,4016531.0,4375387.75,4806318.0,6122079.5,7988894.0
8-12 million,26.0,594.807692,368.200925,129.0,323.25,567.0,760.0,1627.0,26.0,835.692308,...,42.0,72.0,26.0,9276625.0,924926.7,8033595.0,8663852.5,9085206.5,9938934.0,11443190.0
12-16 million,11.0,708.0,402.421918,285.0,432.0,659.0,831.5,1604.0,11.0,917.181818,...,47.5,49.0,11.0,13876240.0,1219687.0,12005869.0,13226909.5,13926113.0,14717353.5,15601385.0
16-20 million,7.0,895.142857,306.816682,527.0,657.5,846.0,1124.5,1329.0,7.0,900.428571,...,44.0,46.0,7.0,17431150.0,1404437.0,16097077.0,16405827.0,16861578.0,18230129.0,19787465.0
20-24 million,5.0,870.0,1138.00681,150.0,297.0,354.0,672.0,2877.0,5.0,918.6,...,45.0,49.0,5.0,21243550.0,721483.2,20475972.0,20685401.0,21190883.0,21594632.0,22270883.0
24-28 million,0.0,,,,,,,,0.0,,...,,,0.0,,,,,,,
28-32 million,1.0,1927.0,,1927.0,1927.0,1927.0,1927.0,1927.0,1.0,1219.0,...,52.0,52.0,1.0,31168150.0,,31168150.0,31168150.0,31168150.0,31168150.0,31168150.0
32-36 million,1.0,1930.0,,1930.0,1930.0,1930.0,1930.0,1930.0,1.0,1084.0,...,45.0,45.0,1.0,34309430.0,,34309432.0,34309432.0,34309432.0,34309432.0,34309432.0
36-40 million,0.0,,,,,,,,0.0,,...,,,0.0,,,,,,,
