In [1]:
# Import Dependencies
import pandas as pd

In [2]:
# Create a path to the csv and read it into a Pandas DataFrame
csv_path = "Resources/ted_talks.csv"
ted_df = pd.read_csv(csv_path)

ted_df.head()

Unnamed: 0,comments,description,duration,event,languages,main_speaker,name,title,views
0,4553,Sir Ken Robinson makes an entertaining and pro...,1164,TED2006,60,Ken Robinson,Ken Robinson: Do schools kill creativity?,Do schools kill creativity?,47227110
1,265,With the same humor and humanity he exuded in ...,977,TED2006,43,Al Gore,Al Gore: Averting the climate crisis,Averting the climate crisis,3200520
2,124,New York Times columnist David Pogue takes aim...,1286,TED2006,26,David Pogue,David Pogue: Simplicity sells,Simplicity sells,1636292
3,200,"In an emotionally charged talk, MacArthur-winn...",1116,TED2006,35,Majora Carter,Majora Carter: Greening the ghetto,Greening the ghetto,1697550
4,593,You've never seen data presented like this. Wi...,1190,TED2006,48,Hans Rosling,Hans Rosling: The best stats you've ever seen,The best stats you've ever seen,12005869


In [3]:
# Figure out the minimum and maximum views for a TED Talk

max_views = ted_df["views"].max()
min_views = ted_df["views"].min()

print("Max_Views", max_views)
print("Min_Views", min_views)

Max_Views 47227110
Min_Views 50443


In [4]:
# Create bins in which to place values based upon TED Talk views

bins = [x for x in range (0, int(max_views) + 1, int(max_views/10))]

print(bins)

# Create labels for these bins

group_names = ["< 5M", "< 10M", "< 15M", "< 20M", "< 25M", "< 30M", "< 35M", "< 40M", "< 45M", "< 50M"]

[0, 4722711, 9445422, 14168133, 18890844, 23613555, 28336266, 33058977, 37781688, 42504399, 47227110]


In [5]:
# Slice the data and place it into bins

ted_df["View Group"] = pd.cut(ted_df["views"], bins, labels = group_names)

In [6]:
# Place the data series into a new column inside of the DataFrame

ted_df

Unnamed: 0,comments,description,duration,event,languages,main_speaker,name,title,views,View Group
0,4553,Sir Ken Robinson makes an entertaining and pro...,1164,TED2006,60,Ken Robinson,Ken Robinson: Do schools kill creativity?,Do schools kill creativity?,47227110,< 50M
1,265,With the same humor and humanity he exuded in ...,977,TED2006,43,Al Gore,Al Gore: Averting the climate crisis,Averting the climate crisis,3200520,< 5M
2,124,New York Times columnist David Pogue takes aim...,1286,TED2006,26,David Pogue,David Pogue: Simplicity sells,Simplicity sells,1636292,< 5M
3,200,"In an emotionally charged talk, MacArthur-winn...",1116,TED2006,35,Majora Carter,Majora Carter: Greening the ghetto,Greening the ghetto,1697550,< 5M
4,593,You've never seen data presented like this. Wi...,1190,TED2006,48,Hans Rosling,Hans Rosling: The best stats you've ever seen,The best stats you've ever seen,12005869,< 15M
...,...,...,...,...,...,...,...,...,...,...
2545,17,"Between 2008 and 2016, the United States depor...",476,TED2017,4,Duarte Geraldino,Duarte Geraldino: What we're missing in the de...,What we're missing in the debate about immigra...,450430,< 5M
2546,6,How can you study Mars without a spaceship? He...,290,TED2017,3,Armando Azua-Bustos,Armando Azua-Bustos: The most Martian place on...,The most Martian place on Earth,417470,< 5M
2547,10,Science fiction visions of the future show us ...,651,TED2017,1,Radhika Nagpal,Radhika Nagpal: What intelligent machines can ...,What intelligent machines can learn from a sch...,375647,< 5M
2548,32,In an unmissable talk about race and politics ...,1100,TEDxMileHigh,1,Theo E.J. Wilson,Theo E.J. Wilson: A black man goes undercover ...,A black man goes undercover in the alt-right,419309,< 5M


In [7]:
# Create a GroupBy object based upon "View Group"

ted_gpby = ted_df.groupby("View Group")

# Find how many rows fall into each bin

ted_gpby.count()

Unnamed: 0_level_0,comments,description,duration,event,languages,main_speaker,name,title,views
View Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
< 5M,2434,2434,2434,2434,2434,2434,2434,2434,2434
< 10M,80,80,80,80,80,80,80,80,80
< 15M,15,15,15,15,15,15,15,15,15
< 20M,11,11,11,11,11,11,11,11,11
< 25M,6,6,6,6,6,6,6,6,6
< 30M,0,0,0,0,0,0,0,0,0
< 35M,1,1,1,1,1,1,1,1,1
< 40M,1,1,1,1,1,1,1,1,1
< 45M,0,0,0,0,0,0,0,0,0
< 50M,2,2,2,2,2,2,2,2,2


In [8]:
# Get the average of each column within the GroupBy object

ted_gpby[["comments", "duration", "languages"]].mean()

Unnamed: 0_level_0,comments,duration,languages
View Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
< 5M,171.966311,824.211175,26.736647
< 10M,429.425,843.2875,38.0375
< 15M,779.6,910.0,42.8
< 20M,724.909091,950.818182,41.727273
< 25M,866.0,861.666667,42.166667
< 30M,,,
< 35M,1927.0,1219.0,52.0
< 40M,1930.0,1084.0,45.0
< 45M,,,
< 50M,3421.5,1213.0,55.5
