In [6]:
# Import Dependencies
import pandas as pd
import numpy as np

In [2]:
# Create a path to the csv and read it into a Pandas DataFrame
csv_path = "Resources/ted_talks.csv"
ted_df = pd.read_csv(csv_path)

ted_df.head()

Unnamed: 0,comments,description,duration,event,languages,main_speaker,name,title,views
0,4553,Sir Ken Robinson makes an entertaining and pro...,1164,TED2006,60,Ken Robinson,Ken Robinson: Do schools kill creativity?,Do schools kill creativity?,47227110
1,265,With the same humor and humanity he exuded in ...,977,TED2006,43,Al Gore,Al Gore: Averting the climate crisis,Averting the climate crisis,3200520
2,124,New York Times columnist David Pogue takes aim...,1286,TED2006,26,David Pogue,David Pogue: Simplicity sells,Simplicity sells,1636292
3,200,"In an emotionally charged talk, MacArthur-winn...",1116,TED2006,35,Majora Carter,Majora Carter: Greening the ghetto,Greening the ghetto,1697550
4,593,You've never seen data presented like this. Wi...,1190,TED2006,48,Hans Rosling,Hans Rosling: The best stats you've ever seen,The best stats you've ever seen,12005869


In [9]:
# Figure out the minimum and maximum views for a TED Talk
print(ted_df['views'].max(), ted_df['views'].min())

47227110 50443


In [11]:
# Create bins in which to place values based upon TED Talk views
bins = [0, 200000-1, 400000-1, 600000-1, 800000-1, 1000000-1, 
       2000000-1, 3000000-1, 4000000-1, 5000000-1, 5000000000]

group_labels = ["0-199K", "200K - 399K", "400K - 599K", "600K - 799K", "800K - 999K",
               "1M - 2M", "2M - 3M", "3M - 4M", "4M - 5M", "5M to 500M"]
# Create labels for these bins
len(group_labels)

10

In [13]:
# Slice the data and place it into bins
pd.cut(ted_df["views"], bins, group_labels).value_counts()

(999999, 1999999]        1004
(799999, 999999]          339
(599999, 799999]          307
(1999999, 2999999]        239
(399999, 599999]          234
(199999, 399999]          135
(4999999, 5000000000]      99
(2999999, 3999999]         93
(3999999, 4999999]         68
(0, 199999]                32
Name: views, dtype: int64

In [15]:
# Place the data series into a new column inside of the DataFrame
ted_df["View Group"] = pd.cut(ted_df["views"], bins, group_labels)
ted_df.head()

Unnamed: 0,comments,description,duration,event,languages,main_speaker,name,title,views,View Group
0,4553,Sir Ken Robinson makes an entertaining and pro...,1164,TED2006,60,Ken Robinson,Ken Robinson: Do schools kill creativity?,Do schools kill creativity?,47227110,"(4999999, 5000000000]"
1,265,With the same humor and humanity he exuded in ...,977,TED2006,43,Al Gore,Al Gore: Averting the climate crisis,Averting the climate crisis,3200520,"(2999999, 3999999]"
2,124,New York Times columnist David Pogue takes aim...,1286,TED2006,26,David Pogue,David Pogue: Simplicity sells,Simplicity sells,1636292,"(999999, 1999999]"
3,200,"In an emotionally charged talk, MacArthur-winn...",1116,TED2006,35,Majora Carter,Majora Carter: Greening the ghetto,Greening the ghetto,1697550,"(999999, 1999999]"
4,593,You've never seen data presented like this. Wi...,1190,TED2006,48,Hans Rosling,Hans Rosling: The best stats you've ever seen,The best stats you've ever seen,12005869,"(4999999, 5000000000]"


In [19]:
# Create a GroupBy object based upon "View Group"
ted_group = ted_df.groupby("View Group")

# Find how many rows fall into each bin
print(ted_group['comments'].count())

# Get the average of each column within the GroupBy object

View Group
(0, 199999]                32
(199999, 399999]          135
(399999, 599999]          234
(599999, 799999]          307
(799999, 999999]          339
(999999, 1999999]        1004
(1999999, 2999999]        239
(2999999, 3999999]         93
(3999999, 4999999]         68
(4999999, 5000000000]      99
Name: comments, dtype: int64


In [20]:
ted_group[['comments', 'duration', 'languages']].mean()

Unnamed: 0_level_0,comments,duration,languages
View Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"(0, 199999]",76.9375,898.1875,4.0625
"(199999, 399999]",81.992593,832.192593,18.785185
"(399999, 599999]",107.162393,870.517094,22.940171
"(599999, 799999]",118.912052,829.039088,24.400651
"(799999, 999999]",119.628319,798.772861,25.678466
"(999999, 1999999]",168.136454,809.899402,27.899402
"(1999999, 2999999]",299.481172,832.430962,32.807531
"(2999999, 3999999]",360.870968,809.505376,34.258065
"(3999999, 4999999]",507.088235,920.514706,35.720588
"(4999999, 5000000000]",650.393939,884.282828,40.252525
