In [2]:
import pandas as pd
import os

In [4]:
data_file = os.path.join("Resources", "ted_talks.csv")
ted_df = pd.read_csv(data_file)
ted_df.head()

Unnamed: 0,comments,description,duration,event,languages,main_speaker,name,title,views
0,4553,Sir Ken Robinson makes an entertaining and pro...,1164,TED2006,60,Ken Robinson,Ken Robinson: Do schools kill creativity?,Do schools kill creativity?,47227110
1,265,With the same humor and humanity he exuded in ...,977,TED2006,43,Al Gore,Al Gore: Averting the climate crisis,Averting the climate crisis,3200520
2,124,New York Times columnist David Pogue takes aim...,1286,TED2006,26,David Pogue,David Pogue: Simplicity sells,Simplicity sells,1636292
3,200,"In an emotionally charged talk, MacArthur-winn...",1116,TED2006,35,Majora Carter,Majora Carter: Greening the ghetto,Greening the ghetto,1697550
4,593,You've never seen data presented like this. Wi...,1190,TED2006,48,Hans Rosling,Hans Rosling: The best stats you've ever seen,The best stats you've ever seen,12005869


In [13]:
# Figure out the minimum and maximum views for a TED Talk
print(ted_df['views'].max())
print(ted_df['views'].min())

47227110
50443


In [14]:
# Create bins in which to place values based upon TED Talk views
bins = [0,200000,400000,600000,800000,1000000,2000000,3000000,4000000,5000000,50000000]

# Create labels for these bins
group_labels = ["0 to 200k","200k to 400k","400k to 600k","600k to 800k","800k to 1mil","1mil to 2mil",
                "2mil to 3mil","3mil to 4mil","4mil to 5mil","5mil to 50mil"]

In [15]:
# Slice the Views data and place it into bins
pd.cut(ted_df['views'], bins, labels=group_labels)

0       5mil to 50mil
1        3mil to 4mil
2        1mil to 2mil
3        1mil to 2mil
4       5mil to 50mil
5       5mil to 50mil
6        3mil to 4mil
7        800k to 1mil
8        2mil to 3mil
9        3mil to 4mil
10       1mil to 2mil
11       200k to 400k
12       600k to 800k
13       4mil to 5mil
14       200k to 400k
15       2mil to 3mil
16       1mil to 2mil
17       1mil to 2mil
18       1mil to 2mil
19       1mil to 2mil
20       800k to 1mil
21      5mil to 50mil
22       400k to 600k
23      5mil to 50mil
24       1mil to 2mil
25       1mil to 2mil
26       2mil to 3mil
27       2mil to 3mil
28      5mil to 50mil
29      5mil to 50mil
            ...      
2520     800k to 1mil
2521     600k to 800k
2522     600k to 800k
2523     600k to 800k
2524     400k to 600k
2525     600k to 800k
2526     600k to 800k
2527     600k to 800k
2528     400k to 600k
2529     800k to 1mil
2530     400k to 600k
2531     600k to 800k
2532     1mil to 2mil
2533     200k to 400k
2534     6

In [19]:
# Place the data series into a new column inside of the DataFrame
ted_df["View Group"] = pd.cut(ted_df['views'],bins, labels=group_labels)
ted_df.head()

Unnamed: 0,comments,description,duration,event,languages,main_speaker,name,title,views,View Group
0,4553,Sir Ken Robinson makes an entertaining and pro...,1164,TED2006,60,Ken Robinson,Ken Robinson: Do schools kill creativity?,Do schools kill creativity?,47227110,5mil to 50mil
1,265,With the same humor and humanity he exuded in ...,977,TED2006,43,Al Gore,Al Gore: Averting the climate crisis,Averting the climate crisis,3200520,3mil to 4mil
2,124,New York Times columnist David Pogue takes aim...,1286,TED2006,26,David Pogue,David Pogue: Simplicity sells,Simplicity sells,1636292,1mil to 2mil
3,200,"In an emotionally charged talk, MacArthur-winn...",1116,TED2006,35,Majora Carter,Majora Carter: Greening the ghetto,Greening the ghetto,1697550,1mil to 2mil
4,593,You've never seen data presented like this. Wi...,1190,TED2006,48,Hans Rosling,Hans Rosling: The best stats you've ever seen,The best stats you've ever seen,12005869,5mil to 50mil


In [18]:
# Get a count of each bin from the 'View Group' column
ted_df['View Group'].value_counts()

1mil to 2mil     1004
800k to 1mil      339
600k to 800k      307
2mil to 3mil      239
400k to 600k      234
200k to 400k      135
5mil to 50mil      99
3mil to 4mil       93
4mil to 5mil       68
0 to 200k          32
Name: View Group, dtype: int64

In [26]:
# Create a GroupBy object based upon "View Group"
ted_vg = ted_df.groupby("View Group")
ted_vg.max()

Unnamed: 0_level_0,comments,description,duration,event,languages,main_speaker,name,title,views
View Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0 to 200k,208,While we all agree that compassion is a great ...,3573,University of California,30,Tim Birkhead,Tim Birkhead: The early birdwatchers,Why I do theater,197139
200k to 400k,649,Zambia-born filmmaker Franco Sacchi tours us t...,3475,Taste3 2008,48,Virginia Postrel,Virginia Postrel: On glamour,Would you live in a floating city in the sky?,399332
400k to 600k,834,Yves Behar and Forrest North unveil Mission On...,5256,University of California,47,Zainab Salbi,"Zainab Salbi: Women, wartime and the dream of ...",You are the future of philanthropy,599444
600k to 800k,846,"“Men fight wars, and women mourn them,” says d...",2678,Web 2.0 Expo 2008,61,Yoruba Richen,Yoruba Richen: What the gay rights movement le...,Your health depends on where you live,799891
800k to 1mil,831,eL Seed fuses Arabic calligraphy with graffiti...,2853,Taste3 2008,51,eL Seed,"eL Seed: A project of peace, painted across 50...",Your company's data could help end world hunger,999700
1mil to 2mil,2492,"“If you really want to know a people, start by...",3608,Toronto Youth Corps,54,eL Seed,eL Seed: Street art with a message of hope and...,“Am I dying?” The honest answer.,1999097
2mil to 3mil,2673,“People are so afraid of variety that they try...,1485,TEDxZurich 2012,69,iO Tillett Wright,iO Tillett Wright: Fifty shades of gay,Your words may predict your future mental health,2991225
3mil to 4mil,3356,"You’re doing everything right at work, taking ...",2018,World Science Festival,56,Yves Morieux,"Yves Morieux: As work gets more complex, 6 rul...","Yup, I built a nuclear fusion reactor",3982352
4mil to 5mil,6404,Writer Andrew Solomon has spent his career tel...,2102,TEDxMidwest,58,Ze Frank,Ze Frank: Are you human?,Your brain on video games,4984884
5mil to 50mil,4553,You've never seen data presented like this. Wi...,2450,TEDxUW,72,Ze Frank,Ze Frank: Nerdcore comedy,Your elusive creative genius,47227110


In [24]:
# Get the average of each numeric column within the GroupBy object
ted_vg.mean()

Unnamed: 0_level_0,comments,duration,languages,views
View Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0 to 200k,76.9375,898.1875,4.0625,149801.0
200k to 400k,81.992593,832.192593,18.785185,322119.1
400k to 600k,107.162393,870.517094,22.940171,503840.3
600k to 800k,118.912052,829.039088,24.400651,705836.7
800k to 1mil,119.628319,798.772861,25.678466,902295.9
1mil to 2mil,168.136454,809.899402,27.899402,1368263.0
2mil to 3mil,299.481172,832.430962,32.807531,2390719.0
3mil to 4mil,360.870968,809.505376,34.258065,3440350.0
4mil to 5mil,507.088235,920.514706,35.720588,4468781.0
5mil to 50mil,650.393939,884.282828,40.252525,10838010.0
