In [1]:
# Import Dependencies
import pandas as pd

In [2]:
# Relative path of the TED Talks CSV file
csv_path = "Resources/ted_talks.csv"

# Load the CSV into a DataFrame, then preview its first rows
ted_df = pd.read_csv(filepath_or_buffer=csv_path)
ted_df.head()

Unnamed: 0,comments,description,duration,event,languages,main_speaker,name,title,views
0,4553,Sir Ken Robinson makes an entertaining and pro...,1164,TED2006,60,Ken Robinson,Ken Robinson: Do schools kill creativity?,Do schools kill creativity?,47227110
1,265,With the same humor and humanity he exuded in ...,977,TED2006,43,Al Gore,Al Gore: Averting the climate crisis,Averting the climate crisis,3200520
2,124,New York Times columnist David Pogue takes aim...,1286,TED2006,26,David Pogue,David Pogue: Simplicity sells,Simplicity sells,1636292
3,200,"In an emotionally charged talk, MacArthur-winn...",1116,TED2006,35,Majora Carter,Majora Carter: Greening the ghetto,Greening the ghetto,1697550
4,593,You've never seen data presented like this. Wi...,1190,TED2006,48,Hans Rosling,Hans Rosling: The best stats you've ever seen,The best stats you've ever seen,12005869


In [8]:
# Report the largest view count, then the smallest, one value per line
print(ted_df["views"].max(), ted_df["views"].min(), sep="\n")

47227110
50443


In [11]:
# Bin edges for TED Talk view counts: eleven edges delimit ten bins.
# With pd.cut's default right=True, each edge after the first is the
# inclusive upper bound of its bin.
bins = [
    0,
    199_999,
    399_999,
    599_999,
    799_999,
    999_999,
    1_999_999,
    2_999_999,
    3_999_999,
    4_999_999,
    50_000_000,
]

# One label per bin, listed in the same order as the bins above
group_labels = [
    "0_to_199k",
    "200_to_399k",
    "400_to_599k",
    "600_to_799k",
    "800_to_999k",
    "1_to_2mil",
    "2_to_3mil",
    "3_to_4mil",
    "4_to_5mil",
    "5_to_50mil",
]

In [12]:
# Preview which labelled bin each of the first talks' view counts falls into
pd.cut(x=ted_df["views"], bins=bins, labels=group_labels).head()

0    5_to_50mil
1     3_to_4mil
2     1_to_2mil
3     1_to_2mil
4    5_to_50mil
Name: views, dtype: category
Categories (10, object): ['0_to_199k' < '200_to_399k' < '400_to_599k' < '600_to_799k' ... '2_to_3mil' < '3_to_4mil' < '4_to_5mil' < '5_to_50mil']

In [13]:
# Store each talk's bin label in a new "Views_Category" column, then preview the frame
ted_df["Views_Category"] = pd.cut(
    x=ted_df["views"],
    bins=bins,
    labels=group_labels,
)
ted_df.head()

Unnamed: 0,comments,description,duration,event,languages,main_speaker,name,title,views,Views_Category
0,4553,Sir Ken Robinson makes an entertaining and pro...,1164,TED2006,60,Ken Robinson,Ken Robinson: Do schools kill creativity?,Do schools kill creativity?,47227110,5_to_50mil
1,265,With the same humor and humanity he exuded in ...,977,TED2006,43,Al Gore,Al Gore: Averting the climate crisis,Averting the climate crisis,3200520,3_to_4mil
2,124,New York Times columnist David Pogue takes aim...,1286,TED2006,26,David Pogue,David Pogue: Simplicity sells,Simplicity sells,1636292,1_to_2mil
3,200,"In an emotionally charged talk, MacArthur-winn...",1116,TED2006,35,Majora Carter,Majora Carter: Greening the ghetto,Greening the ghetto,1697550,1_to_2mil
4,593,You've never seen data presented like this. Wi...,1190,TED2006,48,Hans Rosling,Hans Rosling: The best stats you've ever seen,The best stats you've ever seen,12005869,5_to_50mil


In [16]:
# Group the talks by their "Views_Category" bin (like choosing the rows of a pivot table).
# observed=False is passed explicitly so every bin is kept in the result, even an empty
# one, and so newer pandas versions do not warn about the changing default.
ted_viewscat_group = ted_df.groupby("Views_Category", observed=False)

# Count the non-null "comments" entries in each bin (like adding a count to the pivot values)
print(ted_viewscat_group["comments"].count())

# Average the selected columns within each bin, rounded to two decimals.
# This must use ted_viewscat_group from above: the name `ted_group` is not defined
# anywhere in this notebook and raises NameError on a fresh kernel.
ted_viewscat_group[["comments", "duration", "languages"]].mean().round(2)

Views_Category
0_to_199k        32
200_to_399k     135
400_to_599k     234
600_to_799k     307
800_to_999k     339
1_to_2mil      1004
2_to_3mil       239
3_to_4mil        93
4_to_5mil        68
5_to_50mil       99
Name: comments, dtype: int64


Unnamed: 0_level_0,comments,duration,languages
Views_Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0_to_199k,76.94,898.19,4.06
200_to_399k,81.99,832.19,18.79
400_to_599k,107.16,870.52,22.94
600_to_799k,118.91,829.04,24.4
800_to_999k,119.63,798.77,25.68
1_to_2mil,168.14,809.9,27.9
2_to_3mil,299.48,832.43,32.81
3_to_4mil,360.87,809.51,34.26
4_to_5mil,507.09,920.51,35.72
5_to_50mil,650.39,884.28,40.25
