In [29]:
# Import depenencies
import pandas as pd
import json
from datetime import datetime as dt

In [30]:
# Read in the category file
f = open("./Data/US_category_id.json")
data = json.load(f)

In [31]:
# Create a dictionary of category id and description
category_dict = {}

for i in data["items"]:
    category = {i["id"] : i["snippet"]["title"]}
    category_dict.update(category)

In [32]:
category_df = pd.DataFrame(category_dict.items(), columns=["category_id", "category_name"])

In [33]:
category_df

Unnamed: 0,category_id,category_name
0,1,Film & Animation
1,2,Autos & Vehicles
2,10,Music
3,15,Pets & Animals
4,17,Sports
5,18,Short Movies
6,19,Travel & Events
7,20,Gaming
8,21,Videoblogging
9,22,People & Blogs


In [34]:
# Save the category_df as csv for future use
category_df.to_csv("../Data/category_id.csv", index=False, index_label=None)

## USvideos.csv cleaning

In [35]:
# read in the USvideos.csv file
file = "./Data/USvideos.csv"
us_videos_df = pd.read_csv(file)
us_videos_df.head()

Unnamed: 0,video_id,trending_date,title,channel_title,category_id,publish_time,tags,views,likes,dislikes,comment_count,thumbnail_link,comments_disabled,ratings_disabled,video_error_or_removed,description
0,2kyS6SvSYSE,17.14.11,WE WANT TO TALK ABOUT OUR MARRIAGE,CaseyNeistat,22,2017-11-13T17:13:01.000Z,SHANtell martin,748374,57527,2966,15954,https://i.ytimg.com/vi/2kyS6SvSYSE/default.jpg,False,False,False,SHANTELL'S CHANNEL - https://www.youtube.com/s...
1,1ZAPwfrtAFY,17.14.11,The Trump Presidency: Last Week Tonight with J...,LastWeekTonight,24,2017-11-13T07:30:00.000Z,"last week tonight trump presidency|""last week ...",2418783,97185,6146,12703,https://i.ytimg.com/vi/1ZAPwfrtAFY/default.jpg,False,False,False,"One year after the presidential election, John..."
2,5qpjK5DgCt4,17.14.11,"Racist Superman | Rudy Mancuso, King Bach & Le...",Rudy Mancuso,23,2017-11-12T19:05:24.000Z,"racist superman|""rudy""|""mancuso""|""king""|""bach""...",3191434,146033,5339,8181,https://i.ytimg.com/vi/5qpjK5DgCt4/default.jpg,False,False,False,WATCH MY PREVIOUS VIDEO ▶ \n\nSUBSCRIBE ► http...
3,puqaWrEC7tY,17.14.11,Nickelback Lyrics: Real or Fake?,Good Mythical Morning,24,2017-11-13T11:00:04.000Z,"rhett and link|""gmm""|""good mythical morning""|""...",343168,10172,666,2146,https://i.ytimg.com/vi/puqaWrEC7tY/default.jpg,False,False,False,Today we find out if Link is a Nickelback amat...
4,d380meD0W0M,17.14.11,I Dare You: GOING BALD!?,nigahiga,24,2017-11-12T18:01:41.000Z,"ryan|""higa""|""higatv""|""nigahiga""|""i dare you""|""...",2095731,132235,1989,17518,https://i.ytimg.com/vi/d380meD0W0M/default.jpg,False,False,False,I know it's been a while since we did this sho...


In [36]:
# Get intial over view of the dataset with info() method
us_videos_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40949 entries, 0 to 40948
Data columns (total 16 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   video_id                40949 non-null  object
 1   trending_date           40949 non-null  object
 2   title                   40949 non-null  object
 3   channel_title           40949 non-null  object
 4   category_id             40949 non-null  int64 
 5   publish_time            40949 non-null  object
 6   tags                    40949 non-null  object
 7   views                   40949 non-null  int64 
 8   likes                   40949 non-null  int64 
 9   dislikes                40949 non-null  int64 
 10  comment_count           40949 non-null  int64 
 11  thumbnail_link          40949 non-null  object
 12  comments_disabled       40949 non-null  bool  
 13  ratings_disabled        40949 non-null  bool  
 14  video_error_or_removed  40949 non-null  bool  
 15  de

Note: The dataset is daily trending list from 2017-11-14 to 2018-04-16, in the data set there will be the same video stayed on the trending list for multiple days and resulting multiple entries with columns like "views", "likes", "dislikes", "comment_count" will be accumulated, so our approch to deal with this case of duolicated data is to based on the "video_id" column which have unique value for each videos, keep the last occurrence which will have the most data for the columns mentioned above, and drop the other ones.

In [37]:
# Drop duplicated entries and keep the last occurrence as the true data.
us_videos_df.drop_duplicates(subset='video_id', keep="first", inplace=True)
us_videos_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 6351 entries, 0 to 40766
Data columns (total 16 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   video_id                6351 non-null   object
 1   trending_date           6351 non-null   object
 2   title                   6351 non-null   object
 3   channel_title           6351 non-null   object
 4   category_id             6351 non-null   int64 
 5   publish_time            6351 non-null   object
 6   tags                    6351 non-null   object
 7   views                   6351 non-null   int64 
 8   likes                   6351 non-null   int64 
 9   dislikes                6351 non-null   int64 
 10  comment_count           6351 non-null   int64 
 11  thumbnail_link          6351 non-null   object
 12  comments_disabled       6351 non-null   bool  
 13  ratings_disabled        6351 non-null   bool  
 14  video_error_or_removed  6351 non-null   bool  
 15  des

### "category_id" column

In [38]:
# Create a dictionary
categories = {1 : "Film & Animation",
2 : "Autos & Vehicles",
10 : "Music",
15 : "Pets & Animals",
17 : "Sports",
18 : "Short Movies",
19 : "Travel & Events",
20 : "Gaming",
21 : "Videoblogging",
22 : "People & Blogs",
23 : "Comedy",
24 : "Entertainment",
25 : "News & Politics",
26 : "Howto & Style",
27 : "Education",
28 : "Science & Technology",
29 : "Nonprofits & Activism",
30 : "Movies",
31 : "Anime/Animation",
32 : "Action/Adventure",
33 : "Classics",
34 : "Comedy",
35 : "Documentary",
36 : "Drama",
37 : "Family",
38 : "Foreign",
39 : "Horror",
49 : "Sci-Fi/Fantasy",
41 : "Thriller",
42 : "Shorts",
43 : "Shows",
44 : "Trailers"
}

In [39]:
# Create a duplicate of 'category_id' to apply the dictionary to
us_videos_df['category_type'] = us_videos_df.loc[:,'category_id']
# Apply the dictionary
us_videos_df['category_type'] = us_videos_df['category_type'].apply(lambda x : categories[x])

# Check the parsed dataframe
us_videos_df.head()

Unnamed: 0,video_id,trending_date,title,channel_title,category_id,publish_time,tags,views,likes,dislikes,comment_count,thumbnail_link,comments_disabled,ratings_disabled,video_error_or_removed,description,category_type
0,2kyS6SvSYSE,17.14.11,WE WANT TO TALK ABOUT OUR MARRIAGE,CaseyNeistat,22,2017-11-13T17:13:01.000Z,SHANtell martin,748374,57527,2966,15954,https://i.ytimg.com/vi/2kyS6SvSYSE/default.jpg,False,False,False,SHANTELL'S CHANNEL - https://www.youtube.com/s...,People & Blogs
1,1ZAPwfrtAFY,17.14.11,The Trump Presidency: Last Week Tonight with J...,LastWeekTonight,24,2017-11-13T07:30:00.000Z,"last week tonight trump presidency|""last week ...",2418783,97185,6146,12703,https://i.ytimg.com/vi/1ZAPwfrtAFY/default.jpg,False,False,False,"One year after the presidential election, John...",Entertainment
2,5qpjK5DgCt4,17.14.11,"Racist Superman | Rudy Mancuso, King Bach & Le...",Rudy Mancuso,23,2017-11-12T19:05:24.000Z,"racist superman|""rudy""|""mancuso""|""king""|""bach""...",3191434,146033,5339,8181,https://i.ytimg.com/vi/5qpjK5DgCt4/default.jpg,False,False,False,WATCH MY PREVIOUS VIDEO ▶ \n\nSUBSCRIBE ► http...,Comedy
3,puqaWrEC7tY,17.14.11,Nickelback Lyrics: Real or Fake?,Good Mythical Morning,24,2017-11-13T11:00:04.000Z,"rhett and link|""gmm""|""good mythical morning""|""...",343168,10172,666,2146,https://i.ytimg.com/vi/puqaWrEC7tY/default.jpg,False,False,False,Today we find out if Link is a Nickelback amat...,Entertainment
4,d380meD0W0M,17.14.11,I Dare You: GOING BALD!?,nigahiga,24,2017-11-12T18:01:41.000Z,"ryan|""higa""|""higatv""|""nigahiga""|""i dare you""|""...",2095731,132235,1989,17518,https://i.ytimg.com/vi/d380meD0W0M/default.jpg,False,False,False,I know it's been a while since we did this sho...,Entertainment


### Drop thumbnail_link column

In [40]:
# Drop thumbnail_link and the columns
us_videos_df = us_videos_df.drop(columns=["thumbnail_link"], axis=1)
us_videos_df.head()

Unnamed: 0,video_id,trending_date,title,channel_title,category_id,publish_time,tags,views,likes,dislikes,comment_count,comments_disabled,ratings_disabled,video_error_or_removed,description,category_type
0,2kyS6SvSYSE,17.14.11,WE WANT TO TALK ABOUT OUR MARRIAGE,CaseyNeistat,22,2017-11-13T17:13:01.000Z,SHANtell martin,748374,57527,2966,15954,False,False,False,SHANTELL'S CHANNEL - https://www.youtube.com/s...,People & Blogs
1,1ZAPwfrtAFY,17.14.11,The Trump Presidency: Last Week Tonight with J...,LastWeekTonight,24,2017-11-13T07:30:00.000Z,"last week tonight trump presidency|""last week ...",2418783,97185,6146,12703,False,False,False,"One year after the presidential election, John...",Entertainment
2,5qpjK5DgCt4,17.14.11,"Racist Superman | Rudy Mancuso, King Bach & Le...",Rudy Mancuso,23,2017-11-12T19:05:24.000Z,"racist superman|""rudy""|""mancuso""|""king""|""bach""...",3191434,146033,5339,8181,False,False,False,WATCH MY PREVIOUS VIDEO ▶ \n\nSUBSCRIBE ► http...,Comedy
3,puqaWrEC7tY,17.14.11,Nickelback Lyrics: Real or Fake?,Good Mythical Morning,24,2017-11-13T11:00:04.000Z,"rhett and link|""gmm""|""good mythical morning""|""...",343168,10172,666,2146,False,False,False,Today we find out if Link is a Nickelback amat...,Entertainment
4,d380meD0W0M,17.14.11,I Dare You: GOING BALD!?,nigahiga,24,2017-11-12T18:01:41.000Z,"ryan|""higa""|""higatv""|""nigahiga""|""i dare you""|""...",2095731,132235,1989,17518,False,False,False,I know it's been a while since we did this sho...,Entertainment


### "trending_date" and "publish_time" columns

In [41]:
# Convert the trending__date datatype to datetime
us_videos_df["trending_date"] = pd.to_datetime(us_videos_df["trending_date"], format="%y.%d.%m")

In [42]:
us_videos_df.head()

Unnamed: 0,video_id,trending_date,title,channel_title,category_id,publish_time,tags,views,likes,dislikes,comment_count,comments_disabled,ratings_disabled,video_error_or_removed,description,category_type
0,2kyS6SvSYSE,2017-11-14,WE WANT TO TALK ABOUT OUR MARRIAGE,CaseyNeistat,22,2017-11-13T17:13:01.000Z,SHANtell martin,748374,57527,2966,15954,False,False,False,SHANTELL'S CHANNEL - https://www.youtube.com/s...,People & Blogs
1,1ZAPwfrtAFY,2017-11-14,The Trump Presidency: Last Week Tonight with J...,LastWeekTonight,24,2017-11-13T07:30:00.000Z,"last week tonight trump presidency|""last week ...",2418783,97185,6146,12703,False,False,False,"One year after the presidential election, John...",Entertainment
2,5qpjK5DgCt4,2017-11-14,"Racist Superman | Rudy Mancuso, King Bach & Le...",Rudy Mancuso,23,2017-11-12T19:05:24.000Z,"racist superman|""rudy""|""mancuso""|""king""|""bach""...",3191434,146033,5339,8181,False,False,False,WATCH MY PREVIOUS VIDEO ▶ \n\nSUBSCRIBE ► http...,Comedy
3,puqaWrEC7tY,2017-11-14,Nickelback Lyrics: Real or Fake?,Good Mythical Morning,24,2017-11-13T11:00:04.000Z,"rhett and link|""gmm""|""good mythical morning""|""...",343168,10172,666,2146,False,False,False,Today we find out if Link is a Nickelback amat...,Entertainment
4,d380meD0W0M,2017-11-14,I Dare You: GOING BALD!?,nigahiga,24,2017-11-12T18:01:41.000Z,"ryan|""higa""|""higatv""|""nigahiga""|""i dare you""|""...",2095731,132235,1989,17518,False,False,False,I know it's been a while since we did this sho...,Entertainment


In [43]:
# Convert the publish_time to datetime
us_videos_df["publish_time"] = us_videos_df["publish_time"].astype("datetime64[ns]")

In [44]:
us_videos_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 6351 entries, 0 to 40766
Data columns (total 16 columns):
 #   Column                  Non-Null Count  Dtype         
---  ------                  --------------  -----         
 0   video_id                6351 non-null   object        
 1   trending_date           6351 non-null   datetime64[ns]
 2   title                   6351 non-null   object        
 3   channel_title           6351 non-null   object        
 4   category_id             6351 non-null   int64         
 5   publish_time            6351 non-null   datetime64[ns]
 6   tags                    6351 non-null   object        
 7   views                   6351 non-null   int64         
 8   likes                   6351 non-null   int64         
 9   dislikes                6351 non-null   int64         
 10  comment_count           6351 non-null   int64         
 11  comments_disabled       6351 non-null   bool          
 12  ratings_disabled        6351 non-null   bool   

In [45]:
# # Extracting publish dates and publish time to seperate columns
# us_videos_df['publish_date'] = us_videos_df['publish_time'].dt.date
# us_videos_df['publish_time'] = us_videos_df['publish_time'].dt.time
# us_videos_df.head()

In [46]:
us_videos_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 6351 entries, 0 to 40766
Data columns (total 16 columns):
 #   Column                  Non-Null Count  Dtype         
---  ------                  --------------  -----         
 0   video_id                6351 non-null   object        
 1   trending_date           6351 non-null   datetime64[ns]
 2   title                   6351 non-null   object        
 3   channel_title           6351 non-null   object        
 4   category_id             6351 non-null   int64         
 5   publish_time            6351 non-null   datetime64[ns]
 6   tags                    6351 non-null   object        
 7   views                   6351 non-null   int64         
 8   likes                   6351 non-null   int64         
 9   dislikes                6351 non-null   int64         
 10  comment_count           6351 non-null   int64         
 11  comments_disabled       6351 non-null   bool          
 12  ratings_disabled        6351 non-null   bool   

In [47]:
# us_videos_df['publish_date'] = pd.to_datetime(us_videos_df['publish_date'])
# #us_videos_df['publish_time'] = pd.to_datetime(us_videos_df['publish_time'])
# us_videos_df.info()

In [49]:
# Extracting the time difference from publish_date to trending_date and a new column "diff_days".
us_videos_df['diff_days'] = (us_videos_df['trending_date'] - us_videos_df['publish_time']).dt.days
us_videos_df.head()

Unnamed: 0,video_id,trending_date,title,channel_title,category_id,publish_time,tags,views,likes,dislikes,comment_count,comments_disabled,ratings_disabled,video_error_or_removed,description,category_type,diff_days
0,2kyS6SvSYSE,2017-11-14,WE WANT TO TALK ABOUT OUR MARRIAGE,CaseyNeistat,22,2017-11-13 17:13:01,SHANtell martin,748374,57527,2966,15954,False,False,False,SHANTELL'S CHANNEL - https://www.youtube.com/s...,People & Blogs,0
1,1ZAPwfrtAFY,2017-11-14,The Trump Presidency: Last Week Tonight with J...,LastWeekTonight,24,2017-11-13 07:30:00,"last week tonight trump presidency|""last week ...",2418783,97185,6146,12703,False,False,False,"One year after the presidential election, John...",Entertainment,0
2,5qpjK5DgCt4,2017-11-14,"Racist Superman | Rudy Mancuso, King Bach & Le...",Rudy Mancuso,23,2017-11-12 19:05:24,"racist superman|""rudy""|""mancuso""|""king""|""bach""...",3191434,146033,5339,8181,False,False,False,WATCH MY PREVIOUS VIDEO ▶ \n\nSUBSCRIBE ► http...,Comedy,1
3,puqaWrEC7tY,2017-11-14,Nickelback Lyrics: Real or Fake?,Good Mythical Morning,24,2017-11-13 11:00:04,"rhett and link|""gmm""|""good mythical morning""|""...",343168,10172,666,2146,False,False,False,Today we find out if Link is a Nickelback amat...,Entertainment,0
4,d380meD0W0M,2017-11-14,I Dare You: GOING BALD!?,nigahiga,24,2017-11-12 18:01:41,"ryan|""higa""|""higatv""|""nigahiga""|""i dare you""|""...",2095731,132235,1989,17518,False,False,False,I know it's been a while since we did this sho...,Entertainment,1


In [50]:
# Check the value count for diff_days column
us_videos_df['diff_days'].value_counts()

0       2736
1       1816
2        687
3        364
4        238
        ... 
2162       1
26         1
1819       1
255        1
479        1
Name: diff_days, Length: 123, dtype: int64

In [52]:
# Reorder the columns
us_videos_df = us_videos_df[["video_id", 
                            "publish_time", 
                            "trending_date",
                            "diff_days",
                            "title", 
                            "channel_title", 
                            "category_id", 
                            "category_type", 
                            "tags", 
                            "views", 
                            "likes", 
                            "dislikes", 
                            "comment_count", 
                            "comments_disabled", 
                            "ratings_disabled", 
                            "video_error_or_removed", 
                            "description"]]

us_videos_df.head()

Unnamed: 0,video_id,publish_time,trending_date,diff_days,title,channel_title,category_id,category_type,tags,views,likes,dislikes,comment_count,comments_disabled,ratings_disabled,video_error_or_removed,description
0,2kyS6SvSYSE,2017-11-13 17:13:01,2017-11-14,0,WE WANT TO TALK ABOUT OUR MARRIAGE,CaseyNeistat,22,People & Blogs,SHANtell martin,748374,57527,2966,15954,False,False,False,SHANTELL'S CHANNEL - https://www.youtube.com/s...
1,1ZAPwfrtAFY,2017-11-13 07:30:00,2017-11-14,0,The Trump Presidency: Last Week Tonight with J...,LastWeekTonight,24,Entertainment,"last week tonight trump presidency|""last week ...",2418783,97185,6146,12703,False,False,False,"One year after the presidential election, John..."
2,5qpjK5DgCt4,2017-11-12 19:05:24,2017-11-14,1,"Racist Superman | Rudy Mancuso, King Bach & Le...",Rudy Mancuso,23,Comedy,"racist superman|""rudy""|""mancuso""|""king""|""bach""...",3191434,146033,5339,8181,False,False,False,WATCH MY PREVIOUS VIDEO ▶ \n\nSUBSCRIBE ► http...
3,puqaWrEC7tY,2017-11-13 11:00:04,2017-11-14,0,Nickelback Lyrics: Real or Fake?,Good Mythical Morning,24,Entertainment,"rhett and link|""gmm""|""good mythical morning""|""...",343168,10172,666,2146,False,False,False,Today we find out if Link is a Nickelback amat...
4,d380meD0W0M,2017-11-12 18:01:41,2017-11-14,1,I Dare You: GOING BALD!?,nigahiga,24,Entertainment,"ryan|""higa""|""higatv""|""nigahiga""|""i dare you""|""...",2095731,132235,1989,17518,False,False,False,I know it's been a while since we did this sho...


## Remove "description" column

In [53]:
# Remove "description" column
us_videos_df = us_videos_df.drop(columns="description")
us_videos_df.head()

Unnamed: 0,video_id,publish_time,trending_date,diff_days,title,channel_title,category_id,category_type,tags,views,likes,dislikes,comment_count,comments_disabled,ratings_disabled,video_error_or_removed
0,2kyS6SvSYSE,2017-11-13 17:13:01,2017-11-14,0,WE WANT TO TALK ABOUT OUR MARRIAGE,CaseyNeistat,22,People & Blogs,SHANtell martin,748374,57527,2966,15954,False,False,False
1,1ZAPwfrtAFY,2017-11-13 07:30:00,2017-11-14,0,The Trump Presidency: Last Week Tonight with J...,LastWeekTonight,24,Entertainment,"last week tonight trump presidency|""last week ...",2418783,97185,6146,12703,False,False,False
2,5qpjK5DgCt4,2017-11-12 19:05:24,2017-11-14,1,"Racist Superman | Rudy Mancuso, King Bach & Le...",Rudy Mancuso,23,Comedy,"racist superman|""rudy""|""mancuso""|""king""|""bach""...",3191434,146033,5339,8181,False,False,False
3,puqaWrEC7tY,2017-11-13 11:00:04,2017-11-14,0,Nickelback Lyrics: Real or Fake?,Good Mythical Morning,24,Entertainment,"rhett and link|""gmm""|""good mythical morning""|""...",343168,10172,666,2146,False,False,False
4,d380meD0W0M,2017-11-12 18:01:41,2017-11-14,1,I Dare You: GOING BALD!?,nigahiga,24,Entertainment,"ryan|""higa""|""higatv""|""nigahiga""|""i dare you""|""...",2095731,132235,1989,17518,False,False,False


### "tags" column

In [54]:
# Checking tags format for the first three rows
print(us_videos_df.loc[0]['tags'])
print("--------------------------")
print(us_videos_df.loc[1]['tags'])
print("--------------------------")
print(us_videos_df.loc[2]['tags'])

SHANtell martin
--------------------------
last week tonight trump presidency|"last week tonight donald trump"|"john oliver trump"|"donald trump"
--------------------------
racist superman|"rudy"|"mancuso"|"king"|"bach"|"racist"|"superman"|"love"|"rudy mancuso poo bear black white official music video"|"iphone x by pineapple"|"lelepons"|"hannahstocking"|"rudymancuso"|"inanna"|"anwar"|"sarkis"|"shots"|"shotsstudios"|"alesso"|"anitta"|"brazil"|"Getting My Driver's License | Lele Pons"


In [56]:
# Create tags_count variable 
tags = us_videos_df['tags'].str.split("|")
tags

0                                        [SHANtell martin]
1        [last week tonight trump presidency, "last wee...
2        [racist superman, "rudy", "mancuso", "king", "...
3        [rhett and link, "gmm", "good mythical morning...
4        [ryan, "higa", "higatv", "nigahiga", "i dare y...
                               ...                        
40760    [Laura88Lee, "crayola", "crayon makeup", "cray...
40761    [espn, "espn live", "boston celtics", "kyrie i...
40762    [The Late Late Show, "Late Late Show", "James ...
40764    [bon appetit, "burgers", "cheeseburgers", "how...
40766    [jimmy, "kimmel", "live", "late", "night", "ta...
Name: tags, Length: 6351, dtype: object

In [59]:
# Create tags_count column and using list comprehension to fill in the counts of tags for each row
us_videos_df['tags_count'] = [len(i) for i in tags]
us_videos_df.head()

Unnamed: 0,video_id,publish_time,trending_date,diff_days,title,channel_title,category_id,category_type,tags,views,likes,dislikes,comment_count,comments_disabled,ratings_disabled,video_error_or_removed,tags_count
0,2kyS6SvSYSE,2017-11-13 17:13:01,2017-11-14,0,WE WANT TO TALK ABOUT OUR MARRIAGE,CaseyNeistat,22,People & Blogs,SHANtell martin,748374,57527,2966,15954,False,False,False,1
1,1ZAPwfrtAFY,2017-11-13 07:30:00,2017-11-14,0,The Trump Presidency: Last Week Tonight with J...,LastWeekTonight,24,Entertainment,"last week tonight trump presidency|""last week ...",2418783,97185,6146,12703,False,False,False,4
2,5qpjK5DgCt4,2017-11-12 19:05:24,2017-11-14,1,"Racist Superman | Rudy Mancuso, King Bach & Le...",Rudy Mancuso,23,Comedy,"racist superman|""rudy""|""mancuso""|""king""|""bach""...",3191434,146033,5339,8181,False,False,False,23
3,puqaWrEC7tY,2017-11-13 11:00:04,2017-11-14,0,Nickelback Lyrics: Real or Fake?,Good Mythical Morning,24,Entertainment,"rhett and link|""gmm""|""good mythical morning""|""...",343168,10172,666,2146,False,False,False,27
4,d380meD0W0M,2017-11-12 18:01:41,2017-11-14,1,I Dare You: GOING BALD!?,nigahiga,24,Entertainment,"ryan|""higa""|""higatv""|""nigahiga""|""i dare you""|""...",2095731,132235,1989,17518,False,False,False,14


In [60]:
# Export the dataframe to a csv file
us_videos_df.to_csv("../Data/us_videos_cleaned_2.csv", index=False)