In [1]:
# Import dependencies
import pandas as pd
import json
from datetime import datetime as dt

In [2]:
# Read in the category file.
# The context manager closes the file handle as soon as the JSON is parsed,
# and the encoding is pinned so the result does not depend on the OS default.
with open("./Data/US_category_id.json", encoding="utf-8") as f:
    data = json.load(f)

In [3]:
# Map each category id to its title, taken from every entry under "items"
category_dict = {
    item["id"]: item["snippet"]["title"]
    for item in data["items"]
}

In [4]:
# Two-column lookup table: one row per (category_id, category_name) pair
category_df = pd.DataFrame(
    list(category_dict.items()),
    columns=["category_id", "category_name"],
)

In [5]:
# Display the category lookup table to check the id -> name pairs
category_df

Unnamed: 0,category_id,category_name
0,1,Film & Animation
1,2,Autos & Vehicles
2,10,Music
3,15,Pets & Animals
4,17,Sports
5,18,Short Movies
6,19,Travel & Events
7,20,Gaming
8,21,Videoblogging
9,22,People & Blogs


In [6]:
# Persist the category lookup table as CSV (row index omitted) for future use
category_df.to_csv(path_or_buf="./Data/category_id.csv", index=False)

## USvideos.csv cleaning

In [7]:
# Load the raw USvideos.csv file and preview its first five rows
file = "./Data/USvideos.csv"
us_videos_df = pd.read_csv(filepath_or_buffer=file)
us_videos_df.head(n=5)

Unnamed: 0,video_id,trending_date,title,channel_title,category_id,publish_time,tags,views,likes,dislikes,comment_count,thumbnail_link,comments_disabled,ratings_disabled,video_error_or_removed,description
0,2kyS6SvSYSE,17.14.11,WE WANT TO TALK ABOUT OUR MARRIAGE,CaseyNeistat,22,2017-11-13T17:13:01.000Z,SHANtell martin,748374,57527,2966,15954,https://i.ytimg.com/vi/2kyS6SvSYSE/default.jpg,False,False,False,SHANTELL'S CHANNEL - https://www.youtube.com/s...
1,1ZAPwfrtAFY,17.14.11,The Trump Presidency: Last Week Tonight with J...,LastWeekTonight,24,2017-11-13T07:30:00.000Z,"last week tonight trump presidency|""last week ...",2418783,97185,6146,12703,https://i.ytimg.com/vi/1ZAPwfrtAFY/default.jpg,False,False,False,"One year after the presidential election, John..."
2,5qpjK5DgCt4,17.14.11,"Racist Superman | Rudy Mancuso, King Bach & Le...",Rudy Mancuso,23,2017-11-12T19:05:24.000Z,"racist superman|""rudy""|""mancuso""|""king""|""bach""...",3191434,146033,5339,8181,https://i.ytimg.com/vi/5qpjK5DgCt4/default.jpg,False,False,False,WATCH MY PREVIOUS VIDEO ▶ \n\nSUBSCRIBE ► http...
3,puqaWrEC7tY,17.14.11,Nickelback Lyrics: Real or Fake?,Good Mythical Morning,24,2017-11-13T11:00:04.000Z,"rhett and link|""gmm""|""good mythical morning""|""...",343168,10172,666,2146,https://i.ytimg.com/vi/puqaWrEC7tY/default.jpg,False,False,False,Today we find out if Link is a Nickelback amat...
4,d380meD0W0M,17.14.11,I Dare You: GOING BALD!?,nigahiga,24,2017-11-12T18:01:41.000Z,"ryan|""higa""|""higatv""|""nigahiga""|""i dare you""|""...",2095731,132235,1989,17518,https://i.ytimg.com/vi/d380meD0W0M/default.jpg,False,False,False,I know it's been a while since we did this sho...


In [8]:
# Get an initial overview of the dataset (entry count, non-null counts, dtypes) with the info() method
us_videos_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40949 entries, 0 to 40948
Data columns (total 16 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   video_id                40949 non-null  object
 1   trending_date           40949 non-null  object
 2   title                   40949 non-null  object
 3   channel_title           40949 non-null  object
 4   category_id             40949 non-null  int64 
 5   publish_time            40949 non-null  object
 6   tags                    40949 non-null  object
 7   views                   40949 non-null  int64 
 8   likes                   40949 non-null  int64 
 9   dislikes                40949 non-null  int64 
 10  comment_count           40949 non-null  int64 
 11  thumbnail_link          40949 non-null  object
 12  comments_disabled       40949 non-null  bool  
 13  ratings_disabled        40949 non-null  bool  
 14  video_error_or_removed  40949 non-null  bool  
 15  de

Note: The dataset is the daily trending list from 2017-11-14 to 2018-04-16. A video that stays on the trending list for multiple days appears as multiple entries, and its "views", "likes", "dislikes" and "comment_count" values accumulate from one entry to the next. Our approach to this duplicated data is based on the "video_id" column, which has a unique value for each video: we keep the last occurrence of each video, which has the most up-to-date values for the columns mentioned above, and drop the other ones. (This assumes the rows are stored in chronological order.)

In [10]:
# Drop duplicated entries and keep the last occurrence as the true data.
# The result is reassigned instead of using inplace=True, so the step is an
# explicit transformation; the surviving rows and their index are the same.
us_videos_df = us_videos_df.drop_duplicates(subset="video_id", keep="last")
us_videos_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 6351 entries, 10 to 40948
Data columns (total 16 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   video_id                6351 non-null   object
 1   trending_date           6351 non-null   object
 2   title                   6351 non-null   object
 3   channel_title           6351 non-null   object
 4   category_id             6351 non-null   int64 
 5   publish_time            6351 non-null   object
 6   tags                    6351 non-null   object
 7   views                   6351 non-null   int64 
 8   likes                   6351 non-null   int64 
 9   dislikes                6351 non-null   int64 
 10  comment_count           6351 non-null   int64 
 11  thumbnail_link          6351 non-null   object
 12  comments_disabled       6351 non-null   bool  
 13  ratings_disabled        6351 non-null   bool  
 14  video_error_or_removed  6351 non-null   bool  
 15  de

### "category_id" column

In [11]:
# Lookup table from category id to category name, used to label each
# video's category_id with a readable category_type.
categories = {
    1: "Film & Animation",
    2: "Autos & Vehicles",
    10: "Music",
    15: "Pets & Animals",
    17: "Sports",
    18: "Short Movies",
    19: "Travel & Events",
    20: "Gaming",
    21: "Videoblogging",
    22: "People & Blogs",
    23: "Comedy",
    24: "Entertainment",
    25: "News & Politics",
    26: "Howto & Style",
    27: "Education",
    28: "Science & Technology",
    29: "Nonprofits & Activism",
    30: "Movies",
    31: "Anime/Animation",
    32: "Action/Adventure",
    33: "Classics",
    34: "Comedy",
    35: "Documentary",
    36: "Drama",
    37: "Family",
    38: "Foreign",
    39: "Horror",
    # Previously keyed as 49, which left id 40 unmapped and made a lookup of
    # category 40 fail with a KeyError.
    40: "Sci-Fi/Fantasy",
    41: "Thriller",
    42: "Shorts",
    43: "Shows",
    44: "Trailers",
}

In [12]:
# Add "category_type": the category name looked up from each row's category_id
# (an id missing from `categories` raises a KeyError)
us_videos_df["category_type"] = us_videos_df["category_id"].apply(categories.__getitem__)

# Preview the dataframe with the new column
us_videos_df.head()

Unnamed: 0,video_id,trending_date,title,channel_title,category_id,publish_time,tags,views,likes,dislikes,comment_count,thumbnail_link,comments_disabled,ratings_disabled,video_error_or_removed,description,category_type
10,9wRQljFNDW8,17.14.11,Dion Lewis' 103-Yd Kick Return TD vs. Denver! ...,NFL,17,2017-11-13T02:05:26.000Z,"NFL|""Football""|""offense""|""defense""|""afc""|""nfc""...",81377,655,25,177,https://i.ytimg.com/vi/9wRQljFNDW8/default.jpg,False,False,False,New England Patriots returner Dion Lewis blast...,Sports
36,Om_zGhJLZ5U,17.14.11,TL;DW - Every DCEU Movie Before Justice League,Screen Junkies,1,2017-11-12T18:00:03.000Z,"screenjunkies|""screen junkies""|""sj news""|""hone...",288922,7515,792,2111,https://i.ytimg.com/vi/Om_zGhJLZ5U/default.jpg,False,False,False,With Justice League approaching fast we rewatc...,Film & Animation
41,goP4Z5wyOlM,17.14.11,Iraq-Iran earthquake: Deadly tremor hits borde...,BBC News,25,2017-11-12T21:16:40.000Z,"bbc|""bbc news""|""news""|""iran""|""iran news""|""iraq...",34785,308,26,413,https://i.ytimg.com/vi/goP4Z5wyOlM/default.jpg,False,False,False,A strong 7.2-magnitude earthquake has rattled ...,News & Politics
55,8NHA23f7LvU,17.14.11,Jason Momoa Wows Hugh Grant With Some Dothraki...,The Graham Norton Show,24,2017-11-10T19:06:23.000Z,"Graham Norton|""Graham Norton Show Official""|""E...",1496225,16116,236,605,https://i.ytimg.com/vi/8NHA23f7LvU/default.jpg,False,False,False,I think Sarah Millican was very excited for th...,Entertainment
76,IE-xepGLVt8,17.14.11,Mayo Clinic's first face transplant patient me...,Mayo Clinic,28,2017-11-10T12:04:17.000Z,"Mayo Clinic|""Health Care (Issue)""|""Healthcare ...",237307,1896,74,260,https://i.ytimg.com/vi/IE-xepGLVt8/default.jpg,False,False,False,One and a half years after the surgery that tr...,Science & Technology


### Drop thumbnail_link column

In [13]:
# Drop the thumbnail_link column
us_videos_df = us_videos_df.drop(columns="thumbnail_link")
us_videos_df.head()

Unnamed: 0,video_id,trending_date,title,channel_title,category_id,publish_time,tags,views,likes,dislikes,comment_count,comments_disabled,ratings_disabled,video_error_or_removed,description,category_type
10,9wRQljFNDW8,17.14.11,Dion Lewis' 103-Yd Kick Return TD vs. Denver! ...,NFL,17,2017-11-13T02:05:26.000Z,"NFL|""Football""|""offense""|""defense""|""afc""|""nfc""...",81377,655,25,177,False,False,False,New England Patriots returner Dion Lewis blast...,Sports
36,Om_zGhJLZ5U,17.14.11,TL;DW - Every DCEU Movie Before Justice League,Screen Junkies,1,2017-11-12T18:00:03.000Z,"screenjunkies|""screen junkies""|""sj news""|""hone...",288922,7515,792,2111,False,False,False,With Justice League approaching fast we rewatc...,Film & Animation
41,goP4Z5wyOlM,17.14.11,Iraq-Iran earthquake: Deadly tremor hits borde...,BBC News,25,2017-11-12T21:16:40.000Z,"bbc|""bbc news""|""news""|""iran""|""iran news""|""iraq...",34785,308,26,413,False,False,False,A strong 7.2-magnitude earthquake has rattled ...,News & Politics
55,8NHA23f7LvU,17.14.11,Jason Momoa Wows Hugh Grant With Some Dothraki...,The Graham Norton Show,24,2017-11-10T19:06:23.000Z,"Graham Norton|""Graham Norton Show Official""|""E...",1496225,16116,236,605,False,False,False,I think Sarah Millican was very excited for th...,Entertainment
76,IE-xepGLVt8,17.14.11,Mayo Clinic's first face transplant patient me...,Mayo Clinic,28,2017-11-10T12:04:17.000Z,"Mayo Clinic|""Health Care (Issue)""|""Healthcare ...",237307,1896,74,260,False,False,False,One and a half years after the surgery that tr...,Science & Technology


### "trending_date" and "publish_time" columns

In [14]:
# Parse the trending_date strings (two-digit year, then day, then month) into datetime
raw_trending_dates = us_videos_df["trending_date"]
us_videos_df["trending_date"] = pd.to_datetime(raw_trending_dates, format="%y.%d.%m")

In [15]:
# Preview the rows to confirm trending_date now displays as a full date
us_videos_df.head()

Unnamed: 0,video_id,trending_date,title,channel_title,category_id,publish_time,tags,views,likes,dislikes,comment_count,comments_disabled,ratings_disabled,video_error_or_removed,description,category_type
10,9wRQljFNDW8,2017-11-14,Dion Lewis' 103-Yd Kick Return TD vs. Denver! ...,NFL,17,2017-11-13T02:05:26.000Z,"NFL|""Football""|""offense""|""defense""|""afc""|""nfc""...",81377,655,25,177,False,False,False,New England Patriots returner Dion Lewis blast...,Sports
36,Om_zGhJLZ5U,2017-11-14,TL;DW - Every DCEU Movie Before Justice League,Screen Junkies,1,2017-11-12T18:00:03.000Z,"screenjunkies|""screen junkies""|""sj news""|""hone...",288922,7515,792,2111,False,False,False,With Justice League approaching fast we rewatc...,Film & Animation
41,goP4Z5wyOlM,2017-11-14,Iraq-Iran earthquake: Deadly tremor hits borde...,BBC News,25,2017-11-12T21:16:40.000Z,"bbc|""bbc news""|""news""|""iran""|""iran news""|""iraq...",34785,308,26,413,False,False,False,A strong 7.2-magnitude earthquake has rattled ...,News & Politics
55,8NHA23f7LvU,2017-11-14,Jason Momoa Wows Hugh Grant With Some Dothraki...,The Graham Norton Show,24,2017-11-10T19:06:23.000Z,"Graham Norton|""Graham Norton Show Official""|""E...",1496225,16116,236,605,False,False,False,I think Sarah Millican was very excited for th...,Entertainment
76,IE-xepGLVt8,2017-11-14,Mayo Clinic's first face transplant patient me...,Mayo Clinic,28,2017-11-10T12:04:17.000Z,"Mayo Clinic|""Health Care (Issue)""|""Healthcare ...",237307,1896,74,260,False,False,False,One and a half years after the surgery that tr...,Science & Technology


In [16]:
# Convert the publish_time to datetime.
# The raw values are ISO 8601 strings with a trailing "Z" (UTC). They are
# parsed explicitly as UTC and the timezone is then dropped, so the column
# ends up as timezone-naive datetimes holding the UTC wall-clock time.
# This avoids relying on astype("datetime64[ns]") to parse timezone-marked
# strings implicitly.
us_videos_df["publish_time"] = pd.to_datetime(
    us_videos_df["publish_time"], utc=True
).dt.tz_localize(None)

In [17]:
# Confirm that trending_date and publish_time are now datetime64[ns] columns
us_videos_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 6351 entries, 10 to 40948
Data columns (total 16 columns):
 #   Column                  Non-Null Count  Dtype         
---  ------                  --------------  -----         
 0   video_id                6351 non-null   object        
 1   trending_date           6351 non-null   datetime64[ns]
 2   title                   6351 non-null   object        
 3   channel_title           6351 non-null   object        
 4   category_id             6351 non-null   int64         
 5   publish_time            6351 non-null   datetime64[ns]
 6   tags                    6351 non-null   object        
 7   views                   6351 non-null   int64         
 8   likes                   6351 non-null   int64         
 9   dislikes                6351 non-null   int64         
 10  comment_count           6351 non-null   int64         
 11  comments_disabled       6351 non-null   bool          
 12  ratings_disabled        6351 non-null   bool  

In [18]:
# Split the publish timestamp into separate columns: the calendar date goes
# into a new "publish_date" column and "publish_time" keeps only the time of day
publish_timestamps = us_videos_df["publish_time"]
us_videos_df["publish_date"] = publish_timestamps.dt.date
us_videos_df["publish_time"] = publish_timestamps.dt.time
us_videos_df.head()

Unnamed: 0,video_id,trending_date,title,channel_title,category_id,publish_time,tags,views,likes,dislikes,comment_count,comments_disabled,ratings_disabled,video_error_or_removed,description,category_type,publish_date
10,9wRQljFNDW8,2017-11-14,Dion Lewis' 103-Yd Kick Return TD vs. Denver! ...,NFL,17,02:05:26,"NFL|""Football""|""offense""|""defense""|""afc""|""nfc""...",81377,655,25,177,False,False,False,New England Patriots returner Dion Lewis blast...,Sports,2017-11-13
36,Om_zGhJLZ5U,2017-11-14,TL;DW - Every DCEU Movie Before Justice League,Screen Junkies,1,18:00:03,"screenjunkies|""screen junkies""|""sj news""|""hone...",288922,7515,792,2111,False,False,False,With Justice League approaching fast we rewatc...,Film & Animation,2017-11-12
41,goP4Z5wyOlM,2017-11-14,Iraq-Iran earthquake: Deadly tremor hits borde...,BBC News,25,21:16:40,"bbc|""bbc news""|""news""|""iran""|""iran news""|""iraq...",34785,308,26,413,False,False,False,A strong 7.2-magnitude earthquake has rattled ...,News & Politics,2017-11-12
55,8NHA23f7LvU,2017-11-14,Jason Momoa Wows Hugh Grant With Some Dothraki...,The Graham Norton Show,24,19:06:23,"Graham Norton|""Graham Norton Show Official""|""E...",1496225,16116,236,605,False,False,False,I think Sarah Millican was very excited for th...,Entertainment,2017-11-10
76,IE-xepGLVt8,2017-11-14,Mayo Clinic's first face transplant patient me...,Mayo Clinic,28,12:04:17,"Mayo Clinic|""Health Care (Issue)""|""Healthcare ...",237307,1896,74,260,False,False,False,One and a half years after the surgery that tr...,Science & Technology,2017-11-10


In [19]:
# Re-check the dtypes after the split: publish_time now holds time-of-day objects (dtype object)
us_videos_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 6351 entries, 10 to 40948
Data columns (total 17 columns):
 #   Column                  Non-Null Count  Dtype         
---  ------                  --------------  -----         
 0   video_id                6351 non-null   object        
 1   trending_date           6351 non-null   datetime64[ns]
 2   title                   6351 non-null   object        
 3   channel_title           6351 non-null   object        
 4   category_id             6351 non-null   int64         
 5   publish_time            6351 non-null   object        
 6   tags                    6351 non-null   object        
 7   views                   6351 non-null   int64         
 8   likes                   6351 non-null   int64         
 9   dislikes                6351 non-null   int64         
 10  comment_count           6351 non-null   int64         
 11  comments_disabled       6351 non-null   bool          
 12  ratings_disabled        6351 non-null   bool  

In [20]:
# Convert publish_date from date objects back to datetime so it can be
# subtracted from trending_date; publish_time stays as time-of-day objects.
publish_dates = us_videos_df["publish_date"]
us_videos_df["publish_date"] = pd.to_datetime(publish_dates)
us_videos_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 6351 entries, 10 to 40948
Data columns (total 17 columns):
 #   Column                  Non-Null Count  Dtype         
---  ------                  --------------  -----         
 0   video_id                6351 non-null   object        
 1   trending_date           6351 non-null   datetime64[ns]
 2   title                   6351 non-null   object        
 3   channel_title           6351 non-null   object        
 4   category_id             6351 non-null   int64         
 5   publish_time            6351 non-null   object        
 6   tags                    6351 non-null   object        
 7   views                   6351 non-null   int64         
 8   likes                   6351 non-null   int64         
 9   dislikes                6351 non-null   int64         
 10  comment_count           6351 non-null   int64         
 11  comments_disabled       6351 non-null   bool          
 12  ratings_disabled        6351 non-null   bool  

In [21]:
# Add a new column "diff_days": the number of whole days from publish_date to trending_date
publish_to_trending = us_videos_df["trending_date"].sub(us_videos_df["publish_date"])
us_videos_df["diff_days"] = publish_to_trending.dt.days
us_videos_df.head()

Unnamed: 0,video_id,trending_date,title,channel_title,category_id,publish_time,tags,views,likes,dislikes,comment_count,comments_disabled,ratings_disabled,video_error_or_removed,description,category_type,publish_date,diff_days
10,9wRQljFNDW8,2017-11-14,Dion Lewis' 103-Yd Kick Return TD vs. Denver! ...,NFL,17,02:05:26,"NFL|""Football""|""offense""|""defense""|""afc""|""nfc""...",81377,655,25,177,False,False,False,New England Patriots returner Dion Lewis blast...,Sports,2017-11-13,1
36,Om_zGhJLZ5U,2017-11-14,TL;DW - Every DCEU Movie Before Justice League,Screen Junkies,1,18:00:03,"screenjunkies|""screen junkies""|""sj news""|""hone...",288922,7515,792,2111,False,False,False,With Justice League approaching fast we rewatc...,Film & Animation,2017-11-12,2
41,goP4Z5wyOlM,2017-11-14,Iraq-Iran earthquake: Deadly tremor hits borde...,BBC News,25,21:16:40,"bbc|""bbc news""|""news""|""iran""|""iran news""|""iraq...",34785,308,26,413,False,False,False,A strong 7.2-magnitude earthquake has rattled ...,News & Politics,2017-11-12,2
55,8NHA23f7LvU,2017-11-14,Jason Momoa Wows Hugh Grant With Some Dothraki...,The Graham Norton Show,24,19:06:23,"Graham Norton|""Graham Norton Show Official""|""E...",1496225,16116,236,605,False,False,False,I think Sarah Millican was very excited for th...,Entertainment,2017-11-10,4
76,IE-xepGLVt8,2017-11-14,Mayo Clinic's first face transplant patient me...,Mayo Clinic,28,12:04:17,"Mayo Clinic|""Health Care (Issue)""|""Healthcare ...",237307,1896,74,260,False,False,False,One and a half years after the surgery that tr...,Science & Technology,2017-11-10,4


In [22]:
# Check the value counts for the diff_days column (number of videos per day gap)
us_videos_df['diff_days'].value_counts()

6       984
7       979
5       645
8       527
4       344
       ... 
2863      1
2940      1
584       1
3113      1
66        1
Name: diff_days, Length: 130, dtype: int64

In [23]:
# Reorder the columns: id and dates first, then title/channel/category/tags,
# the engagement counts, the status flags, and the description last
column_order = [
    "video_id",
    "publish_date",
    "publish_time",
    "trending_date",
    "diff_days",
    "title",
    "channel_title",
    "category_id",
    "category_type",
    "tags",
    "views",
    "likes",
    "dislikes",
    "comment_count",
    "comments_disabled",
    "ratings_disabled",
    "video_error_or_removed",
    "description",
]
us_videos_df = us_videos_df.loc[:, column_order]

us_videos_df.head()

Unnamed: 0,video_id,publish_date,publish_time,trending_date,diff_days,title,channel_title,category_id,category_type,tags,views,likes,dislikes,comment_count,comments_disabled,ratings_disabled,video_error_or_removed,description
10,9wRQljFNDW8,2017-11-13,02:05:26,2017-11-14,1,Dion Lewis' 103-Yd Kick Return TD vs. Denver! ...,NFL,17,Sports,"NFL|""Football""|""offense""|""defense""|""afc""|""nfc""...",81377,655,25,177,False,False,False,New England Patriots returner Dion Lewis blast...
36,Om_zGhJLZ5U,2017-11-12,18:00:03,2017-11-14,2,TL;DW - Every DCEU Movie Before Justice League,Screen Junkies,1,Film & Animation,"screenjunkies|""screen junkies""|""sj news""|""hone...",288922,7515,792,2111,False,False,False,With Justice League approaching fast we rewatc...
41,goP4Z5wyOlM,2017-11-12,21:16:40,2017-11-14,2,Iraq-Iran earthquake: Deadly tremor hits borde...,BBC News,25,News & Politics,"bbc|""bbc news""|""news""|""iran""|""iran news""|""iraq...",34785,308,26,413,False,False,False,A strong 7.2-magnitude earthquake has rattled ...
55,8NHA23f7LvU,2017-11-10,19:06:23,2017-11-14,4,Jason Momoa Wows Hugh Grant With Some Dothraki...,The Graham Norton Show,24,Entertainment,"Graham Norton|""Graham Norton Show Official""|""E...",1496225,16116,236,605,False,False,False,I think Sarah Millican was very excited for th...
76,IE-xepGLVt8,2017-11-10,12:04:17,2017-11-14,4,Mayo Clinic's first face transplant patient me...,Mayo Clinic,28,Science & Technology,"Mayo Clinic|""Health Care (Issue)""|""Healthcare ...",237307,1896,74,260,False,False,False,One and a half years after the surgery that tr...


### Remove "description" column

In [24]:
# Drop the "description" column from the dataframe
us_videos_df = us_videos_df.drop("description", axis="columns")
us_videos_df.head()

Unnamed: 0,video_id,publish_date,publish_time,trending_date,diff_days,title,channel_title,category_id,category_type,tags,views,likes,dislikes,comment_count,comments_disabled,ratings_disabled,video_error_or_removed
10,9wRQljFNDW8,2017-11-13,02:05:26,2017-11-14,1,Dion Lewis' 103-Yd Kick Return TD vs. Denver! ...,NFL,17,Sports,"NFL|""Football""|""offense""|""defense""|""afc""|""nfc""...",81377,655,25,177,False,False,False
36,Om_zGhJLZ5U,2017-11-12,18:00:03,2017-11-14,2,TL;DW - Every DCEU Movie Before Justice League,Screen Junkies,1,Film & Animation,"screenjunkies|""screen junkies""|""sj news""|""hone...",288922,7515,792,2111,False,False,False
41,goP4Z5wyOlM,2017-11-12,21:16:40,2017-11-14,2,Iraq-Iran earthquake: Deadly tremor hits borde...,BBC News,25,News & Politics,"bbc|""bbc news""|""news""|""iran""|""iran news""|""iraq...",34785,308,26,413,False,False,False
55,8NHA23f7LvU,2017-11-10,19:06:23,2017-11-14,4,Jason Momoa Wows Hugh Grant With Some Dothraki...,The Graham Norton Show,24,Entertainment,"Graham Norton|""Graham Norton Show Official""|""E...",1496225,16116,236,605,False,False,False
76,IE-xepGLVt8,2017-11-10,12:04:17,2017-11-14,4,Mayo Clinic's first face transplant patient me...,Mayo Clinic,28,Science & Technology,"Mayo Clinic|""Health Care (Issue)""|""Healthcare ...",237307,1896,74,260,False,False,False


In [25]:
# Export the cleaned dataframe to a csv file (row index omitted).
# The file is written to the same ./Data folder that the inputs are read from
# and the category csv is saved to; the previous "../Data" target pointed one
# directory above the folder used everywhere else in this notebook.
us_videos_df.to_csv("./Data/us_videos_cleaned.csv", index=False)

### "tags" column

In [23]:
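# # NOTE: this cell is disabled. After the duplicate drop the dataframe index
# # starts at 10, so the labels 0, 1 and 2 no longer exist and .loc[0] would
# # raise a KeyError; use .iloc[0], .iloc[1], .iloc[2] if this is re-enabled.
# # Checking tags format for the first three rows
# print(us_videos_df.loc[0]['tags'])
# print("--------------------------")
# print(us_videos_df.loc[1]['tags'])
# print("--------------------------")
# print(us_videos_df.loc[2]['tags'])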

In [24]:
# # Create tags_count variable 
# tags = us_videos_df['tags'].str.split("|")
# tags

In [25]:
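# # NOTE: this cell is disabled and would not work as written: strip("") removes
# # nothing (the quote character would be strip('"')), and each item of `tags`
# # is a list of tags produced by str.split, not a single string.
# # Remove the quotations from tags
# tags = [item.strip("") for item in tags]

# tags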

In [26]:
# # Create tags_count column and using list comprehension to fill in the counts of tags for each row
# us_videos_df['tags_count'] = [len(i) for i in tags]
# us_videos_df.head()