# Import modules

In [60]:
import numpy as np
import pandas as pd
import matplotlib as plt

### Support functions

In [None]:
# Reformat the date from y.d.m to d/m/Y
def reformat_date(video_list, feature_date='trending_date',
                  input_format='%y.%d.%m', output_format='%d/%m/%Y'):
    """
        Reformat a date column (by default from y.d.m to d/m/Y) for easy reading

        The column is rewritten IN PLACE on ``video_list``; the same object is
        also returned so the call can be used in an assignment or a chain.

        The function is not idempotent: after one call the values are in
        ``output_format`` and no longer match ``input_format``, so calling it
        again on the same frame with the same arguments raises ValueError.

        Args:
            video_list (DataFrame): the list of videos coming from one country
            feature_date (string): name of the date column (Default: trending_date)
            input_format (string): strptime pattern the column is currently
                stored in (Default: '%y.%d.%m')
            output_format (string): strftime pattern to write back
                (Default: '%d/%m/%Y')

        Returns:
            video_list (DataFrame): the same frame, with ``feature_date``
                replaced by strings formatted with ``output_format``

        Raises:
            KeyError: if ``feature_date`` is not a column of ``video_list``
            ValueError: if a value does not match ``input_format``
    """
    # Parse into a local first so the column is only overwritten once, with
    # the final strings, and is left untouched if parsing fails.
    parsed_dates = pd.to_datetime(video_list[feature_date], format=input_format)

    video_list[feature_date] = parsed_dates.dt.strftime(output_format)

    return video_list

# Report the data type of each feature
def type_of_feature(video_list):
    """
        Look up the data type of every feature (column) in the dataset

        Args:
            video_list (DataFrame): the list of videos coming from one country

        Returns:
            feature_types (Series): the dtype of each column, indexed by
                column name
    """
    feature_types = video_list.dtypes
    return feature_types



### Cleaning the data

In [62]:
# Use the Canadian trending videos as the worked example.
ca_csv_path = './youtube_data/CAvideos.csv'
caVideo = pd.read_csv(ca_csv_path)

# reformat_date rewrites the trending_date column on the frame it is given
# and returns that same frame, so rebinding keeps caVideo pointing at it.
caVideo = reformat_date(caVideo)

# Preview the first three rows after cleaning.
caVideo.head(3)

Unnamed: 0,video_id,trending_date,title,channel_title,category_id,publish_time,tags,views,likes,dislikes,comment_count,thumbnail_link,comments_disabled,ratings_disabled,video_error_or_removed,description
0,n1WpP7iowLc,14/11/2017,Eminem - Walk On Water (Audio) ft. Beyoncé,EminemVEVO,10,2017-11-10T17:00:03.000Z,"Eminem|""Walk""|""On""|""Water""|""Aftermath/Shady/In...",17158579,787425,43420,125882,https://i.ytimg.com/vi/n1WpP7iowLc/default.jpg,False,False,False,Eminem's new track Walk on Water ft. Beyoncé i...
1,0dBIkQ4Mz1M,14/11/2017,PLUSH - Bad Unboxing Fan Mail,iDubbbzTV,23,2017-11-13T17:00:00.000Z,"plush|""bad unboxing""|""unboxing""|""fan mail""|""id...",1014651,127794,1688,13030,https://i.ytimg.com/vi/0dBIkQ4Mz1M/default.jpg,False,False,False,STill got a lot of packages. Probably will las...
2,5qpjK5DgCt4,14/11/2017,"Racist Superman | Rudy Mancuso, King Bach & Le...",Rudy Mancuso,23,2017-11-12T19:05:24.000Z,"racist superman|""rudy""|""mancuso""|""king""|""bach""...",3191434,146035,5339,8181,https://i.ytimg.com/vi/5qpjK5DgCt4/default.jpg,False,False,False,WATCH MY PREVIOUS VIDEO ▶ \n\nSUBSCRIBE ► http...


### Exploring the data

In [63]:
# Display the dtype of every column in the Canadian videos frame
type_of_feature(caVideo)

video_id                  object
trending_date             object
title                     object
channel_title             object
category_id                int64
publish_time              object
tags                      object
views                      int64
likes                      int64
dislikes                   int64
comment_count              int64
thumbnail_link            object
comments_disabled           bool
ratings_disabled            bool
video_error_or_removed      bool
description               object
dtype: object

In [None]:
# Collect the distinct trending dates and how many rows fall on each one.
# NOTE(review): trending_date holds 'dd/mm/YYYY' strings at this point (dtype
# object after reformat_date), so np.unique orders the dates as text rather
# than chronologically - confirm that is acceptable for the later steps.
unique_trending_date, count_date = np.unique(caVideo['trending_date'], return_counts=True)