In [None]:
# (Re)load the IPython autoreload extension; %reload_ext is safe to run repeatedly.
%reload_ext autoreload
# Mode 2: reload all imported modules before each cell is executed.
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import re
import datetime

## Looking into the data

In [None]:
# Load the tweets dataset and preview the first rows, transposed so that each
# column of the file is displayed as a row.
tweets_df = pd.read_csv(filepath_or_buffer="dataset/javascript_top.csv")
tweets_df.head().transpose()

In [None]:
# Number of missing values in each column.
tweets_df.isnull().sum()

### We found that all missing values are in fact 0, so we fill them with 0

In [None]:
# Missing replies/retweets/likes are treated as zero counts.
# The filled frame is assigned back explicitly: calling
# `tweets_df.replies.fillna(0, inplace=True)` is chained assignment on an
# attribute-accessed column, which warns on pandas 2.x and no longer updates
# `tweets_df` at all once copy-on-write is enabled (pandas 3).
tweets_df = tweets_df.fillna({'replies': 0, 'retweets': 0, 'likes': 0})

### Now we will convert our time feature into three features: day, month and year

In [None]:
# Lookup table: lower-case three-letter month abbreviation -> month number (1-12).
months = {
    abbreviation: number
    for number, abbreviation in enumerate(
        ['jan', 'feb', 'mar', 'apr', 'may', 'jun',
         'jul', 'aug', 'sep', 'oct', 'nov', 'dec'],
        start=1,
    )
}

def get_time(date_time):
    """Return the text before the first '-' in ``date_time``, stripped of whitespace.

    If ``date_time`` contains no '-', the whole string (stripped) is returned.
    Presumably the input looks like '7:12 PM - 3 Jan 2019' -- confirm against the data.
    """
    time_part, _, _ = date_time.partition('-')
    return time_part.strip()

def get_day(date_time):
    """Return the day number as an int.

    The segment between the first and second '-' is taken as the date part;
    its first space-separated token is converted with ``int``.
    Presumably the input looks like '7:12 PM - 3 Jan 2019' -- confirm against the data.
    """
    date_part = date_time.split('-')[1].strip()
    day_token = date_part.split(' ')[0]
    return int(day_token.strip())

def get_month(date_time):
    """Return the month number looked up in the module-level ``months`` table.

    The segment between the first and second '-' is taken as the date part;
    its second space-separated token is lower-cased and used as the key.
    Raises ``KeyError`` if that token is not a key of ``months``.
    """
    date_part = date_time.split('-')[1].strip()
    month_token = date_part.split(' ')[1].strip()
    return months[month_token.lower()]

def get_year(date_time):
    """Return the year as an int.

    The segment between the first and second '-' is taken as the date part;
    its third space-separated token is converted with ``int``.
    Presumably the input looks like '7:12 PM - 3 Jan 2019' -- confirm against the data.
    """
    date_part = date_time.split('-')[1].strip()
    year_token = date_part.split(' ')[2]
    return int(year_token.strip())

In [None]:
def get_time_column(df, column_name):
    """Parse a column of raw timestamp strings into pandas datetimes.

    Each string is split with ``get_time``/``get_day``/``get_month``/``get_year``
    and re-assembled as ``'<time> - <day>/<month>/<year>'`` before being parsed
    with the format ``'%I:%M %p - %d/%m/%Y'``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the column to convert.
    column_name : str
        Name of the column in ``df``.

    Returns
    -------
    pandas.DatetimeIndex
        One timestamp per row of ``df[column_name]``, in row order.
    """
    column = df[column_name]

    # The notebook writes the result back into the same column, so re-running
    # the cell would feed Timestamps (which have no .split) into the string
    # parsers. Pass already-parsed data through so the cell is idempotent.
    if pd.api.types.is_datetime64_any_dtype(column):
        return pd.DatetimeIndex(column)

    date_list = [
        f'{get_time(item)} - {get_day(item)}/{get_month(item)}/{get_year(item)}'
        for item in column
    ]
    return pd.to_datetime(date_list, format='%I:%M %p - %d/%m/%Y')
# Overwrite the 'time' column with the datetimes returned by get_time_column.
tweets_df['time'] = get_time_column(df=tweets_df, column_name='time')

In [None]:
# Derive calendar features from the parsed 'time' column.
tweets_df['period'] = tweets_df['time'].dt.hour // 6  # six-hour block of the day: 0-3
tweets_df['day'] = tweets_df['time'].dt.day
tweets_df['month'] = tweets_df['time'].dt.month
# `Series.dt.week` was deprecated in pandas 1.1 and removed in 2.0;
# `dt.isocalendar().week` is its replacement. It yields UInt32, so cast back
# to int64 to match what `.dt.week` used to produce.
# NOTE(review): this is the ISO week-of-year modulo 4 (shifted to 1-4), which
# is not the same as the week within the month -- confirm that is intended.
tweets_df['week'] = tweets_df['time'].dt.isocalendar().week.astype('int64') % 4 + 1
tweets_df['year'] = tweets_df['time'].dt.year

In [None]:
# Count tweets (non-null 'body' values) in each six-hour period.
period_stats = (
    tweets_df.groupby('period')['body']
    .count()
    .reset_index(name='tweets')
)
period_stats.head()

In [None]:
# Bar chart of tweet counts per period; the trailing ';' hides the Axes repr.
period_stats.plot.bar(x='period', y='tweets');

In [None]:
# Count tweets (non-null 'body' values) for every distinct day/month/year.
day_stats = (
    tweets_df
    .groupby(['day', 'month', 'year'], as_index=False)['body']
    .count()
    .rename(columns={'body': 'tweets'})
)
day_stats.head()

In [None]:
# Assemble 'day/month/year' strings for each row, then parse them into a
# datetime column that the line plot can use as its x axis.
date = (
    day_stats['day'].astype(str)
    + '/' + day_stats['month'].astype(str)
    + '/' + day_stats['year'].astype(str)
).tolist()
day_stats['date'] = pd.to_datetime(date, format='%d/%m/%Y')

In [None]:
# Line chart of tweets per day; the trailing ';' hides the Axes repr.
day_stats.plot.line(x='date', y='tweets');

In [None]:
# Count tweets (non-null 'body' values) per week bucket within each month/year.
# Group with year first, then month, then week: groupby sorts by its keys in
# order, and the bar chart draws rows in frame order, so leading with 'week'
# interleaved every month and year along the x axis. The output columns keep
# their original order.
week_stats = (
    tweets_df.groupby(by=['year', 'month', 'week'])['body']
    .count()
    .reset_index(name='tweets')[['week', 'month', 'year', 'tweets']]
)
week_stats.head()

In [None]:
# Build a 'week/month/year' text label for each row of week_stats.
date = [
    f'{row.week}/{row.month}/{row.year}'
    for row in week_stats.itertuples(index=False)
]
week_stats['week_number'] = date

In [None]:
# Bar chart of tweets per week label; the trailing ';' hides the Axes repr.
week_stats.plot.bar(x='week_number', y='tweets');

In [None]:
# Count tweets (non-null 'body' values) per calendar month.
# Group with year before month: groupby sorts by its keys in order, and the
# bar chart draws rows in frame order, so leading with 'month' placed e.g.
# 1/2019 before 12/2018. The output columns keep their original order.
month_stats = (
    tweets_df.groupby(by=['year', 'month'])['body']
    .count()
    .reset_index(name='tweets')[['month', 'year', 'tweets']]
)
month_stats.head()

In [None]:
# Build a 'month/year' text label for each row of month_stats.
date = list(map('{}/{}'.format, month_stats.month, month_stats.year))
month_stats['date'] = date

In [None]:
# Bar chart of tweets per month label; the trailing ';' hides the Axes repr.
month_stats.plot.bar(x='date', y='tweets');