In [55]:
# Reload the autoreload extension and set it to mode 2 so edits to imported
# modules are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [56]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import re
import datetime

## Looking into the data

In [65]:
# Load the tweets CSV, then show the first five rows transposed so each tweet
# is a column and every field stays visible.
tweets_df = pd.read_csv("dataset/javascript_top.csv")
tweets_df.head().T

Unnamed: 0,0,1,2,3,4
body,"""Return [data, loading, error];}"" http://socia...","Late tweet, 5th 6th (Days 53, 54). Very little...",Vue.js 2 Essentials: Build Your First Vue App☞...,When u write 500 lines of codes and exit witho...,My #first chat app hope to collaborate #javasc...
likes,0,0,0,2,
link,/eibrahim/status/1093513673618964480,/L1K3R0535/status/1093513800098242565,/javascript_devv/status/1093514298486341632,/BarineSambaris/status/1092896518573568008,/GoodwishSifiso/status/1093224111919251456
replies,0,0,0,3,
retweets,,3,0,0,0
time,6:15 AM - 7 Feb 2019,6:16 AM - 7 Feb 2019,6:18 AM - 7 Feb 2019,1:23 PM - 5 Feb 2019,11:05 AM - 6 Feb 2019
writer,@eibrahim,@L1K3R0535,@javascript_devv,@BarineSambaris,@GoodwishSifiso


In [66]:
# Count the missing values in each column.
tweets_df.isna().sum()

body          0
likes        17
link          0
replies     172
retweets     40
time          0
writer        0
dtype: int64

### We found that all missing values are in fact 0, so we replace them with 0

In [67]:
# Missing engagement counts stand for "none", so replace them with 0.
# The fill goes through the DataFrame and the result is assigned back:
# calling fillna(inplace=True) on a column accessor is chained assignment,
# which warns on recent pandas and leaves tweets_df unchanged once
# Copy-on-Write is enabled.
tweets_df = tweets_df.fillna({'replies': 0, 'retweets': 0, 'likes': 0})

### Now we will parse our time feature into its time, day, month, and year parts and convert it to a datetime

In [68]:
# Lower-case three-letter month abbreviation -> calendar month number (1-12).
months = {
    abbreviation: number
    for number, abbreviation in enumerate(
        ('jan', 'feb', 'mar', 'apr', 'may', 'jun',
         'jul', 'aug', 'sep', 'oct', 'nov', 'dec'),
        start=1,
    )
}

def get_time(date_time):
    """Return the clock part of a ``'<time> - <day> <month> <year>'`` string.

    Everything before the first ``'-'`` is returned with surrounding
    whitespace removed, e.g. ``'6:15 AM - 7 Feb 2019'`` -> ``'6:15 AM'``.
    """
    clock_part, _, _ = date_time.partition('-')
    return clock_part.strip()

def get_day(date_time):
    """Return the day of month from a ``'<time> - <day> <month> <year>'`` string.

    The text after the first ``'-'`` is split on single spaces and its first
    token is converted to ``int``, e.g. ``'6:15 AM - 7 Feb 2019'`` -> ``7``.
    """
    date_part = date_time.split('-')[1].strip()
    day_token = date_part.split(' ')[0]
    return int(day_token.strip())

def get_month(date_time):
    """Return the month number from a ``'<time> - <day> <month> <year>'`` string.

    The second space-separated token after the first ``'-'`` is lower-cased
    and looked up in the module-level ``months`` mapping; an abbreviation
    that is not a key there raises ``KeyError``.
    """
    date_part = date_time.split('-')[1].strip()
    month_token = date_part.split(' ')[1]
    return months[month_token.strip().lower()]

def get_year(date_time):
    """Return the year from a ``'<time> - <day> <month> <year>'`` string.

    The third space-separated token after the first ``'-'`` is converted to
    ``int``, e.g. ``'6:15 AM - 7 Feb 2019'`` -> ``2019``.
    """
    date_part = date_time.split('-')[1].strip()
    year_token = date_part.split(' ')[2]
    return int(year_token.strip())

In [69]:
def get_time_column(df, column_name):
    """Parse a column of ``'<time> - <day> <month> <year>'`` strings into datetimes.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the column to convert.
    column_name : str
        Name of the column with the raw timestamp strings.

    Returns
    -------
    pandas.DatetimeIndex
        One timestamp per row, in row order.

    Notes
    -----
    If the column already has a datetime dtype it is returned as-is. The
    caller stores the result back into the same column, so without this
    guard re-running the cell would try to ``.split`` Timestamp objects and
    fail with ``AttributeError``.
    """
    column = df[column_name]
    if pd.api.types.is_datetime64_any_dtype(column):
        return pd.DatetimeIndex(column)

    # Rebuild every entry in an all-numeric layout so one fixed format string
    # parses the whole column.
    date_list = [
        f'{get_time(item)} - {get_day(item)}/{get_month(item)}/{get_year(item)}'
        for item in column
    ]
    return pd.to_datetime(date_list, format='%I:%M %p - %d/%m/%Y')
# Swap the raw timestamp strings for parsed datetimes, then keep the hour of
# each tweet as its own column.
tweets_df['time'] = get_time_column(df=tweets_df, column_name='time')
tweets_df['hour'] = tweets_df['time'].dt.hour

## Content exploration per period

In [74]:
def get_top_content(df, periods, period_hours=4):
    """Split tweets into consecutive time-of-day buckets.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with an ``hour`` column.
    periods : int
        Number of buckets to return. Bucket ``i`` holds the rows whose
        ``hour // period_hours`` equals ``i``; rows with a larger bucket
        number are not returned.
    period_hours : int, default 4
        Width of each bucket in hours. The default keeps the original
        four-hour buckets.

    Returns
    -------
    list of pandas.DataFrame
        ``periods`` frames, each carrying an extra ``time-period`` column
        with its bucket number.
    """
    # Label the rows on a copy: adding and then dropping the helper column on
    # the caller's frame would mutate it and erase any existing
    # 'time-period' column.
    labelled = df.assign(**{'time-period': df['hour'] // period_hours})
    return [
        labelled[labelled['time-period'] == period]
        for period in range(periods)
    ]

In [76]:
# Split the tweets into six buckets by hour // 4 (bucket numbers 0-5).
x = get_top_content(tweets_df, 6)

In [77]:
# Tweets in the first bucket (period 0, i.e. hours 0-3).
x[0]

Unnamed: 0,body,likes,link,replies,retweets,time,writer,hour,time-period
7,The Complete Angular 5 Essentials Course For B...,0,/angular_geek/status/1093078423243640833,0.0,0.0,2019-02-06 01:26:00,@angular_geek,1,0
12,#TIL If you ever feel like storing a bool in s...,0,/kzadurska/status/1093462166294925312,0.0,0.0,2019-02-07 02:51:00,@kzadurska,2,0
13,"LightSureawasteful, wasteful, meaningful Javas...",0,/start_this_up/status/1093426606264090624,0.0,0.0,2019-02-07 00:29:00,@start_this_up,0,0
18,Nuxt.js - Vue.js on Steroids☞ http://bit.ly/2B...,0,/8Programming/status/1093091060417482752,0.0,2.0,2019-02-06 02:16:00,@8Programming,2,0
19,The Reality Of Coding Some days are really tou...,89,/DanEnglishby/status/1093421742406713345,2.0,34.0,2019-02-07 00:10:00,@DanEnglishby,0,0
27,Three Undocumented Features of #JSON https://b...,0,/lepinekongdev/status/1093104456709877761,0.0,0.0,2019-02-06 03:09:00,@lepinekongdev,3,0
37,Using flowcharts in my development gives me a ...,29,/SirMike99/status/1093455677459177474,0.0,11.0,2019-02-07 02:25:00,@SirMike99,2,0
38,Node.js - From Zero to Web App☞ http://bit.ly/...,3,/DevJs/status/1093445201572581376,0.0,2.0,2019-02-07 01:43:00,@DevJs,1,0
40,Complete React JS web developer with ES6 - Bui...,0,/A_programmers/status/1093442033925136385,0.0,2.0,2019-02-07 01:31:00,@A_programmers,1,0
50,Never ever get down hearted when learning to c...,232,/DanEnglishby/status/1092738562343223297,18.0,49.0,2019-02-05 02:55:00,@DanEnglishby,2,0
