# Tweet Metrics Analysis

In [1]:
import matplotlib.pyplot as plt
from collections import defaultdict
import plotly.express as px
import pandas as pd
import kaleido

### Load dataset

In [3]:
# Read the tweet metrics CSV with explicit column types: identifiers and the
# timestamp stay as strings, the four engagement counters are integers.
df = pd.read_csv(
    'all_tweet_metrics.csv',
    dtype=dict(
        id='string',
        author_id='string',
        retweets='int',
        replies='int',
        likes='int',
        quotes='int',
        created_at='string',
    ),
)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8581627 entries, 0 to 8581626
Data columns (total 7 columns):
 #   Column      Dtype 
---  ------      ----- 
 0   id          string
 1   author_id   string
 2   retweets    int32 
 3   replies     int32 
 4   likes       int32 
 5   quotes      int32 
 6   created_at  string
dtypes: int32(4), string(3)
memory usage: 327.4 MB


In [4]:
# Preview the first rows to confirm the columns loaded as expected.
df.head()

Unnamed: 0,id,author_id,retweets,replies,likes,quotes,created_at
0,7094,15,9,2,4,0,2006-06-19T22:52:24.000Z
1,56935,885,0,0,1,0,2006-11-05T19:19:57.000Z
2,10581681,797223,0,0,0,0,2007-03-21T17:22:33.000Z
3,13458821,1456281,0,0,0,0,2007-03-27T05:58:28.000Z
4,22038081,2838921,0,0,0,0,2007-04-08T13:44:12.000Z


### Monthly frequency of tweets

In [5]:
# Probe: the first seven characters of a `created_at` string are its
# 'YYYY-MM' prefix, which is used as the monthly grouping key below.
df.created_at.loc[0][0:7]

'2006-06'

In [6]:
# Count tweets per calendar month.
#
# `created_at` is loaded as a string whose first seven characters are the
# 'YYYY-MM' key. Slicing the whole column with the vectorised `.str`
# accessor and tallying with `value_counts` replaces a Python-level loop
# that performed one `.loc` lookup per row (over 8.5 million rows).
#
# `freq` is kept as a defaultdict(int) mapping 'YYYY-MM' -> count so later
# cells can keep calling `freq.items()`. Counts are converted to plain ints.
# NOTE(review): rows with a missing `created_at` are skipped by
# `value_counts`; the previous loop would have raised on them.
freq = defaultdict(
    int,
    {
        year_month: int(count)
        for year_month, count in df.created_at.str[:7].value_counts(sort=False).items()
    },
)

### Monthly frequency graph

In [7]:
def month_index(date_str):
    """Map a 'YYYY-MM...' string to a month ordinal counted from 2006.

    The string is split on '-'; the first field is the year and the second
    is the month. The result is ``(year - 2006) * 12 + month``, so later
    months always get larger values (e.g. '2006-05' -> 5, '2010-01' -> 49).
    """
    fields = date_str.split('-')
    return (int(fields[0]) - 2006) * 12 + int(fields[1])

# Quick sanity check of the ordinal for two known months.
print(month_index('2006-05'), month_index('2010-01'), sep='\n')

5
49


In [8]:
# Put the (month, count) pairs into chronological order via the month ordinal.
freq_sorted = list(freq.items())
freq_sorted.sort(key=lambda pair: month_index(pair[0]))

In [9]:
# Tabulate the ordered (month, count) pairs as a two-column frame for plotting.
g_data = pd.DataFrame(freq_sorted, columns=['date', 'freq'])
g_data.head()

Unnamed: 0,date,freq
0,2006-06,1
1,2006-11,1
2,2007-03,2
3,2007-04,2
4,2007-05,19


In [None]:
# Line chart of the monthly tweet counts held in `g_data`.
fig = px.line(g_data, x='date', y='freq')
fig.update_layout(
    title='Monthly Tweet Counts',
    xaxis_title='Months',
    yaxis_title='Tweet counts',
    legend_title='Legends',
)
# Save the static image relative to the working directory (the same place the
# input CSV is read from) rather than to a user-specific absolute path, so the
# notebook runs unchanged on other machines.
fig.write_image('tweets_monthly_freq.jpeg')
fig.show()