In [1]:
import pandas as pd
import numpy as np
import re
import ast
import matplotlib.pyplot as plt
import plotly.plotly as py
import seaborn as sns
import plotly.graph_objs as go
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
%matplotlib inline
init_notebook_mode(connected=True)

In [2]:
tweets_df=pd.read_csv("dataset/javascript_top.csv")
tweets_df.replies.fillna(0,inplace=True)
tweets_df.retweets.fillna(0,inplace=True)
tweets_df.likes.fillna(0,inplace=True)

In [3]:
def get_time(date_time):
    return date_time.split('-')[0].strip()

def get_day(date_time):
    return date_time.split('-')[1].strip().split(' ')[0].strip()

def get_month(date_time):
    return date_time.split('-')[1].strip().split(' ')[1].strip()

def get_year(date_time):
    return date_time.split('-')[1].strip().split(' ')[2].strip()



In [4]:
day=[]
month=[]
year=[]
timestamp=[]
hashtags=[]
mentions=[]
links=[]
hashtags_numbers=[]
mentions_numbers=[]
for i in range(len(tweets_df)):
    tweet_body=tweets_df.iloc[i]['body'].replace('pic',' pic')
    tweet_time=tweets_df.iloc[i]['time']
    timestamp.append(get_time(tweet_time))
    day.append(get_day(tweet_time))
    month.append(get_month(tweet_time))
    year.append(get_year(tweet_time))
    hashtags.append([x.lower() for x in list(set(re.findall(r"#(\w+)", tweet_body)))])
    mentions.append(list(set(re.findall(r"@(\w+)", tweet_body))))
    hashtags_numbers.append(len(hashtags[-1]))
    mentions_numbers.append(len(mentions[-1]))
    urls=re.findall(r"(http://[^ ]+)", tweet_body)
    urls.extend(re.findall(r"(https://[^ ]+)", tweet_body))
    links.append([url.split()[0] for url in urls])
tweets_df['timestamp']=timestamp
tweets_df['year']=year
tweets_df['month']=month
tweets_df['day']=day
tweets_df['hashtags']=hashtags
tweets_df['mentions']=mentions
tweets_df['hashtags_numbers']=hashtags_numbers
tweets_df['mentions_numbers']=mentions_numbers
tweets_df['links']=links
#tweets_df.drop(columns=['time'],inplace=True)

In [5]:
def get_frequency(df,target_column):
    items_frequency={}
    for i in range(len(df)):
        tweet=df.iloc[i]
        for item in tweet[target_column]:
            if(item in items_frequency):
                items_frequency[str(item).lower()]+=1
            else:
                items_frequency[str(item).lower()]=1
    return items_frequency

In [6]:
hashtags_frequency=get_frequency(tweet_time,"hashtags")
top_hashtags=pd.DataFrame(sorted(hashtags_frequency.items(), key=lambda hashtags_frequency: hashtags_frequency[1],reverse=True)[:50],columns=['hashtag','frequency'])

AttributeError: 'str' object has no attribute 'iloc'

In [None]:
plot([go.Bar(x=top_hashtags.hashtag.values, y=top_hashtags.frequency.values)],filename='hashtags.html')

## Hashtags Analysis

In [None]:
nodes_list=[]
for i in range(len(top_hashtags)):
    node_id=top_hashtags.iloc[i]['hashtag']
    node_size=top_hashtags.iloc[i]['frequency']/25
    nodes_list.append({"id":node_id,"size":node_size})

In [None]:
adj_matrix=pd.DataFrame(columns=[item['id'] for item in nodes_list],index=[item['id'] for item in nodes_list])
adj_matrix.fillna(0,inplace=True)

In [None]:
c=0
for tweet_hashtag_list in tweets_df.hashtags:
    for node in nodes_list:
        target_hashtag=node['id']
        if(target_hashtag in tweet_hashtag_list):
            for tweet_hashtag in tweet_hashtag_list:
                if(tweet_hashtag in adj_matrix.columns):
                    adj_matrix.loc[target_hashtag,tweet_hashtag]+=1

In [None]:
edges_list=[]
for i in range(len(adj_matrix)):
    for j in range(i+1,len(adj_matrix)):
        s=adj_matrix.iloc[i].index[i]
        t=adj_matrix.iloc[j].index[j]
        v=adj_matrix.loc[s][t]/10
        if(v>4.5):
            nodes_ids=[item['id'] for item in nodes_list]
            if(s in nodes_ids and t in nodes_ids):
                edges_list.append({"source":s,"target":t,"value":v})

In [None]:
import json
with open("hashtagsGraph.json","w") as f:
    json.dump({"nodes":nodes_list,"links":edges_list},f)

In [None]:
print(len(nodes_list),(len(edges_list)))

## Mentions Analysis

In [5]:
top_mentioners=tweets_df[tweets_df.mentions_numbers>0].groupby(by="writer").count().sort_values(by="mentions_numbers",ascending=False).reset_index()[['writer','mentions_numbers']]

In [6]:
top_mentioners=top_mentioners[top_mentioners.mentions_numbers>2]

In [7]:
top_mentioners

Unnamed: 0,writer,mentions_numbers
0,@JavaScriptKicks,16
1,@eggheadio,11
2,@AngularInDepth,9
3,@webtech_4u,6
4,@vuejsamsterdam,5
5,@cosmic_js,4
6,@joinindorse,4
7,@Frontend_Love,4
8,@sejournal,3
9,@carsoncgibbons,3


In [13]:
top_mentioners_tweets=tweets_df.merge(top_mentioners,on=['writer'])

In [25]:
mentions_frequency=get_frequency(tweets_df,"mentions")
top_mentions=pd.DataFrame(sorted(mentions_frequency.items(), key=lambda mentions_frequency: mentions_frequency[1],reverse=True)[:50],columns=['mentioned','frequency'])

In [26]:
plot([go.Bar(x=top_mentions.mentioned.values, y=top_mentions.frequency.values)],filename='mentions.html')

'file:///home/mahmoud/sipof.ink/JupyterProjects/socialMediaAnalysis/Twitter_Analytics/mentions.html'

In [33]:
top_mentioners_people_for_writers={}
for writer in top_mentioners.writer:
    writer_tweets=top_mentioners_tweets[top_mentioners_tweets.writer==writer]
    mentioned_people=get_frequency(writer_tweets,"mentions")
    top_mentioners_people_for_writers[writer]=sorted(mentioned_people.items(), key=lambda mentioned_people: mentioned_people[1],reverse=True)

In [34]:
top_mentioners_people_for_writers

{'@AngularInDepth': [('trotylyu', 1),
  ('wesgrimes', 1),
  ('yurzui', 1),
  ('layzeedk', 1),
  ('guganarumugam93', 1),
  ('nate_lapinski', 1),
  ('tim_deschryver', 1),
  ('ylubianov', 1),
  ('ncjamieson', 1)],
 '@Frontend_Love': [('youyuxi', 3),
  ('rickhanlonii', 2),
  ('webpack', 2),
  ('jhnnns', 1),
  ('fbjest', 1),
  ('thelarkinn', 1),
  ('vuejs', 1),
  ('kylemathews', 1),
  ('johnlindquist', 1),
  ('pablodeeleman', 1),
  ('jenlooper', 1),
  ('eagancheva', 1),
  ('anfibiacreativa', 1)],
 '@JavaScriptKicks': [('bit_src', 3),
  ('thepracticaldev', 3),
  ('scotch_io', 3),
  ('javascriptkicks', 1),
  ('emmawedekind', 1),
  ('discoversdks', 1),
  ('codementorio', 1),
  ('ar_goyal', 1),
  ('dzurico', 1)],
 '@carsoncgibbons': [('cosmic_js', 3),
  ('vuetifyjs', 2),
  ('hackernoon', 2),
  ('aaron_vail', 1),
  ('netlify', 1),
  ('vuejs', 1),
  ('nodejs', 1),
  ('codingcoach_io', 1),
  ('logmein', 1),
  ('emmawedekind', 1)],
 '@cosmic_js': [('jlengstorf', 1),
  ('gatsbyjs', 1),
  ('ossia', 1

In [35]:
tweets_df

Unnamed: 0,body,likes,link,replies,retweets,time,writer,timestamp,year,month,day,hashtags,mentions,hashtags_numbers,mentions_numbers,links
0,"""Return [data, loading, error];}"" http://socia...",0,/eibrahim/status/1093513673618964480,0.0,0.0,6:15 AM - 7 Feb 2019,@eibrahim,6:15 AM,2019,Feb,7,"[programming, javascript, react]",[],3,0,[http://social.frontendweekly.co/byvWm6Os]
1,"Late tweet, 5th 6th (Days 53, 54). Very little...",0,/L1K3R0535/status/1093513800098242565,0.0,3.0,6:16 AM - 7 Feb 2019,@L1K3R0535,6:16 AM,2019,Feb,7,"[css, code, html, javascript, 100daysofcode, p...",[],7,0,[]
2,Vue.js 2 Essentials: Build Your First Vue App☞...,0,/javascript_devv/status/1093514298486341632,0.0,0.0,6:18 AM - 7 Feb 2019,@javascript_devv,6:18 AM,2019,Feb,7,"[vuejs, javascript]",[],2,0,[http://bit.ly/2RLQa3E]
3,When u write 500 lines of codes and exit witho...,2,/BarineSambaris/status/1092896518573568008,3.0,0.0,1:23 PM - 5 Feb 2019,@BarineSambaris,1:23 PM,2019,Feb,5,"[coding, python, code, software, softwaredevel...",[],7,0,[]
4,My #first chat app hope to collaborate #javasc...,0,/GoodwishSifiso/status/1093224111919251456,0.0,0.0,11:05 AM - 6 Feb 2019,@GoodwishSifiso,11:05 AM,2019,Feb,6,"[first, javascript]",[],2,0,[]
5,React Native for Mobile Developers☞ http://bit...,0,/ecma_script6/status/1092789874942529537,0.0,0.0,6:19 AM - 5 Feb 2019,@ecma_script6,6:19 AM,2019,Feb,5,"[javascript, reactjs]",[],2,0,[http://bit.ly/2BwyNhg]
6,Machine Learning with JavaScript : Part 1☞ htt...,5,/CodeFood/status/1092421326575468544,0.0,2.0,5:55 AM - 4 Feb 2019,@CodeFood,5:55 AM,2019,Feb,4,"[javascript, machinelearning]",[],2,0,[http://bit.ly/2tzCtJu]
7,The Complete Angular 5 Essentials Course For B...,0,/angular_geek/status/1093078423243640833,0.0,0.0,1:26 AM - 6 Feb 2019,@angular_geek,1:26 AM,2019,Feb,6,"[javascript, angular]",[],2,0,[https://goo.gl/H2Z6sV]
8,Monthly meeting for local #javascript group at...,0,/dejayabud/status/1093033847925571585,0.0,0.0,10:29 PM - 5 Feb 2019,@dejayabud,10:29 PM,2019,Feb,5,[javascript],[],1,0,[]
9,Day 36: Finished the calculator from yesterday...,3,/MichaelHoumann/status/1093513710742564864,0.0,6.0,6:15 AM - 7 Feb 2019,@MichaelHoumann,6:15 AM,2019,Feb,7,"[ruby, webdevelopment, css, html, webdesign, j...",[],7,0,[https://javascript-caculator-qm8qj71su.now.sh/]
