In [21]:
import pandas as pd
import seaborn as sns
import numpy as np
from matplotlib import pyplot as plt
from ipywidgets import interact, interactive, fixed
from ipywidgets import IntSlider
import networkx as nx
import re

import sys
sys.path.append('..')
from transform.read_transform import read_transform

In [22]:
data = read_transform(path_tweets='../data/TW.csv', path_retweets='../data/RT.csv', 
                      join_method='concat', 
                      hashtags_as_list=False, mentions_as_list=False)
print(data.shape)

(44334, 19)


In [51]:
data.head(3)

Unnamed: 0,RT_of_ID,creation_date,description,entities_hashtags,favourite_count,followers_count,friends_count,full_text,is_retweet,location,mentions,profile_created_at,protected,retweet_count,search_key,tweet_id,user_id,user_name,user_screen_name
0,,2021-04-01 02:06:11+00:00,I retweet #Anime stuff🔥 Follow for a cookie🍪,"anime, food, animefood, hungry, 백종원, 고든램지",0,5064,1,RT @byOceanEyes: OceanEyes\nonly for anime foo...,True,Animeland,'byOceanEyes',2018-07-24 18:53:54+00:00,False,115,#food,1377442135604084742,1021830850357473280,RE:AnimeTron,ReAnimetron
1,,2021-04-01 02:06:03+00:00,,"Food, FoodPorn, FoodPorn",0,16037,9925,"Might not look as appetizing as other posts, b...",False,,,2015-08-01 07:11:08+00:00,False,0,#food,1377442100900421632,3303170203,Dining Cooking,DiningCooking
2,,2021-04-01 02:05:30+00:00,NYT/#1 int'l bestselling author THOSE WHO SAVE...,"poetry, food, PoetsCafe",0,6090,758,"Come for the #poetry, stay for the #food—or vi...",False,"Boston, MA","'AMIGHTYBLAZE', 'fascicles'",2009-09-02 04:30:11+00:00,False,0,#food,1377441963746721796,70882212,Jenna Blum,Jenna_Blum


## Get all the mentions in a dictionary

In [106]:
tmp = data[['mentions', 'user_screen_name']]
tmp = tmp[tmp['mentions'].isnull() == False]
tmp.loc[:,'mentions'] = tmp.loc[:,'mentions'].str.replace("'", "").str.split(', ')
print(tmp.shape)

tmp = tmp.set_index('user_screen_name')
tmp.head(2)

(34171, 2)


Unnamed: 0_level_0,mentions
user_screen_name,Unnamed: 1_level_1
ReAnimetron,[byOceanEyes]
Jenna_Blum,"[AMIGHTYBLAZE, fascicles]"


In [107]:
mentions = tmp.to_dict()['mentions']
pd.Series(mentions)[:2]

ReAnimetron                [byOceanEyes]
Jenna_Blum     [AMIGHTYBLAZE, fascicles]
dtype: object

In [108]:
import itertools
dict(itertools.islice(mentions.items(), 3))

{'ReAnimetron': ['byOceanEyes'],
 'Jenna_Blum': ['AMIGHTYBLAZE', 'fascicles'],
 'tbevents_': ['MrsFreshFeet']}

## Get all the attributes in a dictionary

In [109]:
tmp = data[['user_screen_name', 'followers_count']].set_index('user_screen_name')
follower_counts = tmp.to_dict()['followers_count']
pd.Series(follower_counts)[:2]

ReAnimetron       5064
DiningCooking    16020
dtype: int64

In [110]:
dict(itertools.islice(follower_counts.items(), 3))

{'ReAnimetron': 5064, 'DiningCooking': 16020, 'Jenna_Blum': 6090}

## Get Retweet count in a dictionary

In [111]:
tmp = data[['user_screen_name', 'retweet_count']].groupby('user_screen_name').mean()
retweet_counts = tmp.to_dict()['retweet_count']
print(pd.Series(retweet_counts)[:2])
len(retweet_counts)

004clinden    3.1
007201_       1.0
dtype: float64


18626

In [112]:
dict(itertools.islice(retweet_counts.items(), 3))

{'004clinden': 3.1, '007201_': 1.0, '0123Mara': 84.0}

## Get Activity leve of users

In [113]:
tmp = data.drop_duplicates(subset=['tweet_id','user_screen_name'])['user_screen_name'].value_counts()
user_activity = tmp.to_dict()
print(pd.Series(user_activity)[:3])
len(user_activity)

researchmrx       929
CounterIreland    326
WhatsOnOLIO       252
dtype: int64


18626

## Create Graph-File

In [114]:
G = nx.DiGraph()

In [115]:
# Add nodes
for key, mention in mentions.items():
    G.add_node(key, followers_count=follower_counts[key])
    if type(mentions) != float:
        for m in mentions:
            G.add_node(m, followers_count=follower_counts[m])

In [116]:
len(G.nodes())

13633

In [119]:
pd.Series(nx.get_node_attributes(G,'followers_count'))[:5]

ReAnimetron     5064
Jenna_Blum      6090
tbevents_        822
DGSpeaks       10223
lealalicia      1035
dtype: int64

### Graph with more attributes: followers, retweets, user activity

In [120]:
G = nx.DiGraph()

In [121]:
# Add nodes
for key, mention in mentions.items():
    G.add_node(key, followers_count=follower_counts[key], user_activity=user_activity[key], retweet_counts=retweet_counts[key])
    if type(mentions) != float:
        for m in mentions:
            G.add_node(m, followers_count=follower_counts[m], user_activity=user_activity[m], retweet_counts=retweet_counts[m])

In [122]:
len(G.nodes())

13633

In [123]:
pd.Series(nx.get_node_attributes(G, 'followers_count'))['researchmrx']

7104

In [124]:
pd.Series(nx.get_node_attributes(G, 'user_activity'))['researchmrx']

929

In [125]:
pd.Series(nx.get_node_attributes(G, 'retweet_counts'))['researchmrx']

1.7839681637293916

In [126]:
# Add edges
for key, val in mentions.items():
    if type(val) != float:
        for v in val:
            G.add_edge(key, v)

In [127]:
nx.write_gexf(G, path='./myData/many_attr.gexf')