In [47]:
import networkx as nx
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.colors as mcolors
import matplotlib.cm as cm
import matplotlib
import re

In [48]:
filepath = './tweets.csv'

df = pd.read_csv(filepath, delimiter=',',
                skiprows=1, # skips the first line of the file
                encoding='unicode_escape', # prevents errors on unknown symbols
                usecols=['Vertex 1', 'Vertex 2', 'Tweet'], # only import the columns we use
                )
df.columns = ['source_user', 'mentioned_user', 'tweet_text']

print(df)

         source_user   mentioned_user  \
0           notherle        dude_stay   
1           notherle       facikwacik   
2           notherle     crazycold739   
3           notherle       filippecak   
4           notherle      u_yousolame   
..               ...              ...   
842     bulletking61     bulletking61   
843     mx_hippakrit     mx_hippakrit   
844  shers_wird_stuf  shers_wird_stuf   
845   fastblueapple9  shers_wird_stuf   
846     centurionfox            steam   

                                            tweet_text  
0    RT @dude_stay: Plz help us save the this game ...  
1    RT @FacikWacik: My favorite game is dying ????...  
2    RT @CraZyCold739: I associate with this game n...  
3    RT @FilipPecak: Hi people. Let's save #SafeTF2...  
4    RT @u_yousolame: I been playing tf2 for a few ...  
..                                                 ...  
842              #SafeTF2  PLZZZZZZZZZZZZZZZZZZZZZZZZZ  
843  I know I'm late for this but #SafeTF2 (':\n\nh

In [49]:
# add nodes? first export df to list/dictionary then transform? or easier way?

In [50]:
G = nx.DiGraph()
colors = []
for source_user, mentioned_user, tweet_text in zip(df.source_user, df.mentioned_user, df.tweet_text):

    if source_user != mentioned_user:
        G.add_edge(source_user, mentioned_user)
        if "RT" in tweet_text[:3]:
            colors.append("black")
        else:
            colors.append("red")

node_colors = np.arange(0.0, 1.0, 1/G.number_of_nodes())
print(len(node_colors))

344


In [51]:
G.number_of_nodes()

344

In [52]:
G.number_of_edges()

434

In [53]:
measures = {"centrality": {'in_degree': nx.in_degree_centrality(G), 'out_degree': nx.out_degree_centrality(G),
                           'betweenness': nx.betweenness_centrality(G), 'closeness': nx.closeness_centrality(G)}}
out_degrees = []
for key, value in nx.out_degree_centrality(G).items():
    out_degrees.append(value)

minima = min(out_degrees)
maxima = max(out_degrees)

norm = matplotlib.colors.Normalize(vmin=minima, vmax=maxima, clip=True)
mapper = cm.ScalarMappable(norm=norm, cmap=cm.Greys_r)
node_colors = []
for v in out_degrees:
    node_colors.append(mapper.to_rgba(v)[0])


In [85]:
def count_dictionary(persons):
    count = 0
    for key, value in persons.items():
        count += value
    print(count)

def print_max(persons):
    max_word = max(persons, key=persons.get)
    print(max_word)
    print(persons[max_word])

# Analyze the network
I_words = ["i", "me", "my", "mine"]
you_words = ["you", "yours"]
we_words = ["we", "us", "our", "ours"]

# Calculate per person
persons_i_words = dict()
persons_you_words = dict()
persons_we_words = dict()
tweets_person = dict()
words_person = dict()
for index, row in df.iterrows():
    # count tweets per person
    tweets_person[row['source_user']] = tweets_person.get(row['source_user'], 0) + 1
    
    #split the text on punctuation. 
    split_text = re.split(r'[,;!?:" .\']', row['tweet_text'])
    for word in split_text:
        # count words per person
        words_person[row['source_user']] = words_person.get(row['source_user'], 0) + 1
        
        # count i/you/we words per person
        if word.lower() == 'i':
            persons_i_words[row['source_user']] = persons_i_words.get(row['source_user'], 0) + 1
        elif word.lower() == 'you':
            persons_you_words[row['source_user']] = persons_you_words.get(row['source_user'], 0) + 1
        elif word.lower() == 'we':
            persons_we_words[row['source_user']] = persons_we_words.get(row['source_user'], 0) + 1

#print(num_tweets_person_tweets_person)
print_max(persons_i_words)
print_max(persons_you_words)
print_max(persons_we_words)

# Generate one complete dictionary
complete_person = dict()
for person, value in words_person.items():
    complete_person[person] = {}
    complete_person[person]['words'] = value

for person, value in persons_i_words.items():
    complete_person[person]['i_words'] = value
for person, value in persons_you_words.items():
    complete_person[person]['you_words'] = value
for person, value in persons_we_words.items():
    complete_person[person]['we_words'] = value
for person, value in tweets_person.items():
    complete_person[person]['num_tweets'] = value

print(complete_person)

simon_word
45
simon_word
5
simon_word
9
{'notherle': {'words': 234, 'i_words': 6, 'you_words': 2, 'we_words': 1, 'num_tweets': 10}, 'joseeliansalva1': {'words': 1, 'num_tweets': 1}, 'williw80472984': {'words': 28, 'i_words': 1, 'num_tweets': 1}, 'margisstupid': {'words': 27, 'num_tweets': 1}, 'wisco1180': {'words': 27, 'num_tweets': 1}, 'alexsha71075971': {'words': 4, 'num_tweets': 1}, 'kubekposting': {'words': 12, 'i_words': 1, 'num_tweets': 1}, 'edgaror37098062': {'words': 2, 'num_tweets': 1}, 'amoru01369152': {'words': 107, 'num_tweets': 1}, 'j__peg': {'words': 31, 'i_words': 2, 'num_tweets': 1}, 'rainman50258089': {'words': 12, 'num_tweets': 2}, 'gd0ud': {'words': 31, 'i_words': 2, 'num_tweets': 1}, 'rubysunseto': {'words': 31, 'i_words': 2, 'num_tweets': 1}, 'isabourian': {'words': 21, 'num_tweets': 3}, 'snastheweirdone': {'words': 4, 'num_tweets': 1}, 'cyber_pixeeel': {'words': 27, 'num_tweets': 1}, '98tremor': {'words': 13, 'num_tweets': 1}, 'brahmfunny': {'words': 3, 'we_words'

In [None]:
plt.figure(figsize=(20, 20), dpi= 200)
nx.draw_networkx(G= G, edge_color=colors, node_color=node_colors, with_labels=False)