# 02806 Final project 
> An analysis and visualization for security people using Twitter data.

- toc: true 
- badges: false
- author: Peter Bom Jakobsen & Søren Fritzbøger & Yucheng Ren 
- comments: false
- categories: [data_analysis, network]

> Important: The dataset we used to create the network comes from Twitter; you can view and download it [here](https://raw.githubusercontent.com/Glorforidor/SocialGraphAssignments/master/twitter_data.zip). See also the Explainer [notebook]().

In [2]:
# hide
# Standard libraries.
import collections
import csv
from functools import wraps
import math
import os
import os.path

# Third party libraries.
from fa2 import ForceAtlas2
import networkx as nx
import nltk
from nltk import word_tokenize
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import texttable
from wordcloud import WordCloud

In [3]:
# hide
# Filenames of all the data files that make up our dataset.
# The names carry no directory component, so they are resolved relative to
# the current working directory of the notebook.
tweets_filename = "tweets.csv"
id_to_screen_name_filename = "id_to_screen_name.csv"
user_and_friends_filename = "user_and_friends_ids.csv"
user_to_friend_filename = "user_to_friend_screen_names.csv"
bios_filename = "bios.csv"
sentiment_tweets_filename = "sentiment_tweets.csv"
communities_filename = "communities.csv"
top_5_communities_filename = "top_5_communities.csv"

# The saved graph - it is an undirected graph, stored in GML format
# (it is loaded with nx.read_gml).
graph_filename = "security_network.gml"

In [4]:
# hide
# Load the saved security network from its GML file.
g = nx.read_gml(graph_filename)

# One degree value per node, in the graph's node iteration order.
# NOTE(review): presumably used to scale node sizes when drawing - confirm.
node_sizes = list(dict(g.degree).values())

![](imgs/network.png)

![](imgs/community_size.png)

In [8]:
# hide_input
# Rank the Twitter profiles by number of friends (node degree in network
# terms) and keep the ten highest.
top_ten_profiles = sorted(g.degree, key=lambda pair: pair[1], reverse=True)[:10]

# Left-align the names, right-align the friend counts.
table = texttable.Texttable()
table.set_cols_align(["l", "r"])
table.set_cols_valign(["t", "b"])

# The caption row is added as an ordinary row, followed by one row per profile.
table.add_row(["Name", "Friends"])
for screen_name, degree in top_ten_profiles:
    table.add_row([screen_name, degree])

print(table.draw())

+------------------+---------+
| Name             | Friends |
+------------------+---------+
| @HackingDave     |     216 |
+------------------+---------+
| @AlyssaM_InfoSec |     198 |
+------------------+---------+
| @RayRedacted     |     193 |
+------------------+---------+
| @NicoleBeckwith  |     178 |
+------------------+---------+
| @DfirDiva        |     170 |
+------------------+---------+
| @sherrod_im      |     161 |
+------------------+---------+
| @cybergeekgirl   |     161 |
+------------------+---------+
| @gabsmashh       |     160 |
+------------------+---------+
| @LisaForteUK     |     158 |
+------------------+---------+
| @UK_Daniel_Card  |     154 |
+------------------+---------+


![](imgs/wordcloud1.png)

![](imgs/wordcloud2.png)

In [16]:
# hide
# Word list used for scoring sentiment: a tab-separated file whose first
# three lines are skipped before the header row is read.
# NOTE(review): the code below relies on the columns "word" and
# "happiness_average" being present - confirm against the downloaded file.
url = "https://ndownloader.figstatic.com/files/360592"
words_of_happiness = pd.read_csv(
    url,
    sep="\t",
    skiprows=3,
)


def compute_average_sentiment(tokens, lexicon=None):
    """Return the average happiness score of the lexicon words found in tokens.

    Every lexicon row whose "word" appears in tokens contributes its
    "happiness_average" once, no matter how often the word occurs in tokens.
    Tokens that are missing from the lexicon are ignored.

    Parameters
    ----------
    tokens : sized collection of str
        The tokens to score. Each token in tokens must be in lowercase.
    lexicon : pandas.DataFrame, optional
        Table with a "word" and a "happiness_average" column. When omitted,
        the notebook-level ``words_of_happiness`` table is used.

    Returns
    -------
    float
        The mean happiness score, or 0.0 when tokens is empty or when none of
        the tokens occur in the lexicon.
    """
    # Nothing to score; return early without touching the lexicon.
    if len(tokens) == 0:
        return 0.0

    if lexicon is None:
        lexicon = words_of_happiness

    # Single .loc selection instead of chained indexing.
    matched_scores = lexicon.loc[lexicon["word"].isin(tokens), "happiness_average"]

    # The mean of an empty selection is NaN; map it to 0.0 and always hand
    # back a plain Python float so both return paths have the same type.
    return float(np.nan_to_num(matched_scores.mean()))


# Map a 0-based community index to the set of its members' screen names, so
# the membership test below is a set lookup.
communities = {i: set(members) for i, members in enumerate(top_5_largest_communites)}

# Collect the tweets of every community in a list and join them once at the
# end; growing one big string with += for every CSV row copies it repeatedly.
tweets_of_communities = collections.defaultdict(list)

# Use the filename constant from the configuration cell instead of repeating
# the literal here.
# NOTE(review): the file is read with the platform default encoding, as
# before - confirm whether an explicit encoding should be passed.
with open(sentiment_tweets_filename, newline="") as f:
    csv_reader = csv.DictReader(f)
    for row in csv_reader:
        for i, members in communities.items():
            if row["screen_name"] in members:
                tweets_of_communities[i].append(row["tweets"])

# Same text as before: every tweet is preceded by a single space.
text_of_communities = collections.defaultdict(str)
for i, tweets in tweets_of_communities.items():
    text_of_communities[i] = "".join(f" {tweet}" for tweet in tweets)

# Average sentiment value per community index.
sentiment_of_communities = {k: compute_average_sentiment(bag_of_words(v)) for k, v in text_of_communities.items()}

In [17]:
# hide_input
# Two-column table: community number on the left, its sentiment value
# (printed with two decimals) on the right.
table = texttable.Texttable()
table.set_precision(2)
table.set_cols_align(["l", "r"])
table.set_cols_valign(["t", "b"])
table.add_row(["Community", "Sentiment value"])

# Communities are stored with a 0-based index; display them 1-based.
for com in sorted(sentiment_of_communities):
    sentiment = sentiment_of_communities[com]
    table.add_row([com + 1, sentiment])

print(table.draw())

+-----------+-----------------+
| Community | Sentiment value |
+-----------+-----------------+
| 1         |            5.46 |
+-----------+-----------------+
| 2         |            5.44 |
+-----------+-----------------+
| 3         |            5.52 |
+-----------+-----------------+
| 4         |            5.51 |
+-----------+-----------------+
| 5         |            5.46 |
+-----------+-----------------+
