# $$\text{Minerva Schools Class of 2021}$$ 
## $$\text{Messenger Group Chat}$$

The project serves as a basis for analyzing social network dynamics on real-world data extracted from the Facebook group chat of the Minerva class of 2021. The cohort has over 192 students representing over 100 countries which provides a perfect setting for analyzing the evolution of social interactions since the creation of the group in September 2017 until the end of junior year (May 2020). Over the mentioned time period, we produced 60,000 text with roughly 542,960 words and 21457 reactions.

In [6]:
# Importing necessary packages

import json
import numpy as np
import pandas as pd
from datetime import datetime
from collections import Counter
from itertools import chain

In [3]:
# Loading the data
data = []
for i in range(1, 8):
    with open('C:/Users/Taha/Desktop/m21_facebook/message_'+str(i)+'.json')\
    as data_temp: data_temp = json.load(data_temp, encoding='UTF-32')
    data += data_temp['messages']
print('\nNumber of messages:', len(data))

# Format timestamp (Time in UTC)
def time_format(timestamp): 
    return datetime.utcfromtimestamp(timestamp/1000).strftime('%Y-%m-%d %H:%M:%S')
for i in data: i['timestamp_ms'] = time_format(i['timestamp_ms'])

# Participants' names
student_names = [data_temp['participants'][i].get('name') for i in range(len(data_temp['participants']))]
print('\nThe number of people in the chat:', len(student_names),'\n')


Number of messages: 62208

The number of people in the chat: 194 



In [4]:
# Reaction encoding and decoding

reaction_codes, reaction_actors = [], []
for i in data: 
    if 'reactions' in i: reaction_codes.append([j['reaction'] for j in i['reactions']])

reactions = np.array(sum(reaction_codes, []))
encoding = {'ð\x9f\x98®':'wow', 'ð\x9f\x98\x8d':'heart_eyes', 
            'ð\x9f\x91\x8e':'thumbs_down', 'ð\x9f\x98\x86':'laugh',
            'ð\x9f\x91\x8d':'thumbs_up', 'ð\x9f\x98¢':'cry', 'â\x9d¤':'heart',
            'ð\x9f\x8f\x86':'trophy', 'ð\x9f\x98\xa0':'angry'}

print('\nNumber of reactions:', len(reaction_codes), '\nNumber of uniques:', len(np.unique(reactions)))

# Decoding the reactions for all messages
for i in data: 
    if 'reactions' in i: 
        for j in i['reactions']: j['reaction'] = encoding.get(j['reaction'])

for i in data:
    if 'reactions' in i: reaction_actors.append([(j['actor'], i['sender_name']) for j in i['reactions']])
reaction_actors = sum(reaction_actors, [])

print('\n\nNumber of unique pairs: ', len(set(reaction_actors)), 
      '\nNumber of interactions: ', len(reaction_actors),'\n')


Number of reactions: 21457 
Number of uniques: 9


Number of unique pairs:  13461 
Number of interactions:  55681 



In [7]:
react_dict = Counter(chain(reaction_actors))
final_data = pd.DataFrame(list(react_dict.keys()), columns=['sender', 'reactor'])
final_data['n_interactions'] = react_dict.values()

In [20]:
import networkx as nx
graph = nx.DiGraph()
graph.add_edges_from(sorted(react_dict.keys()))
print(nx.info(graph))

total_reacts = []
for i in student_names:
    total_reacts.append([i, sum(final_data[final_data.reactor==i].n_interactions)])
total_reacts = dict(total_reacts)

def get_react(name):
    if type(total_reacts.get(name)) == type(None): return 0
    else: return total_reacts.get(name)
    
    
for idx, edge in enumerate(graph.edges):
    graph.edges[edge]['value'] = int(sorted(react_dict.values())[idx]/(1+get_react(edge[0]) + get_react(edge[1])))

for idx, node in enumerate(graph.nodes):
    graph.nodes[node]['size'] = [i[1] for i in dict(graph.degree).items()][idx]

Name: 
Type: DiGraph
Number of nodes: 210
Number of edges: 13461
Average in degree:  64.1000
Average out degree:  64.1000


In [35]:
# Interactive network

from pyvis.network import Network
G = Network(height="800px", width="100%", notebook=False, bgcolor="#222222", font_color="white")
G.toggle_hide_edges_on_drag(False)
G.barnes_hut()

size = [i[1] for i in sorted(dict(graph.degree).items())]
for idx,i in enumerate(sorted(dict(graph.degree).keys())):
    G.add_node(i, value=size[idx], label=' ', title=i)

for source,target,edge_attrs in graph.edges(data=True):
    G.add_edge(source, target, **edge_attrs)
    
    
#G.add_edges(graph.edges)
#G.show_buttons(filter_=True)


G.set_options('''
var options = {
  "nodes": {
    "borderWidth": 0,
    "borderWidthSelected": 1,
    "shadow": {
      "enabled": true
    },
    "color": {
      "highlight": {
        "border": "rgba(233,33,28,1)",
        "background": "rgba(255,94,100,1)"
      }
    }
    
  },
  "edges": {
    "color": {
      "highlight": "rgba(216,83,84,1)",
      "inherit": true
    },
    "shadow": {
      "enabled": true
    },
    "smooth": {
      "type": "cubicBezier",
      "forceDirection": "none"
    },
    "width": 0
  },
  "interaction": {
    "multiselect": true
  },
  "physics": {
    "barnesHut": {
      "gravitationalConstant": -80000,
      "springLength": 250,
      "springConstant": 0.001
    },
    "minVelocity": 0.75
  }
}
''')
G.show('messenger_network.html')