In [None]:
# Mount Google Drive at /content/drive/ (the google.colab package is provided by the Colab runtime).
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import gzip
import json
import time
from datetime import datetime

import warnings
warnings.filterwarnings("ignore")

In [None]:
# Base folder on the mounted Drive; file paths are built below as `filename + <file name>`.
# The commented-out line is an alternative folder layout.
#filename = "/content/drive/MyDrive/ADM/HW5/"
filename = "/content/drive/MyDrive/HW5/"

# Exercise 1

### READ DATA

In [None]:
# Each archive is read as a space-separated table without a header row, so the
# columns are labelled 0, 1 and 2.
# `error_bad_lines=False` was deprecated in pandas 1.3 and removed in pandas 2.0;
# `on_bad_lines='warn'` is its replacement: malformed rows are reported and skipped.
a2q = pd.read_csv(filename + 'sx-stackoverflow-a2q.txt.gz', compression='gzip', header=None, sep=' ', quotechar='"', on_bad_lines='warn')
c2a = pd.read_csv(filename + 'sx-stackoverflow-c2a.txt.gz', compression='gzip', header=None, sep=' ', quotechar='"', on_bad_lines='warn')
c2q = pd.read_csv(filename + 'sx-stackoverflow-c2q.txt.gz', compression='gzip', header=None, sep=' ', quotechar='"', on_bad_lines='warn')

### Transform timestamps into dates

In [None]:
# Overwrite column 2 (parsed as seconds since the epoch) with an integer of the form YYYYMMDD.
# NOTE(review): this cell is not idempotent - running it a second time would parse the
# YYYYMMDD integers as seconds again and produce wrong dates.
a2q[2] = pd.to_datetime(a2q[2], unit='s').dt.strftime("%Y%m%d").astype(int)
c2a[2] = pd.to_datetime(c2a[2], unit='s').dt.strftime("%Y%m%d").astype(int)
c2q[2] = pd.to_datetime(c2q[2], unit='s').dt.strftime("%Y%m%d").astype(int)

### Drop self-loops (edges from a user to themselves)

In [None]:
# Remove, in place, every row whose two user columns (0 and 1) hold the same ID.
c2q.drop(index=c2q.loc[c2q[0] == c2q[1]].index, inplace=True)
c2a.drop(index=c2a.loc[c2a[0] == c2a[1]].index, inplace=True)
a2q.drop(index=a2q.loc[a2q[0] == a2q[1]].index, inplace=True)

### Subset of dataset

In [None]:
# Independent copies restricted to rows whose YYYYMMDD date (column 2) is before 2009-01-01.
subset_a2q = a2q.loc[a2q[2].lt(20090101)].copy()
subset_c2a = c2a.loc[c2a[2].lt(20090101)].copy()
subset_c2q = c2q.loc[c2q[2].lt(20090101)].copy()

In [None]:
# Sanity check: dropping the last two digits of YYYYMMDD leaves YYYYMM.
# `.iloc[0]` takes the first row by position; the previous `.head(1)[0]` looked up the
# index *label* 0 and fails with KeyError if that row was removed as a self-loop.
str(a2q[2].iloc[0])[:-2]

'200808'

In [None]:
class Relation:
    """A typed, weighted, directed link between two users in a given period.

    The constructor receives the two endpoint objects but keeps only their
    identifiers, read from each endpoint's ``get_ID`` attribute.
    """

    def __init__(self, type_relation, time, source, target, weight):
        self.type_relation_, self.time_ = type_relation, time
        # Store the IDs, not the endpoint objects themselves.
        self.source_, self.target_ = source.get_ID, target.get_ID
        self.weight_ = weight

    # --- read-only views on the stored fields -------------------------------

    @property
    def get_type(self):
        """Relation type label given at construction."""
        return self.type_relation_

    @property
    def time(self):
        """Period key given at construction."""
        return self.time_

    @property
    def source(self):
        """ID of the source endpoint."""
        return self.source_

    @property
    def target(self):
        """ID of the target endpoint."""
        return self.target_

    @property
    def weight(self):
        """Current weight of the relation."""
        return self.weight_

    def set_weight(self, weight):
        """Replace the weight of the relation."""
        self.weight_ = weight

    def __str__(self):
        # The type label is joined as-is (it must already be a string); the other
        # fields are converted with str().
        pieces = [
            '{"type_relation": "', self.type_relation_,
            '", "time": ', str(self.time_),
            ', "source": ', str(self.source_),
            ', "target": ', str(self.target_),
            ', "weight": ', str(self.weight_),
            '}',
        ]
        return "".join(pieces)

    def __repr__(self):
        return str(self)
     

class User:
    """A graph node: a user ID plus its incoming and outgoing relations.

    Both ``in_relation`` and ``out_relation`` are nested dictionaries of the form
    ``{relation.time: {relation.get_type: [relation, ...]}}``.
    """

    def __init__(self, ID_user):
        self.ID_user = ID_user
        self.in_relation = dict()
        self.out_relation = dict()

    @staticmethod
    def _file_relation(index, relation):
        """Append ``relation`` to ``index[relation.time][relation.get_type]``, creating missing levels."""
        index.setdefault(relation.time, {}).setdefault(relation.get_type, []).append(relation)

    def add_in_relation(self, in_relation):
        """Register a relation that points to this user."""
        self._file_relation(self.in_relation, in_relation)

    def add_out_relation(self, out_relation):
        """Register a relation that starts from this user."""
        self._file_relation(self.out_relation, out_relation)

    def set_in_relation(self, inRelations):
        """Replace the whole incoming-relation dictionary."""
        self.in_relation = inRelations

    def set_out_relation(self, outRelation):
        """Replace the whole outgoing-relation dictionary."""
        self.out_relation = outRelation

    @classmethod
    def from_json(cls, json):
        """Build a user from a mapping with an ``"ID_user"`` key.

        Only the ID is read; relation dictionaries start empty.
        The parameter name shadows the ``json`` module inside this method; it is
        kept because callers may pass it by keyword.
        """
        return cls(json["ID_user"])

    @property
    def get_ID(self):
        """The user ID given at construction."""
        return self.ID_user

    @property
    def get_in_relation(self):
        """Nested dictionary of incoming relations."""
        return self.in_relation

    @property
    def get_out_relation(self):
        """Nested dictionary of outgoing relations."""
        return self.out_relation

    def __str__(self):
        return "{\"in_relation\": " + str(self.in_relation) +  ", \"out_relation\": " + str(self.out_relation) + "}"

    def to_json(self):
        """Return the user as a dictionary.

        ``"ID_user"`` is included so that the result can be passed back to
        ``from_json``, which requires that key. The relation dictionaries are
        returned by reference and still hold the relation objects.
        """
        return {"ID_user": self.ID_user, "in_relation": self.in_relation, "out_relation": self.out_relation}

    def __repr__(self):
        return self.__str__()

In [None]:
def get_user(user, dict_users):
    """Return the entry stored under ``user`` in ``dict_users``, creating it if needed.

    A missing key gets a fresh ``User(user)`` stored in ``dict_users`` (mutated in place).
    Returns the pair ``(entry, dict_users)``.
    """
    if user in dict_users:
        return dict_users[user], dict_users
    created = User(user)
    dict_users[user] = created
    return created, dict_users

In [None]:
def create_graph(df, type_node, weight, users):
    """Add every row of ``df`` to ``users`` as a relation and return ``users``.

    Parameters
    ----------
    df : DataFrame with columns labelled 0, 1 and 2. Column 1 is used as the source
        user ID, column 0 as the target user ID and column 2 as a YYYYMMDD integer date.
    type_node : label stored as the relation type.
    weight : weight stored on every relation created from ``df``.
    users : dict of user ID -> user object; missing users are created through
        ``get_user`` and the dict is updated in place.

    Each relation is filed as incoming on the target and outgoing on the source,
    keyed by the YYYYMM period of its date.
    """
    # Iterate column-wise rather than with iterrows(): iterrows() builds one Series
    # per row (slow) and upcasts all values to a common dtype, so a float column
    # would turn the date into e.g. "20080815.0" and break the old string slicing.
    for target_raw, source_raw, day in zip(df[0], df[1], df[2]):
        year_month = int(day) // 100  # YYYYMMDD -> YYYYMM
        source_ = int(source_raw)
        target_ = int(target_raw)
        source, users = get_user(source_, users)
        target, users = get_user(target_, users)
        rel = Relation(type_node, year_month, source, target, weight)
        target.add_in_relation(rel)
        source.add_out_relation(rel)
    return users

In [None]:
# Build one shared user dictionary from the three subsets.
# The third argument is the weight stored on every relation of that type:
# 1 for "a2q", 2 for "c2a", 3 for "c2q".
users = dict()
users = create_graph(subset_a2q, "a2q", 1, users)
users = create_graph(subset_c2a, "c2a", 2, users)
users = create_graph(subset_c2q, "c2q", 3, users)

In [None]:
# Persist the user dictionary next to the input data so it can be reloaded without rebuilding.
import pickle
with open(filename + 'graph.pickle', 'wb') as fp:
    pickle.dump(users, fp, pickle.HIGHEST_PROTOCOL)

# LOAD DATA FROM FILE

In [None]:
# Reload the pickled user dictionary into `graph`.
# NOTE(review): this rebinds `filename` from the data folder to the pickle file itself, so
# re-running an earlier cell that builds paths as `filename + ...` will produce wrong paths.
# NOTE(review): pickle.load executes code embedded in the file - only load pickles you created.
import pickle
filename = '/content/drive/MyDrive/HW5/graph.pickle'
with open(filename, 'rb') as handle:
    graph = pickle.load(handle)

In [None]:
# Inspect the "a2q" relations pointing to user 3 for period 200808.
graph[3].get_in_relation[200808]['a2q']

[{"type_relation": "a2q", "time": 200808, "source": 2, "target": 3, "weight": 1},
 {"type_relation": "a2q", "time": 200808, "source": 230, "target": 3, "weight": 1},
 {"type_relation": "a2q", "time": 200808, "source": 257, "target": 3, "weight": 1},
 {"type_relation": "a2q", "time": 200808, "source": 1223, "target": 3, "weight": 1},
 {"type_relation": "a2q", "time": 200808, "source": 1414, "target": 3, "weight": 1}]

In [None]:
# Scratch check that set_weight works.
# NOTE(review): this changes the weight of a real relation in the in-memory `graph`;
# anything computed from `graph` after this cell sees the modified weight.
graph[3].get_in_relation[200808]['a2q'][0].set_weight(6)

In [None]:
# Display the first "a2q" relation pointing to user 3 for period 200808.
graph[3].get_in_relation[200808]['a2q'][0]

{"type_relation": "a2q", "time": 200808, "source": 2, "target": 3, "weight": 6}

In [None]:
# Display every outgoing relation of user 2; the trailing comment is a disabled
# subscript that would narrow the display to one period and relation type.
graph[2].get_out_relation#[200808]['a2q']

{200808: {'a2q': [{"type_relation": "a2q", "time": 200808, "source": 2, "target": 48, "weight": 1},
   {"type_relation": "a2q", "time": 200808, "source": 2, "target": 60, "weight": 1},
   {"type_relation": "a2q", "time": 200808, "source": 2, "target": 144, "weight": 1},
   {"type_relation": "a2q", "time": 200808, "source": 2, "target": 227, "weight": 1},
   {"type_relation": "a2q", "time": 200808, "source": 2, "target": 3, "weight": 1},
   {"type_relation": "a2q", "time": 200808, "source": 2, "target": 721, "weight": 1},
   {"type_relation": "a2q", "time": 200808, "source": 2, "target": 5, "weight": 1},
   {"type_relation": "a2q", "time": 200808, "source": 2, "target": 960, "weight": 1},
   {"type_relation": "a2q", "time": 200808, "source": 2, "target": 3254, "weight": 1}]},
 200809: {'a2q': [{"type_relation": "a2q", "time": 200809, "source": 2, "target": 10117, "weight": 1},
   {"type_relation": "a2q", "time": 200809, "source": 2, "target": 15487, "weight": 1},
   {"type_relation": "a

# EX 2

In [None]:
def call_function(name_graph, i):
    """Dispatch to the functionality numbered ``i`` for the graph type ``name_graph``.

    Only ``i == 1`` is wired up (``get_features``); any other value yields ``None``.
    """
    return get_features(name_graph) if i == 1 else None

# EX 2.1

In [None]:
# NOTE(review): scratch assignment; the stored output of this cell (True) cannot come from
# an assignment, so the saved output is stale. `s` is not used in the visible cells.
s = {1,2,3,4}

True

In [None]:
import plotly.graph_objects as go

In [None]:
def get_features(name_graph):
    """Show a summary table for the relations of type ``name_graph`` in the global ``graph``.

    The table reports: directedness (always "True"), number of distinct users that
    appear as source or target, number of relations, average relations per user
    (1 decimal), density ``relations / (users * (users - 1))`` (2 decimals) and a
    DENSE/SPARSE label (DENSE when the rounded density is at least 0.5).

    If no relation of the requested type exists, the averages are reported as 0.0
    instead of failing with a division by zero. Returns ``None``; the figure is
    displayed as a side effect.
    """
    users = set()
    number_rel = 0
    # Every relation is stored as outgoing on its source, so outgoing lists cover all of them once.
    for user in graph.values():
        for relations_by_type in user.get_out_relation.values():
            for relation in relations_by_type.get(name_graph, ()):
                users.add(relation.target)
                users.add(relation.source)
                number_rel += 1

    number_users = len(users)
    possible_links = number_users * (number_users - 1)  # ordered pairs of distinct users
    average_link_user = round(number_rel / number_users, 1) if number_users else 0.0
    density_degree = round(number_rel / possible_links, 2) if possible_links else 0.0

    rowEvenColor = 'lightgrey'
    rowOddColor = 'white'
    type_graph = "DENSE" if density_degree >= 0.5 else "SPARSE"
    first =  ["Directed", "Number of users", "Number of answers/comments", "Average number of links per user", "Density degree of the graph", "Type of graph"]
    second = ["True", str(number_users), str(number_rel), str(average_link_user), str(density_degree), type_graph]

    fig = go.Figure(data=[go.Table(
                header=dict(values= ["Request", "Response"], line_color='darkslategray', align=['left','center'],
                    fill_color='grey', font=dict(color='black', size=11)
                    ),
                cells = dict(values=[first, second], line_color='darkslategray',
                        fill_color = [[rowOddColor,rowEvenColor,rowOddColor, rowEvenColor,rowOddColor, rowEvenColor]*2],font=dict(color='darkslategray', size=11),
                         align=['left', 'center'])
                                  )
                         ])

    fig.update_layout(width=400, height=450)
    fig.show(width=300, height=200)

In [None]:
# Functionality 1 (summary table) for the "a2q" relations.
call_function("a2q", 1)

# EX 2.2

In [None]:
def find_min_cost(dict_relation):
    """Unfinished placeholder - always raises ``NotImplementedError``.

    The draft ended on a ``for`` statement without a body, which made the whole cell
    a SyntaxError, and it reused the name ``relation`` for two nested loops. The
    traversal it sketched is kept below as a comment (inner loop variables renamed)
    until the actual cost computation is written.

    ``dict_relation`` was not used by the draft.
    """
    # Draft traversal over every outgoing relation of the global ``graph``:
    #
    #   for user in graph:
    #       for year in graph[user].get_out_relation:
    #           for relation_type in graph[user].get_out_relation[year]:
    #               for relation in graph[user].get_out_relation[year][relation_type]:
    #                   ...  # TODO: per-relation logic was never written
    raise NotImplementedError("find_min_cost has not been implemented yet")

In [None]:
def myDijkstra(graph, source, target):
    """Dijkstra search from ``source``; returns ``getShortestPath(source, target, prev, dist)``.

    Parameters
    ----------
    graph : mapping whose keys are the nodes; it is also passed to ``getNeighbors``.
    source, target : start and end node.

    ``dist`` maps every node to its best known distance from ``source``
    (``float('inf')`` if never reached) and ``prev`` to the node it was reached
    from (``-1`` if none). Nodes that cannot be reached from ``source`` are simply
    left at infinity, so disconnected graphs terminate normally.

    Relies on ``getNeighbors(node, graph)``, ``overallWeight(u, v)`` and
    ``getShortestPath(source, target, prev, dist)``, which are not defined in the
    visible part of the notebook. Assumes ``overallWeight`` never returns a
    negative cost, as Dijkstra's algorithm requires - TODO confirm.

    Fixes over the draft: the running minimum was read before being assigned and
    never reset between iterations, so the same node was selected twice.
    """
    import heapq
    from itertools import count

    unvisited = set(graph.keys())
    dist = {node: float('inf') for node in unvisited}
    prev = {node: -1 for node in unvisited}
    dist[source] = 0

    # Heap entries are (distance, insertion number, node); the insertion number
    # breaks ties so that nodes themselves never need to be comparable.
    order = count()
    frontier = [(0, next(order), source)]

    while frontier:
        current_dist, _, current_node = heapq.heappop(frontier)
        if current_node not in unvisited:
            # Already settled through a shorter entry, or not a node of the graph.
            continue
        unvisited.remove(current_node)

        for u in unvisited.intersection(getNeighbors(current_node, graph)):
            new_dist = current_dist + overallWeight(current_node, u)
            if new_dist < dist[u]:
                dist[u] = new_dist
                prev[u] = current_node
                heapq.heappush(frontier, (new_dist, next(order), u))

    return getShortestPath(source, target, prev, dist)

# EX4

In [None]:
def assign_dormitory_to_child(child, dormitory_1, dormitory_2, not_with, children):
    """Place ``child`` in a dormitory unless it already has one.

    An unplaced child goes to ``dormitory_2`` when ``not_with`` is in ``dormitory_1``,
    otherwise to ``dormitory_1``, and is removed from ``children`` (the set of children
    still waiting). All three sets are modified in place.

    Returns ``(assigned, dormitory_1, dormitory_2, children)`` where ``assigned`` tells
    whether the child was placed by this call.
    """
    if child in dormitory_1 or child in dormitory_2:
        # Nothing to do: the child was placed earlier.
        return False, dormitory_1, dormitory_2, children

    destination = dormitory_2 if not_with in dormitory_1 else dormitory_1
    destination.add(child)
    children.remove(child)
    return True, dormitory_1, dormitory_2, children
    

In [None]:
def create_pair(n, list_pair):
    """Split children ``0 .. n-1`` into two dormitories so that no listed pair shares one.

    Parameters
    ----------
    n : number of children; they are identified by ``0 .. n-1``.
    list_pair : iterable of ``(child_1, child_2)`` pairs that must be separated.

    Returns
    -------
    ``(dormitory_1, dormitory_2)`` as two sets, or the message string below when no
    valid split exists (the pairs contain an odd cycle, or a child paired with itself).

    The pairs form a conflict graph and a valid split is a 2-colouring of it, found
    by breadth-first search. The earlier greedy pass declared failure whenever both
    children of a pair had already been placed, even if they were in different
    dormitories. Children that appear in no pair are distributed alternately, in
    increasing order, starting with dormitory 1. Identifiers outside ``0 .. n-1``
    that appear in ``list_pair`` are placed as well rather than raising ``KeyError``.
    """
    from collections import deque

    impossible = "It's not possibile to divide the kids in two dormitories."

    # Conflict graph: child -> set of children it must be separated from.
    conflicts = {}
    for child_1, child_2 in list_pair:
        if child_1 == child_2:
            return impossible  # a child cannot be separated from itself
        conflicts.setdefault(child_1, set()).add(child_2)
        conflicts.setdefault(child_2, set()).add(child_1)

    # side[child] is 0 for dormitory 1 and 1 for dormitory 2.
    side = {}
    for start in conflicts:  # dict order = order of first appearance in list_pair
        if start in side:
            continue
        side[start] = 0
        queue = deque([start])
        while queue:
            current = queue.popleft()
            for other in conflicts[current]:
                if other not in side:
                    side[other] = 1 - side[current]
                    queue.append(other)
                elif side[other] == side[current]:
                    return impossible  # odd cycle: the two must share a dormitory

    dormitory_1 = {child for child, colour in side.items() if colour == 0}
    dormitory_2 = {child for child, colour in side.items() if colour == 1}

    # Children without any constraint: alternate between the two dormitories.
    unconstrained = sorted(set(range(n)) - set(side))
    for i, child in enumerate(unconstrained):
        if i % 2 == 0:
            dormitory_1.add(child)
        else:
            dormitory_2.add(child)
    return dormitory_1, dormitory_2

In [None]:
# Three children that are pairwise incompatible (a triangle): no two-dormitory split can exist.
l = [(1,2),(1,3),(2,3)]

In [None]:
# Four children with the pairs held in `l`.
create_pair(4,l)

"It's not possibile to divide the kids in two dormitories."

In [None]:
# Ten children with two independent constraints; the remaining children are unconstrained.
l = [(4, 8), (0, 2)]
create_pair(10,l)

({0, 1, 4, 5, 7}, {2, 3, 6, 8, 9})

In [None]:
# A chain 0 - 1 - 2: child 1 must be separated from both others.
l = [(0,1), (1,2)]
create_pair(3,l)

({0, 2}, {1})

In [None]:

# Scratch example of iterating a dictionary of dictionaries.
# NOTE(review): looks like a toy weighted adjacency map (node -> {neighbour: weight}) - confirm intent.
my_dict_of_dicts = {0: {1: 10, 2: 1}, 1: {2: 1}, 2: {3: 1}, 3: {1: 1}}

for k, d in my_dict_of_dicts.items():
    print(k,d)

0 {1: 10, 2: 1}
1 {2: 1}
2 {3: 1}
3 {1: 1}
