In [1]:
import networkx as nx
import pandas as pd
import matplotlib.pyplot as plt
from itertools import combinations

# Loading data

In [2]:
df_pro_contacts_adults = pd.read_csv("pro_contacts_adults.csv", index_col=0)
df_pro_contacts_children = pd.read_csv("pro_contacts_children.csv", index_col=0)
df_households = pd.read_csv("households.csv", index_col=0)

# Initializing the graph

We start by defining the classes whose instances will be the nodes of our graph

In [3]:
class Person():
    def __init__(self, person_id, state, age):
        self.person_id = person_id
        self.age = age
        self.state = state
    
    def __str__(self):
        s = "Person #{}".format(person_id)
        return(s)
    
    def __repr__(self):
        s = "Person #{}".format(person_id)
        return(s)

class Adult(Person):
    def __init__(self, person_id, state, job_cat, pro_contacts, company_id):
        super().__init__(person_id, state, "adult")
        self.job_cat = job_cat
        self.pro_contacts = pro_contacts
        self.company_id = company_id

class Child(Person):
    def __init__(self, person_id, state, school_contacts, school_id):
        super().__init__(person_id, state, "child")
        self.job_cat = job_cat
        self.school_contacts = school_contacts
        self.school_id = school_id

We create an empty graph into which we add nodes from the pro_contacts dataframes.

In [20]:
G = nx.Graph()
people = [] # we'll store Person objects here too in order to be able to access them quickly with person_id
for i, row in df_pro_contacts_adults.iterrows(): 
    person_id = row.adult_id
    state = "S"
    job_cat = row.job_cat
    pro_contacts = row.pro_contacts
    company_id = row.company_id
    
    adult_node = Adult(person_id, state, job_cat, pro_contacts, company_id)
    G.add_node(adult_node)
    people.append(adult_node)
    print(people)
    
for i, row in df_pro_contacts_children.iterrows(): 
    person_id = row.child_id
    state = "S"
    school_contacts = row.school_contacts
    school_id = row.school_id
    
    child_node = Child(person_id, state, school_contacts, school_id)
    G.add_node(child_node)
    people.append(child_node)

    


[Person #0]
[Person #1, Person #1]
[Person #2, Person #2, Person #2]
[Person #3, Person #3, Person #3, Person #3]
[Person #4, Person #4, Person #4, Person #4, Person #4]
[Person #5, Person #5, Person #5, Person #5, Person #5, Person #5]
[Person #6, Person #6, Person #6, Person #6, Person #6, Person #6, Person #6]
[Person #7, Person #7, Person #7, Person #7, Person #7, Person #7, Person #7, Person #7]
[Person #8, Person #8, Person #8, Person #8, Person #8, Person #8, Person #8, Person #8, Person #8]
[Person #9, Person #9, Person #9, Person #9, Person #9, Person #9, Person #9, Person #9, Person #9, Person #9]
[Person #10, Person #10, Person #10, Person #10, Person #10, Person #10, Person #10, Person #10, Person #10, Person #10, Person #10]
[Person #11, Person #11, Person #11, Person #11, Person #11, Person #11, Person #11, Person #11, Person #11, Person #11, Person #11, Person #11]
[Person #12, Person #12, Person #12, Person #12, Person #12, Person #12, Person #12, Person #12, Person #12

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



# Creating cliques for households

In [5]:
n_households = max(max(df_pro_contacts_children.index), max(df_pro_contacts_adults.index))
for id_household in range(n_households):
    df_household_adults = df_pro_contacts_adults.loc[df_pro_contacts_adults.index == id_household]
    household_adults = df_household_adults.adult_id
    
    df_household_children = df_pro_contacts_children.loc[df_pro_contacts_children.index == id_household]
    household_children = df_household_children.child_id
    
    household_people_ids = list(household_children) + list(household_adults)
    for edge in combinations(household_people_ids, 2):
        print(edge)
        a = people[edge[0]]
        b = people[edge[1]]
        G.add_edge(a, b)
        G[a, b]["type"] = "household"
        G[a, b]["id"] = id_household
    
    

(6960, 6961)


KeyError: (Person #10044, Person #10044)

# Adding edges for pro contacts

In [None]:
G.edges

In [6]:
people[0]

Person #10044

In [7]:
people[6960]

Person #10044

In [None]:
df_pro_contacts_children

In [None]:
people

In [8]:
len(people)

10045

In [13]:
people

[Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person #10044,
 Person 

In [18]:
df_pro_contacts_adults

Unnamed: 0_level_0,adult_id,job_cat,pro_contacts,company_id
household_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,0,Indus_other,52,6.0
0,1,Hotel_Restaurant,114,21.0
1,2,Shops_other,14,45.0
1,3,Administration_schools,33,10.0
2,4,Services_other,770,34.0
...,...,...,...,...
3797,6955,Services_other,743,17.0
3798,6956,Services_other,1254,227.0
3799,6957,Administration_schools,97,80.0
3800,6958,Administration_schools,50,38.0
