# Parse networks

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
from datetime import timedelta
import itertools

In [2]:
def ConstructNetwork(data, start_week, areas=('Wohnbereich',)):
    '''
    Constructs a multi-graph: each node is an inhabitant ID and each link is a
    shared living area. Two nodes can have more than one link if they share
    more than one kind of living area (for example a shared room and a shared
    Wohnbereich), but never more than one link per kind of area.
    Living areas are counted as "shared" if two inhabitants shared them in the
    given "start_week" or in the week after.

    Parameters
    ----------
    data : pandas.DataFrame
        Table with the columns 'week' and 'ID' and one column for every entry
        of "areas".
    start_week :
        Value that is compared against data['week']; a timedelta of seven
        days is added to it to obtain the second week.
    areas : iterable of str, optional
        Names of the area columns for which links are created. The default
        ('Wohnbereich',) reproduces the behaviour of the earlier version of
        this function, which only ever linked inhabitants via 'Wohnbereich'.
        Pass e.g. ['Wohnbereich', 'Tischnummer', 'Zimmernummer'] to also link
        shared tables and rooms.

    Returns
    -------
    networkx.MultiGraph
        Every edge carries an 'area' attribute holding the area column name.
    '''
    G = nx.MultiGraph()

    # look at the week of the first infection and the week after
    for week in (start_week, start_week + timedelta(7)):
        week_data = data[data['week'] == week]

        for area in areas:
            # iterate over all sub-units of the given area, for example the
            # different room or table numbers. Missing values never compare
            # equal to anything, so they cannot form a shared sub-area and
            # are dropped up front.
            for sub_area in week_data[area].dropna().unique():
                ids = week_data.loc[week_data[area] == sub_area, 'ID'].values

                # every pair of inhabitants in the same sub-area is linked
                for u, v in itertools.combinations(ids, 2):
                    existing = G.get_edge_data(u, v)
                    # add a link only if this pair does not yet have one for
                    # this kind of area (it may already exist from the other
                    # week)
                    if existing is None or all(
                            attrs['area'] != area
                            for attrs in existing.values()):
                        G.add_edge(u, v, area=area)
    return G

In [3]:
def ConstructInteractionTable(data, G, start_week, areas=None):
    '''
    Constructs a table with one row per inhabitant that was not infected in
    the start week. For every area kind it holds a binary "<area>_link"
    column that records whether the inhabitant has a link of that kind to an
    inhabitant who was infected in the start week, plus a binary
    "transmission" column that records if the inhabitant was infected in one
    of the two weeks following the start week.

    Parameters
    ----------
    data : pandas.DataFrame
        Table with the columns 'week', 'ID' and 'Covid19_pos'.
    G : graph
        Object whose get_edge_data(u, v) returns None if there is no link, or
        a mapping whose values are attribute dicts with an 'area' entry
        (as produced by ConstructNetwork).
    start_week :
        Value that is compared against data['week']; timedeltas of 7 and 14
        days are added to it to obtain the follow-up weeks.
    areas : iterable of str, optional
        Area kinds for which a link column is created. Defaults to
        ['Wohnbereich', 'Tischnummer', 'Zimmernummer'], so the function no
        longer depends on a module-level "areas" variable being defined
        before it is called.

    Returns
    -------
    pandas.DataFrame
        Indexed by the IDs of the susceptible inhabitants.
    '''
    if areas is None:
        areas = ['Wohnbereich', 'Tischnummer', 'Zimmernummer']

    week_data = data[data['week'] == start_week]
    infected = week_data.loc[week_data['Covid19_pos'] == 1, 'ID'].values
    susceptibles = week_data.loc[week_data['Covid19_pos'] == 0, 'ID'].values

    # construct contact data table: all link indicators start at 0
    contact_df = pd.DataFrame(0, index=susceptibles,
                              columns=['{}_link'.format(a) for a in areas])

    # add contacts to infected inhabitants: iterate over all susceptible and
    # already infected inhabitants and check if they shared a living area
    for s in susceptibles:
        for i in infected:
            edge_data = G.get_edge_data(s, i)
            if edge_data is None:  # no link between the two inhabitants
                continue
            for attrs in edge_data.values():
                contact_df.loc[s, '{}_link'.format(attrs['area'])] = 1

    # determine if the inhabitant was infected in the subsequent two weeks;
    # collect the positive IDs once instead of scanning them per inhabitant
    newly_infected_ids = set()
    for offset in (7, 14):
        week = start_week + timedelta(offset)
        positive = (data['week'] == week) & (data['Covid19_pos'] == 1)
        newly_infected_ids.update(data.loc[positive, 'ID'])

    contact_df['transmission'] = 0
    contact_df.loc[contact_df.index.isin(newly_infected_ids),
                   'transmission'] = 1

    return contact_df

## WB 2

In [4]:
WB = 2

# load only the columns needed for the network analysis; 'week' is parsed
# as a date so that it can be compared and shifted by whole weeks
data = pd.read_csv(
    '../../data/nursing_homes/WB{}_combined_clean.csv'.format(WB),
    usecols=['week', 'ID', 'Wohnbereich', 'Tischnummer',
             'Zimmernummer', 'Covid19_pos'],
    parse_dates=['week'],
)

weeks = sorted(data['week'].unique())

# list the positive inhabitants for every week that has at least one case
for week in weeks:
    week_data = data[data['week'] == week]
    if not week_data['Covid19_pos'].sum() > 0:
        continue
    print(week)
    print(week_data[week_data['Covid19_pos'] == 1]['ID'])
    print()

2020-06-08T00:00:00.000000000
590     660d2de3d6304dbb8df845a970b2ef60
772     0be377fd073143d09f6081b34faf9b38
1110    a04f301c26e345d7a84b22cc02d0c847
Name: ID, dtype: object

2020-06-15T00:00:00.000000000
591     660d2de3d6304dbb8df845a970b2ef60
1059    90e9824026fc4c4ba3eee6de9525cbd3
Name: ID, dtype: object

2020-06-22T00:00:00.000000000
748    3fb1008c4419476398fd5a0ca2410dea
Name: ID, dtype: object



In [164]:
# area kinds that get a link column in the interaction table
areas = ['Wohnbereich', 'Tischnummer', 'Zimmernummer']
# first week with positive cases in the listing printed above
start_week = pd.to_datetime('2020-06-08')

G = ConstructNetwork(data=data, start_week=start_week)
contact_df = ConstructInteractionTable(data=data, G=G, start_week=start_week)

## WB 4

In [181]:
WB = 4

# load only the columns needed for the network analysis; 'week' is parsed
# as a date so that it can be compared and shifted by whole weeks
data = pd.read_csv('../../data/nursing_homes/WB{}_combined_clean.csv'.format(WB),
                  usecols=['week', 'ID', 'Wohnbereich', 'Tischnummer', 
                           'Zimmernummer', 'Covid19_pos'],
                  parse_dates=['week'])

weeks = sorted(data['week'].unique())

# list the positive inhabitants for every week that has at least one case
# and remember the first such week as the start of the outbreak
start_week = None
for week in weeks:
    week_data = data[data['week'] == week]
    if week_data['Covid19_pos'].sum() > 0:
        # detect first outbreak; identity check, since None is a singleton
        if start_week is None:
            start_week = pd.to_datetime(week)
        print(week)
        print(week_data[week_data['Covid19_pos'] == 1][['ID', 'Tischnummer']])
        print()

2020-06-01T00:00:00.000000000
                                   ID  Tischnummer
17   a74c827f2b664ef5b60f7d4aa50cf416          NaN
69   be323f9339ee48de827ed4961a297864          NaN
199  c592f3663d4b4416bd1294dc15eafdf8          NaN
251  c54b6b3159bc4bdca96b64ef250b1b94          NaN
641  eda9705dc3b247c4836d69b5c252969d          NaN
719  c7e509d005f34d56bc325c018d122e0d          NaN
771  0a47a4aef6bc40518d25c82bd57e4659          NaN
823  b8487f39683d4e57880c07bfb68076f2          NaN
849  1f9024a3611147a8bdf46267ebe49818          NaN
875  a2db34184779453194a14d2ed8fb7fe9          NaN
927  a641812985a54fc2b846309a60bb61c8          NaN

2020-06-08T00:00:00.000000000
                                    ID  Tischnummer
356   c0f2c009cdab4c4c83fc71e8bb9f4f47          NaN
382   493a007719304340a9895accde141db3          NaN
408   6b89419de5d54f28b6d06baed976dfbf          NaN
460   e57b78423b854fea96d28cab60acdf32          NaN
1110  6abd11f05d5946f1a3153fda47a8b565          NaN

2020-06-15T00:

In [179]:
# area kinds that get a link column in the interaction table
areas = ['Wohnbereich', 'Tischnummer', 'Zimmernummer']

# build the contact network and the per-inhabitant interaction table,
# using the start_week set in the preceding cell
G = ConstructNetwork(data=data, start_week=start_week)
contact_df = ConstructInteractionTable(data=data, G=G, start_week=start_week)

In [184]:
# move the inhabitant IDs from the index into a regular column
contact_df = contact_df.reset_index()

In [186]:
# give the former index column a descriptive name
contact_df = contact_df.rename(columns={'index':'Bewohner'})

In [191]:
# store the interaction table; the file name carries the WB number and the
# date of the start week
links_path = '../../data/nursing_homes/WB{}_links_{}.csv'.format(
    WB, start_week.date())
contact_df.to_csv(links_path, index=False)

In [195]:
# share of inhabitants with a Wohnbereich link whose transmission flag is 1
linked_outcomes = contact_df[contact_df['Wohnbereich_link'] == 1]['transmission']
linked_outcomes.sum() / len(linked_outcomes)

0.27586206896551724

In [196]:
# display the interaction table
contact_df

Unnamed: 0,Bewohner,Wohnbereich_link,Tischnummer_link,Zimmernummer_link,transmission
0,fd722bf047214c41a50e3e39e19fea82,1,0,0,0
1,0548264809c04079aa8542e894765c24,1,0,0,0
2,c202699481244bf99734c37f76db2033,1,0,0,0
3,52478570f41d4a1481197b0fc55000ca,1,0,0,0
4,5be069836e304d2d95eb34b4471a7d41,1,0,0,0
5,59a3aa5b346e49b6ae600d5b1dd6de8e,0,0,0,0
6,b2835c40e9aa4ad98a71fb41a1a491b3,1,0,0,0
7,a33e00c70dad4b6489e550f807cb41f7,1,0,0,1
8,c0f2c009cdab4c4c83fc71e8bb9f4f47,1,0,0,1
9,493a007719304340a9895accde141db3,1,0,0,1


In [198]:
# number of rows per distinct 'Wohnbereich' value in the loaded data
data['Wohnbereich'].value_counts()

4.0    906
7.0    106
5.0     42
Name: Wohnbereich, dtype: int64