In [1]:
import os
import pandas as pd
import csv

import math
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
from matplotlib import font_manager, rc
from matplotlib import style
%matplotlib inline

import folium
import json

from IPython.display import display

In [2]:
patient_table = pd.read_csv("/home/kh/19Fall/Corona/data/PatientInfo.csv", parse_dates = ['confirmed_date'])
region = pd.read_csv("/home/kh/19Fall/Corona/data/Region.csv")
travel_route = pd.read_csv("~/19Fall/Corona/data/travel_route.csv")

In [3]:
patient_table.head(10)

Unnamed: 0,patient_id,global_num,sex,birth_year,age,country,province,city,disease,infection_case,infected_by,confirmed_date
0,1000000001,2.0,male,1964.0,50s,Korea,Seoul,Gangseo-gu,,overseas inflow,,2020-01-23
1,1000000002,5.0,male,1987.0,30s,Korea,Seoul,Jungnang-gu,,overseas inflow,,2020-01-30
2,1000000003,6.0,male,1964.0,50s,Korea,Seoul,Jongno-gu,,contact with patient,2002000000.0,2020-01-30
3,1000000004,7.0,male,1991.0,20s,Korea,Seoul,Mapo-gu,,overseas inflow,,2020-01-30
4,1000000005,9.0,female,1992.0,20s,Korea,Seoul,Seongbuk-gu,,contact with patient,1000000000.0,2020-01-31
5,1000000006,10.0,female,1966.0,50s,Korea,Seoul,Jongno-gu,,contact with patient,1000000000.0,2020-01-31
6,1000000007,11.0,male,1995.0,20s,Korea,Seoul,Jongno-gu,,contact with patient,1000000000.0,2020-01-31
7,1000000008,13.0,male,1992.0,20s,Korea,Seoul,etc,,overseas inflow,,2020-02-02
8,1000000009,19.0,male,1983.0,30s,Korea,Seoul,Songpa-gu,,overseas inflow,,2020-02-05
9,1000000010,21.0,female,1960.0,60s,Korea,Seoul,Seongbuk-gu,,contact with patient,1000000000.0,2020-02-05


### visualize infection route using BFS

In [42]:
class Order:
    def __init__(self, rect_size, rows, cols):
        self.region_count = dict()
        self.patient_pos = dict()
        for city in region['province']:
            self.region_count['city'] = 0
        self.rows = rows
        self.cols = cols
        self.lat_interval = rect_size / (rows + 1)
        self.lon_interval = rect_size / (cols + 1)
        self.lat_init = rect_size / 2 - self.lat_interval
        self.lon_init = rect_size / 2 - self.lon_interval
        self.epsilons = np.arange(-self.lat_interval/4, self.lat_interval/4, self.lat_interval/cols)
        self.epsilons_len = len(self.epsilons)
        
    def set_loc(self, patient_id, city_name): # 점(환자)의 좌표 계산
        num = None
        epsilon = None
        if patient_id in self.patient_pos:
            num, epsilon = self.patient_pos[patient_id]
        else:
            num = self.region_count[city_name]
            # eps_len = len(self.epsilons)
            epsilon = self.epsilons[np.random.randint(self.epsilon_len)]
            self.patient_pos[patient_id] = (num, epsilon)
            self.region_count[city_name] += 1
        row = num // self.rows
        col = num % self.rows
        latitude_jit = self.lat_init - self.lat_interval * row + epsilon
        longitude_jit = self.long_init + self.long_interval * col
        return latitude_jit, longitude_jit
        
def get_location(info): #  환자의 province, city정보에 대한 좌표 
    info = info.reset_index()
    region_info = None
    city = info.loc[0, 'city']
    province = info.loc[0, 'province']
    if city is np.nan or city ==  'etc': # city 값이 NaN이거나 'etc'인 경우
        region_info = region[region['province'] == province]
    else:
        region_info = region[(region['province'] == province) & (region['city'] == city)]
    return region_info.reset_index().loc[0, ['latitude', 'longitude']]
        
def get_city_name(info):
    info = info.reset_index()
    return info.loc[0, 'city']
        
### first to sixth level spreader
depth_color = ['red', 'orange', 'yellow', 'green', 'blue', 'purple']

### visualize edges between spreader and infectee on the map
### spreader : spreader's patient_id column value
### infectee : infectee's patient_id column value
### depth : the level of contagion
### m = folium map
### order = arange each nodes' position on the map

def draw_spreading_line(spreader, patient, depth, m, order):
    spreader_info = patient_table[patient_table['patient_id'] == spreader]
    patient_info = patient_table[patient_table['patient_id'] == patient]
    spreader_pos = list(get_location(spreader_info))
    patient_pos = list(get_location(patient_info))
    spreader_city = get_city_name(spreader_info)
    patient_city = get_city_name(patient_info)
    
    Ordering = Order(0.01, 10, 10)
    
    jit_s_lat, jit_s_lon = Ordering.set_loc(spreader, spreader_city)
    jit_p_lat, jit_p_lon = Ordering.set_loc(patient, patient_city)
    
    color = (lambda x: depth_color[depth] if type(x) is int else x)(depth)
    
    ### first level spreader
    if (depth == 0):
        inform = (
        str(spreader)
        + '(' + spreader_info['province'].to_string(index=False)
            + (lambda x : x.to_string(index=False) if x.values[0] is not np.nan else '')(spreader_info['city'])
            + ") start" 
        )
        spreader_case = spreader_count[spreader_count['infected_by'] == spreader]
        for i in range(len(spreader_case)):
            case = spreader_case.iloc[i]
            where = case['infection_case']
            count = case['count']
            inform += f'<br>{count:3} patient : {where}'
        inform += (
        '<br>total_contact_number: '
        + (lambda x: str(int(x.values[0])) if not np.isnan(x.values[0]) else '?')(spreader_info['contact_number'])
        ) 
        
        lat = spreader_pos[0] + jit_s_lat
        lon = spreader_pos[0] + jit_s_lon
        
        folium.CircleMaker(
        location=(lat, lon),
        fill_color = color,
        fill_opacity = 1.0,
        color = color,
        radius = 4,
        weight = 2,
        tooltip = inform
        ).add_to(m)
    
    # things on the tooltip
    inform = (
    str(patient)
    +  '(' + spreader_info['province'].to_string(index = False)
    + (lambda x: x.to_string(index = False) if x.values[0] is not np.nan else '')(patient_info['city'])
    + 'infected by '
    + str(spreader)
    + '(' + spreader_info['province'].to_string(index = False)
    + (lambda x: x.to_string(index = False) if x.values[0] is not np.nan else '')(spreader_info['city'])
    +  ')<br>infection case : ' 
    + (lambda x: x.to_string(index = False) if x.values[0] is not np.nan else '')(patient_info['infection_case'])
    + '<br>total contact number'
    + (lambda x: str(int(x.values[0])) if not np.isnan(x.values[0]) else '?')(patient_info['contact_number'])
    )
            
    if spreader_pos == patient_pos:
            spreader_pos[0] += jit_s_lat
            spreader_pos[1] += jit_s_lon
            patient_pos[0] += jit_p_lat
            patient_pos[1] += jit_p_lon
            
            folium.PolyLine(
                locations = [spreader_pos, patient_pos],
                color = color,
                weight = 1,
                tooltip = inform
            ).add_to(m)
            
            folium.CircleMarker(
                location = patient_pos,
                fill_color = color,
                color = color,
                radius = 4,
                weight = 2,
                tooltip = inform
            ).add_to(m)
            
    else:
            spreader_pos[0] += jit_s_lat
            spreader_pos[1] += jit_s_lon
            patient_pos[0] += jit_p_lat
            patient_pos[1] += jit_p_lon
            
            folium.PolyLine(
                locations = [spreader_pos, patient_pos],
                color = color,
                weight = 1,
                tooltip = inform
            ).add_to(m)
            
            folium.RegularPolygonMarker(
                locations = patient_pos,
                fill_color = color,
                fill_opacity = 0.2,
                color = color,
                number_of_sides = 4,
                radius = 7,
                weight = 2,
                tooltip = inform
            ).add_to(m)
            
def draw_no_spreader(spreader, patient, depth, m, display_order):
    patient_info = patient_table[patient_table['patient_id'] == patient]
    patient_pos = list(get_location(patient_info))
    patient_city = get_city_name(patient_info)
            
def draw_cycle(spreader, patient, color, m, order):
            spreader_info = patient_table[patient_table['patient_id'] == spreader]
            patient_info = patient_table[patient_table['patient_id'] == patient]
            spreader_pos = list(get_location(spreader_info))
            patient_pos = list(get_location(patient_info))
            spreader_city = get_city_name(spreader_info)
            patient_city = get_city_name(patient_info)
            
            inform = (
                str(patient)
                +  '(' + spreader_info['province'].to_string(index = False)
                + (lambda x: x.to_string(index = False) if x.values[0] is not np.nan else '')(patient_info['city'])
                + 'infected by '
                + str(spreader)
                + '(' + spreader_info['province'].to_string(index = False)
                + (lambda x: x.to_string(index = False) if x.values[0] is not np.nan else '')(spreader_info['city'])
                +  ')<br>infection case : ' 
                + (lambda x: x.to_string(index = False) if x.values[0] is not np.nan else '')(patient_info['infection_case'])
                + '<br>total contact number'
                + (lambda x: str(int(x.values[0])) if not np.isnan(x.values[0]) else '?')(patient_info['contact_number'])
            )

            Ordering = Order(0.01, 10, 10)
    
            jit_s_lat, jit_s_lon = Ordering.set_loc(spreader, spreader_city)
            jit_p_lat, jit_p_lon = Ordering.set_loc(patient, patient_city)
            
            spreader_pos[0] += jit_s_lat; spreader_pos[1] += jit_s_lon
            patient_pos[0] += jit_p_lat; patient_pos[1] += jit_p_lon
            
            folium.PolyLine(
                locations = [spreader_pos, patient_pos],
                color = color,
                weight = 1,
                tooltip = inform
            ).add_to(m)
            
            folium.CircleMarker(
                locations = patient_pos,
                fill_color = color,
                fill_opacity = 1.0,
                color = color,
                radius = 4,
                weight = 2,
                tooltip = inform
            ).add_to(m)

In [43]:
patient_and_spreader = patient_table[['patient_id', 'infected_by']].dropna(axis = 0).astype('int64')

infection_route = dict()
rir = dict()

for i in patient_and_spreader.index:
    patient = patient_and_spreader.loc[i, 'patient_id']
    spreader = patient_and_spreader.loc[i, 'infected_by']
    
    rir[patient] = spreader
    if spreader in infection_route:
        infection_route[spreader].append(patient)
    else:
        infection_route[spreader] = [patient]

# create tree and eliminate cycle
spreader_root = set()
cycles = []

for spreader in infection_route.keys():
    maybe_cycle = [spreader]
    while spreader in rir:
        spreader = rir[spreader]
        # start to eliminate cycle
        if spreader in maybe_cycle:
            idx = maybe_cycle.index(spreader)
            candidate = maybe_cycle[idx:]
            for cycle in cycles:
                for node in candidate:
                    if node not in cycle:
                        break
                    else:
                        if len(cycle) == len(candidate): break
            else:
                cycles.append(maybe_cycle[idx:])
        break
        
        maybe_cycle.append(spreader)
    spreader_root.add(spreader)

In [46]:
center = [36.641876, 127.488759]
rect_size = 0.01
display_order = Order(rect_size, 10, 10)
m = folium.Map(location = center,
              zoom_start = 8,
              tiles = 'cartodbpositron'
              )
for i in range(len(region)):
    if i == len(region) - 1:
        continue
    loc = region.loc[i, ['latitude', 'longitude']]
    province = region.loc[i, ['province']].to_string(index = False)
    city = region.loc[i,['city']].to_string(index = False)
    inform = (province + ' ' + (lambda x: x if x != province else '')(city))
    
    folium.Rectangle(
    bounds = (loc + [rect_size/2, - rect_size/2],
             loc + [-rect_size/2, rect_size/2]),
    radius = 180,
    color = 'black',
    fill_color = 'black',
    tooltip = inform
    ).add_to(m)
    
# Explore infection level through BFS
print('case 1: No spreader')
visited = []
queue = []
for starter in spreader_root:
    visited.append(starter)
    queue.append((starter, 0))

while len(queue) > 0:
    spreader, depth = queue[0]
    queue.pop(0)
    for patient in infection_route[spreader]:
        try:
            draw_spreading_line(spreader, patient, depth, m, display_order)
        except KeyError:
            print('Spreader id does not exist : ' + str(spreader) + ' ' +'Patient : ' + str(patient))
            draw_no_spreader(spreader, patient, depth, m, display_order)
        if patient in infection_route and patient not in visited:
            queue.append((patient, depth +1))
            visited.append(patient)
# display cycle
print('\ncase 2 : cycle start')
n = 1
for cycle in cycles:
    print(f'cycle {n}')
    n += 1
    for node in cycle:
        patient = patient_info.loc[patient_info['patient_id'] == node, ['patient_id', 'infected_by']].astype('int64').values[:]
        print(f'patitent {patient[0,0]} infected by {patient[0,1]}')
              
    cycle.append(cycle[0])
    for i in range(len(cycle)-1):
            try:
                draw_cycle(cycle[i], cycle[i+1], 'white', m, display_order)
            except:
                print('spreader id does not exist : ' + str(spreader) + 'patient :' + str(patient))
    cycle.pop()

case 1: No spreader
Spreader id does not exist : 2002000001 Patient : 1000000003
Spreader id does not exist : 1000000002 Patient : 1000000005
Spreader id does not exist : 1000000003 Patient : 1000000006
Spreader id does not exist : 1000000003 Patient : 1000000007
Spreader id does not exist : 1000000003 Patient : 1000000010
Spreader id does not exist : 1000000003 Patient : 1000000017
Spreader id does not exist : 1500000002 Patient : 1000000040
Spreader id does not exist : 1000000264 Patient : 1000000311
Spreader id does not exist : 1000000264 Patient : 1000000341
Spreader id does not exist : 1000000266 Patient : 1000000297
Spreader id does not exist : 1000000266 Patient : 1000000298
Spreader id does not exist : 1000000266 Patient : 1000000491
Spreader id does not exist : 1000000140 Patient : 1000000164
Spreader id does not exist : 1000000396 Patient : 1000000415
Spreader id does not exist : 1000000396 Patient : 1000000416
Spreader id does not exist : 1000000015 Patient : 1000000020
Spre

In [45]:
m

In [38]:
get_loc(patient_table)

NameError: name 'get_loc' is not defined