In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Read the four raw CSV inputs in one pass; paths are relative to the
# notebook's working directory.
df_case, df_float, df_regions, df_weather = (
    pd.read_csv(csv_path)
    for csv_path in (
        "Korea/PatientInfo.csv",
        "Korea/SeoulFloating.csv",
        "Korea/Region.csv",
        "Korea/Weather.csv",
    )
)
# The patient table calls its day column 'confirmed_date'; rename it to 'date'
# so it shares a key name with the tables it is grouped and merged with later.
df_case = df_case.rename(columns={'confirmed_date': 'date'})

In [3]:
# Select the single column of interest *before* aggregating: the previous form
# aggregated every column of the frame and then discarded all but one, which
# wastes work and makes `sum()` touch unrelated (possibly non-numeric) columns.

# Number of non-null patient ids per (date, city).
case_city = df_case.groupby(['date', 'city'])['patient_id'].count().reset_index()
# Total `fp_num` per (date, city).
fp_city = df_float.groupby(['date', 'city'])['fp_num'].sum().reset_index()
# Total `fp_num` per (date, city, sex, birth_year).
fp_age = df_float.groupby(['date', 'city', 'sex', 'birth_year'])['fp_num'].sum().reset_index()

In [4]:
# Right-join so that every (date, city) row of fp_city is kept; rows without a
# matching entry in case_city get 0 through fillna. 'propotion_case' is the
# patient count divided by fp_num, after which the raw count is dropped and the
# rows are ordered by date.
df_corona = (
    pd.merge(case_city, fp_city, how='right', on=['date', 'city'])
    .fillna(0)
    .assign(propotion_case=lambda frame: frame['patient_id'] / frame['fp_num'])
    .drop(columns='patient_id')
    .sort_values(by='date')
)

In [5]:
from datetime import datetime, timedelta

def symptom_apparition(date, onset, onset_window=(2, 12), confirmed_window=(4, 14)):
    """Return the days preceding `date` that fall inside a look-back window.

    Parameters
    ----------
    date : str
        Reference day formatted as 'YYYY-MM-DD'.
    onset : bool
        When truthy, `onset_window` is used; otherwise `confirmed_window`.
    onset_window : tuple of (int, int), optional
        Half-open range ``(first, last)`` of day offsets subtracted from
        `date` when `onset` is truthy. Defaults to ``(2, 12)``.
    confirmed_window : tuple of (int, int), optional
        Same as `onset_window`, used when `onset` is falsy.
        Defaults to ``(4, 14)``.

    Returns
    -------
    list of str
        'YYYY-MM-DD' strings, one per offset, ordered from the most recent
        day (smallest offset) to the oldest.

    Raises
    ------
    ValueError
        If `date` does not match the '%Y-%m-%d' format.
    """
    first_offset, last_offset = onset_window if onset else confirmed_window
    # Parse once instead of once per generated day.
    reference_day = datetime.strptime(date, '%Y-%m-%d')
    return [
        (reference_day - timedelta(days=offset)).strftime('%Y-%m-%d')
        for offset in range(first_offset, last_offset)
    ]

In [6]:
def _contamination_period(row):
    """Return the look-back days for one patient row, or NaN when undated.

    The symptom-onset date is preferred; the 'date' column is the fallback.
    A real missing value (not the string 'NaN') is returned when both are
    null, so such rows are dropped by the groupby below instead of forming a
    bogus 'NaN' date group.
    """
    if pd.notnull(row['symptom_onset_date']):
        return symptom_apparition(row['symptom_onset_date'], True)
    if pd.notnull(row['date']):
        return symptom_apparition(row['date'], False)
    return np.nan


df_case['contamination_period'] = df_case[['symptom_onset_date', 'date']].apply(_contamination_period, axis=1)

# One row per (patient, candidate day), then count patients per
# (day, city, sex, age). groupby drops rows whose key is missing.
case_daily = (
    df_case[['contamination_period', 'city', 'sex', 'age', 'patient_id']]
    .explode('contamination_period')
    .groupby(['contamination_period', 'city', 'sex', 'age'])
    .count()
    .reset_index()
)

In [7]:
# Attach the per-day patient counts to the city-level frame (inner join on
# day and city), rename the count to 'total_number', flag every matched row as
# contaminated (1.0) and turn the age label into an integer by dropping its
# last character.
strip_age_suffix = lambda label: int(label[:-1])
df_corona = (
    pd.merge(
        df_corona,
        case_daily,
        left_on=['date', 'city'],
        right_on=['contamination_period', 'city'],
    )
    .drop(columns='contamination_period')
    .rename(columns={'patient_id': 'total_number'})
    .assign(
        contaminated=lambda frame: np.ones(len(frame)),
        age=lambda frame: frame['age'].apply(strip_age_suffix),
    )
)

In [8]:
# Right-join on fp_age so that every (date, city, sex, birth_year) row of the
# floating-population table is kept; df_corona's integer 'age' is matched
# against fp_age's 'birth_year'. Both frames carry 'fp_num', hence the
# 'fp_num_x' / 'fp_num_y' suffixes after the merge.
df_corona = df_corona.merge(
    fp_age,
    how='right',
    left_on=['date', 'city', 'sex', 'age'],
    right_on=['date', 'city', 'sex', 'birth_year'],
).drop(['age'], axis=1)

# Pack [total_number, fp_num_y] into one list per row, then explode so each
# source row becomes two consecutive rows: first the case count, then fp_num_y.
df_corona['combined'] = df_corona[['total_number', 'fp_num_y']].values.tolist()
df_corona = df_corona.explode('combined').drop(['total_number', 'fp_num_y'], axis=1)

# Re-attach the city-level fp_num (the merge also rebuilds a 0..n-1 index),
# replace missing values with 0 and drop the stale 'fp_num_x'.
df_corona = df_corona.merge(fp_city, how='right', on=['date', 'city']).fillna(0).drop(['fp_num_x'], axis=1)

# Even positions are flagged 1, odd positions 0. This is computed directly
# from the index rather than via DataFrame.apply, which rebuilt the same
# parity vector once per column only to keep a single column.
# NOTE(review): the parity trick assumes the merge above keeps each exploded
# pair adjacent and in order -- confirm on real data.
df_corona['contaminated'] = 1 - df_corona.index.to_numpy() % 2

# Row-to-row difference; on odd rows this is fp_num_y minus the case count.
df_corona['new_combined'] = df_corona['combined'].diff()

In [9]:
# Even-indexed rows keep 'combined'; odd-indexed rows take 'new_combined'
# (the difference with the preceding row).
on_even_row = df_corona.index % 2 == 0
df_corona['group_number'] = np.where(on_even_row, df_corona['combined'], df_corona['new_combined'])

# Drop the two helper columns, expose the city-level fp_num as 'total_number',
# then enrich with region attributes (minus 'code') and with the weather of
# the same date and province.
# NOTE(review): the region join is keyed on 'city' alone -- confirm that city
# names are unique in df_regions, otherwise rows are duplicated here.
df_corona = (
    df_corona
    .drop(columns=['combined', 'new_combined'])
    .rename(columns={'fp_num': 'total_number'})
    .merge(df_regions, on='city')
    .drop(columns='code')
    .merge(df_weather, on=['date', 'province'])
)

In [11]:
# Persist the assembled frame next to the notebook. With pandas' defaults the
# DataFrame index is written as the first, unnamed column.
output_csv = 'corona.csv'
df_corona.to_csv(output_csv)