In [1]:
import pandas as pd
import numpy as np
import os
import datetime as dt
import seaborn as sns
from scipy.signal import savgol_filter
from scipy.stats import gamma, poisson
import matplotlib as mpl
import itertools
import matplotlib.pyplot as plt
import re

In [7]:
# Load the domestic COVID-19 variant counts. The first column arrives without a
# header (pandas labels it 'Unnamed: 0'), so give it the name 'date' right away.
data = (
    pd.read_excel('../data/COVID19_variants_국내.xlsx', index_col=False)
    .rename(columns={'Unnamed: 0': 'date'})
)
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19242 entries, 0 to 19241
Data columns (total 14 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   date     19242 non-null  object
 1   city     19242 non-null  object
 2   -        19242 non-null  int64 
 3   BLANK    19242 non-null  int64 
 4   Beta     19242 non-null  int64 
 5   Alpha    19242 non-null  int64 
 6   Gamma    19242 non-null  int64 
 7   Kappa    19242 non-null  int64 
 8   Epsilon  19242 non-null  int64 
 9   Eta      19242 non-null  int64 
 10  Iota     19242 non-null  int64 
 11  Mu       19242 non-null  int64 
 12  Delta    19242 non-null  int64 
 13  Omicron  19242 non-null  int64 
dtypes: int64(12), object(2)
memory usage: 2.1+ MB


In [9]:
# Convert the 'date' column from strings (object dtype) to datetime64 values.
data = data.assign(date=lambda frame: pd.to_datetime(frame['date']))
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19242 entries, 0 to 19241
Data columns (total 14 columns):
 #   Column   Non-Null Count  Dtype         
---  ------   --------------  -----         
 0   date     19242 non-null  datetime64[ns]
 1   city     19242 non-null  object        
 2   -        19242 non-null  int64         
 3   BLANK    19242 non-null  int64         
 4   Beta     19242 non-null  int64         
 5   Alpha    19242 non-null  int64         
 6   Gamma    19242 non-null  int64         
 7   Kappa    19242 non-null  int64         
 8   Epsilon  19242 non-null  int64         
 9   Eta      19242 non-null  int64         
 10  Iota     19242 non-null  int64         
 11  Mu       19242 non-null  int64         
 12  Delta    19242 non-null  int64         
 13  Omicron  19242 non-null  int64         
dtypes: datetime64[ns](1), int64(12), object(1)
memory usage: 2.1+ MB


In [11]:
# Use the dates as the row index.
# Guarded so the cell is idempotent: once 'date' has been moved into the index it
# is no longer a column, and calling set_index('date') a second time (e.g. when
# the cell is re-run) would raise a KeyError.
if data.index.name != 'date':
    data = data.set_index('date')
data

Unnamed: 0_level_0,city,-,BLANK,Beta,Alpha,Gamma,Kappa,Epsilon,Eta,Iota,Mu,Delta,Omicron
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2020-01-19,Busan,0,0,0,0,0,0,0,0,0,0,0,0
2020-01-20,Busan,0,0,0,0,0,0,0,0,0,0,0,0
2020-01-21,Busan,0,0,0,0,0,0,0,0,0,0,0,0
2020-01-22,Busan,0,0,0,0,0,0,0,0,0,0,0,0
2020-01-23,Busan,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-18,total,0,0,0,0,0,0,0,0,0,0,0,6
2022-12-19,total,0,0,0,0,0,0,0,0,0,0,0,16
2022-12-20,total,0,0,0,0,0,0,0,0,0,0,0,31
2022-12-21,total,0,0,0,0,0,0,0,0,0,0,0,7


In [14]:
# Split the data by city

## Which cities are present?
data.groupby('city').size().index  ### Busan, Chungbuk, Chungnam, Daegu, Daejeon, Gangwon, Gwangju, Gyeongbuk, Gyeonggi, Gyeongnam, Incheon, Jeju, Jeonbuk, Jeonnam, Sejong, Seoul, Ulsan, total

Index(['Busan', 'Chungbuk', 'Chungnam', 'Daegu', 'Daejeon', 'Gangwon',
       'Gwangju', 'Gyeongbuk', 'Gyeonggi', 'Gyeongnam', 'Incheon', 'Jeju',
       'Jeonbuk', 'Jeonnam', 'Sejong', 'Seoul', 'Ulsan', 'total'],
      dtype='object', name='city')

In [15]:
## Every count column other than these three ('-', 'Delta', 'Omicron') is sparse,
## so keep only those three alongside the 'city' label.
data = data.loc[:, ['city', '-', 'Delta', 'Omicron']]
data

Unnamed: 0_level_0,city,-,Delta,Omicron
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-01-19,Busan,0,0,0
2020-01-20,Busan,0,0,0
2020-01-21,Busan,0,0,0
2020-01-22,Busan,0,0,0
2020-01-23,Busan,0,0,0
...,...,...,...,...
2022-12-18,total,0,0,6
2022-12-19,total,0,0,16
2022-12-20,total,0,0,31
2022-12-21,total,0,0,7


In [16]:
## There are quite a few cities, so start with the metropolitan cities:
## compute their Rt first and plot it.

# One frame per metropolitan city, each holding only the rows whose 'city' matches.
Busan_df = data.loc[data['city'].eq('Busan')]
Daegu_df = data.loc[data['city'].eq('Daegu')]
Daejeon_df = data.loc[data['city'].eq('Daejeon')]
Gwangju_df = data.loc[data['city'].eq('Gwangju')]
Incheon_df = data.loc[data['city'].eq('Incheon')]
Seoul_df = data.loc[data['city'].eq('Seoul')]
Ulsan_df = data.loc[data['city'].eq('Ulsan')]





In [18]:
conda install epyestim

Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... failed with initial frozen solve. Retrying with flexible solve.
Collecting package metadata (repodata.json): ...working... done
Solving environment: ...working... failed with initial frozen solve. Retrying with flexible solve.

Note: you may need to restart the kernel to use updated packages.



PackagesNotFoundError: The following packages are not available from current channels:

  - epyestim

Current channels:

  - https://repo.anaconda.com/pkgs/main/win-64
  - https://repo.anaconda.com/pkgs/main/noarch
  - https://repo.anaconda.com/pkgs/r/win-64
  - https://repo.anaconda.com/pkgs/r/noarch
  - https://repo.anaconda.com/pkgs/msys2/win-64
  - https://repo.anaconda.com/pkgs/msys2/noarch

To search for alternate channels that may provide the conda package you're
looking for, navigate to

    https://anaconda.org

and use the search bar at the top of the page.




In [17]:
import epyestim
from epyestim import covid19


# Load epyestim's built-in distributions

## Standard serial-interval distribution
## (original author's note: built around a mean serial interval of 4.3 days — TODO confirm against the epyestim docs)
si_distrb = covid19.generate_standard_si_distribution()

## Standard infection-to-reporting delay distribution
## (original author's note: built around a mean delay of 10.3 days — TODO confirm against the epyestim docs)
delay_distrb = covid19.generate_standard_infection_to_reporting_distribution()



ModuleNotFoundError: No module named 'epyestim'