In [4]:
import pandas as pd
import numpy as np
import copy
import matplotlib.pyplot as plt
import itertools
from tqdm import tqdm
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import mean_absolute_error as mae
from datetime import timedelta
import pmdarima as pm
from pmdarima.arima import auto_arima
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from statsmodels.tsa.api import VAR

### data

In [3]:
from data_half import data_loader_half
import easydict

In [4]:
args = easydict.EasyDict()
args['device'] = 'cuda:0'
args['train_days'] = 21
args['delay_days'] = 0
args['test_days'] = 7
args['num_epochs'] = 100
args['learning_rate'] = 1e-5
args['num_dong'] = 139
args['lstm_num_layers'] = 1
args['lstm_input_size'] = 50
args['lstm_hidden_size'] = 100
args['lstm_sequence_length'] = 1
args['graph_conv_feature_dim_user'] = 20
args['graph_conv_feature_dim_user'] = 20
args['data_path'] = '/home/hyungjun/jupyter/KT_covid19/data/'
args['task'] = 'real_feb'
args['data_normalize'] = False

In [12]:
train_dataset, train_loader, test_dataset, test_loader, min_max_values = data_loader_half(args)

== data import ==
=== data preprocess done ===


In [5]:
edge_daegu_infect = np.load('./data/raw_data/half/edge_daegu_infect_half_feb.npy', allow_pickle=True)
edge_daegu_fake_infect_full = np.load('./data/raw_data/half/edge_daegu_fake_infect_half_full.npy', allow_pickle=True)
edge_daegu_user = np.load('./data/raw_data/half/edge_daegu_user_half_feb.npy', allow_pickle=True) # 2월
edge_daegu_user_mar = np.load('./data/raw_data/half/edge_daegu_user_half_mar.npy', allow_pickle=True) 
edge_daegu_user_full = np.concatenate((edge_daegu_user, edge_daegu_user_mar))
node_daegu_fake_infect_full = torch.from_numpy(np.load('./data/raw_data/half/node_daegu_fake_infect_half_mar.npy', allow_pickle=True))
node_daegu_infect = torch.from_numpy(np.load('./data/raw_data/half/node_daegu_fake_infect_half_feb.npy', allow_pickle=True))
node_daegu_user = torch.from_numpy(np.load('./data/raw_data/half/node_daegu_user_half_feb.npy', allow_pickle=True)) # 2월
node_daegu_user_mar = torch.from_numpy(np.load('./data/raw_data/half/node_daegu_user_half_mar.npy', allow_pickle=True))
node_daegu_user_full = torch.cat((node_daegu_user, node_daegu_user_mar), dim=0)

# order : user, infect , user_full, infect_full
min_max_values = [torch.min(node_daegu_user).item(), torch.max(node_daegu_user).item(), 
                  np.min(edge_daegu_user[:,4]), np.max(edge_daegu_user[:,4]), 
                  torch.min(node_daegu_infect).item(), torch.max(node_daegu_infect).item(),
                  np.min(edge_daegu_user[:,4]), np.max(edge_daegu_user[:,4]), 
                  torch.min(node_daegu_user_full).item(), torch.max(node_daegu_user_full).item(), 
                  np.min(edge_daegu_user_full[:,4]), np.max(edge_daegu_user_full[:,4]), 
                  torch.min(node_daegu_fake_infect_full).item(), torch.max(node_daegu_fake_infect_full).item(),
                  np.min(edge_daegu_fake_infect_full[:,4]), np.max(edge_daegu_fake_infect_full[:,4])]

In [6]:
data = torch.zeros([len(train_dataset), 139])
for i in range(len(train_dataset)):
    data[i] = train_dataset[i][1][0]
data = data.transpose(1,0)

target = torch.zeros([len(test_dataset), 139])
for i in range(len(test_dataset)):
    target[i] = test_dataset[i][1][0]
target_t = target.t()

# Historical Average

### 필요 함수

In [16]:
def forecast_fnc(model, n_period):
    fc, conf_int = model.predict(n_periods=n_period, return_conf_int=True)
    return fc.tolist(), np.asarray(conf_int).tolist()

def accuracy(forecast, true):
    return torch.nn.functional.mse_loss(forecast, true)

In [161]:
def ha_predict(data, predict_len):
    predict = torch.zeros([data.shape[0], 48])
    forecast = torch.zeros([data.shape[0], predict_len*48]) # window = 48
    
    for i in range(data.shape[0]):
        for j in range(predict_len*48):
            if j < 5:
                forecast[i][j] = (torch.sum(data[i][data.shape[1]-5+j:data.shape[1]]) + torch.sum(forecast[i][0:j]))/5
            else:
                forecast[i][j] = torch.mean(forecast[i][j-5:j])
            
    return forecast

In [162]:
predict = ha_predict(data, 7)

In [163]:
predict = predict.transpose(1,0)

In [165]:
predict.shape

torch.Size([336, 139])

In [167]:
daily_mse = torch.zeros([len(test_dataset)])
for i in range(len(test_dataset)):
    daily_mse[i] = accuracy(target[i], predict[i])
    
print(torch.mean(daily_mse))
    

tensor(1712490.2500)


In [216]:
daily_mse.shape

torch.Size([336])

# ARIMA

In [118]:
data = torch.zeros([len(train_dataset), 139])
for i in range(len(train_dataset)):
    data[i] = train_dataset[i][1][0]
data = data.transpose(1,0)

In [182]:
torch.min(data)

tensor(375.)

In [124]:
TEST_DAYS=7/
forecasts = torch.zeros([139, 48*TEST_DAYS])

for i in range(139):
    print('='*20, i, '='*20)
    
    model = pm.auto_arima(data[i], start_p=0, start_q=0,
                          max_p=7, max_q=7, m=1,
                          start_P=0, seasonal=False,
                          d=1, D=0, trace=True,
                          error_action='ignore',  # don't want to know if an order does not work
                          suppress_warnings=True,  # don't want convergence warnings
                          stepwise=True)  # set to stepwise
    forecast_dong, _ = forecast_fnc(model, 48*TEST_DAYS)
    forecasts[i] = torch.tensor(forecast_dong)
    

Performing stepwise search to minimize aic
Fit ARIMA(0,1,0)x(0,0,0,0) [intercept=True]; AIC=12618.938, BIC=12628.768, Time=0.034 seconds
Fit ARIMA(1,1,0)x(0,0,0,0) [intercept=True]; AIC=12560.105, BIC=12574.849, Time=0.174 seconds
Fit ARIMA(0,1,1)x(0,0,0,0) [intercept=True]; AIC=12576.073, BIC=12590.818, Time=0.061 seconds
Fit ARIMA(0,1,0)x(0,0,0,0) [intercept=False]; AIC=12616.939, BIC=12621.854, Time=0.014 seconds
Fit ARIMA(2,1,0)x(0,0,0,0) [intercept=True]; AIC=12533.659, BIC=12553.317, Time=0.252 seconds
Fit ARIMA(3,1,0)x(0,0,0,0) [intercept=True]; AIC=12526.798, BIC=12551.372, Time=0.109 seconds
Fit ARIMA(4,1,0)x(0,0,0,0) [intercept=True]; AIC=12486.650, BIC=12516.139, Time=0.178 seconds
Fit ARIMA(5,1,0)x(0,0,0,0) [intercept=True]; AIC=12484.691, BIC=12519.094, Time=0.214 seconds
Fit ARIMA(6,1,0)x(0,0,0,0) [intercept=True]; AIC=12485.582, BIC=12524.900, Time=0.238 seconds
Fit ARIMA(5,1,1)x(0,0,0,0) [intercept=True]; AIC=12486.153, BIC=12525.471, Time=0.480 seconds
Fit ARIMA(4,1,1)

In [126]:
target = torch.zeros([len(test_dataset), 139])
for i in range(len(test_dataset)):
    target[i] = test_dataset[i][1][0]

In [132]:
forecasts = forecasts.transpose(1,0)

In [133]:
daily_mse = torch.zeros([len(test_dataset)])
for i in range(len(test_dataset)):
    daily_mse[i] = accuracy(target[i], forecasts[i])
    
print(torch.mean(daily_mse))

tensor(5579067.)


# VAR

In [188]:
np_data = data.detach().cpu().numpy()

In [207]:
np_data = np_data.transpose()

In [208]:
model = VAR(np_data)

In [214]:
results = model.fit(48)

In [223]:
var_pred = results.forecast(np_data, 336)
var_pred = torch.tensor(var_pred)

In [225]:
daily_mse_var = torch.zeros([len(test_dataset)])
for i in range(len(test_dataset)):
    daily_mse_var[i] = accuracy(target[i], var_pred[i])
    
print(torch.mean(daily_mse_var))

tensor(859621.2500)


In [331]:
daily_mse_var

tensor([  13061.1680,   26750.0371,   26409.8926,   24601.6113,   27025.0996,
          41349.0352,   76792.0859,  102331.5234,  123987.7500,  116341.1094,
          89432.6641,   66573.2344,   60644.4219,  108826.6641,  192518.8750,
         264029.4688,  436990.5938,  416340.1875,  303012.5312,  303895.3750,
         269101.5938,  193314.3594,  170674.0312,  157107.8281,  124866.0547,
         136439.9375,  141678.1250,  204003.9531,  230239.0156,  303779.6250,
         391125.7812,  488820.2188,  502731.7188,  369382.4062,  345501.9375,
         247184.2031,  231246.7031,  216719.6406,  234583.0000,  292072.0312,
         305666.5938,  234891.9844,  206855.9688,  170207.6562,  197657.2656,
         133875.8594,  195211.9062,  134024.4688,  115012.4453,  123543.0547,
         119868.2812,  181910.4531,  250592.3594,  487402.4375,  757208.7500,
         892663.0000, 1198829.6250, 1000661.7500,  809856.6250,  551273.1875,
         325911.2812,  247838.4062,  199739.3438,  217671.0625, 

---

In [8]:
edge_daegu_user_df = pd.DataFrame(edge_daegu_user, columns=['etl_ymd', 'time', 'from_dong', 'to_dong', 'mv_num'])
to_dong_df = edge_daegu_user_df.groupby(['etl_ymd', 'time', 'to_dong']).sum()["mv_num"].reset_index()
from_dong_df = edge_daegu_user_df.groupby(['etl_ymd', 'time', 'from_dong']).sum()['mv_num'].reset_index()
to_dong_np = np.array(to_dong_df['mv_num']).reshape([29, 48, 139])
from_dong_np = np.array(from_dong_df['mv_num']).reshape([29, 48, 139])
train_from_dong, test_from_dong = torch.tensor(from_dong_np[:21].reshape([-1, 139])), torch.tensor(from_dong_np[21:].reshape(-1, 139))
train_to_dong, test_to_dong = torch.tensor(to_dong_np[:21].reshape([-1, 139])), torch.tensor(to_dong_np[21:].reshape(-1, 139))
train_node, test_node = torch.tensor(node_daegu_user[:21].reshape([-1, 139])), torch.tensor(node_daegu_user[21:].reshape(-1, 139))

  


In [9]:
train_from_dong_t = train_from_dong.t()
train_to_dong_t = train_to_dong.t()
train_node_t = train_node.t()
test_from_dong_t = test_from_dong.t()
test_to_dong_t = test_to_dong.t()
test_node_t = test_node.t()

# simple DNN

In [None]:
class SimpleDNN(nn.Module):
    def __init__(self):
        super(SimpleDNN, self).__init__()
        self.fc1 = nn.Linear(144, 64)
        self.fc1_act = nn.ReLU()
        self.fc2 = nn.Linear(64, 64)
        self.fc2_act = nn.ReLU()
        self.fc3 = nn.Linear(64, 32)
        self.fc3_act = nn.ReLU()
        self.classifier = nn.Linear(32,1)
    def forward(self, x):
        inter_x = x[0].type(torch.FloatTensor).to('cuda:0')
        intra_from_x = x[1].type(torch.FloatTensor).to('cuda:0')
        intra_to_x = x[2].type(torch.FloatTensor).to('cuda:0')
        concated_x = torch.cat([inter_x.squeeze(0), intra_from_x.squeeze(0), intra_to_x.squeeze(0)])
        x = self.fc1_act(self.fc1(concated_x))
        x = self.fc2_act(self.fc2(x))
        x = self.fc3_act(self.fc3(x))  
        output = self.classifier(x)
        return output

In [10]:
model = SimpleDNN()
params = 0
for p in model.parameters():
    params += len(p.view(-1))
print(params)
print(params*139)

15553
2161867


In [None]:
# single dong modeling
num_epochs = 25
learning_rate = 1e-3
val_results = torch.zeros([139, len(test_loader)])

test_targets = torch.zeros([139, len(test_loader)])
test_pred = torch.zeros([139, len(test_loader)])

for dong in tqdm(range(139)):
    # data ready
    train_inputs, train_target = [], []
    test_inputs, test_target = [], []
    for i in range(len(data[0])-48):
        train_inputs.append([train_from_dong_t[dong][i:i+48], train_to_dong_t[dong][i:i+48], train_node_t[dong][i:i+48]])
        train_target.append(data[dong][i+48])
    for i in range(len(target_t[0])-48):
        test_inputs.append([test_from_dong_t[dong][i:i+48], test_to_dong_t[dong][i:i+48], test_node_t[dong][i:i+48]])
        test_target.append(target_t[dong][i+48])
    dnn_train_dataset = list(zip(train_inputs, train_target))
    dnn_test_dataset = list(zip(test_inputs, test_target))
    dnn_train_loader = DataLoader(dnn_train_dataset, shuffle=True)
    dnn_test_loader = DataLoader(dnn_test_dataset, shuffle=True)
    
    # model ready
    model = SimpleDNN()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    mse_criterion = nn.MSELoss()
    model.to('cuda:0')
    
    model.train()
    
    for epoch in range(num_epochs):
        for i, inputs in enumerate(dnn_train_loader):
            train_input, train_target = inputs[0], inputs[1].to('cuda:0')
            output = model(train_input)
            loss = mse_criterion(output, train_target)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
    
    model.eval()
    for i, inputs in enumerate(dnn_test_loader):
        test_input, test_target = inputs[0], inputs[1].to('cuda:0')
        output = model(test_input)
        test_pred[dong][i] = output.item()
        test_targets[dong][i] = test_target.item()
        val_results[dong][i] = mse_criterion(output, test_target)

In [15]:
output

tensor([5745.4888], device='cuda:0', grad_fn=<AddBackward0>)

In [17]:
test_pred_t = test_pred.t()
test_targets_t = test_targets.t()
daily_mse = torch.zeros([len(test_targets_t)])

for i in range(len(test_dataset)):
    daily_mse[i] = accuracy(test_targets_t[i], test_pred_t[i])
    
print(torch.mean(daily_mse))

val_results_np = daily_mse.detach().numpy()
np.save("./results/baselines/Simple_dnn_48.npy", val_results_np)

tensor(323469.8438)


In [None]:
np.load('./results/baselines/Simple_dnn_1.npy')

In [654]:
# for the comparison on parameter size
from models import GC_TG_LSTM

args = easydict.EasyDict()
args['device'] = 'cuda:0'
args['train_days'] = 21
args['delay_days'] = 0
args['test_days'] = 7
args['num_epochs'] = 250
args['learning_rate'] = 1e-5
args['num_dong'] = 139
args['lstm_num_layers'] = 1
args['lstm_input_size'] = 50
args['lstm_hidden_size'] = 100
args['lstm_sequence_length'] = 1
args['graph_conv_feature_dim_infect'] = 20
args['graph_conv_feature_dim_user'] = 20
args['deep_graph'] = True
args['concat_fc'] = True
args['temp_all_concat'] = False
args['all_concat'] = False
args['loss'] = 'mse'
args['task'] = 'real_feb'
args['data_path'] = '/home/hyungjun/jupyter/KT_covid19/data/'
args['data_normalize'] = True

gcn_lstm = GC_TG_LSTM(args)
gcn_lstm.load_state_dict(torch.load('/home/hyungjun/jupyter/KT_covid19/results/models/real_feb_mse_lr1e-05_d21-0-7_s1_catTrue_tempFalse_allFalse_lstm-h-dim100_lstm-in-dim50_g-user-dim20_g-infect-dim20_deepTrue/epochs_246_iter_1007.pt'))

params = 0
for p in gcn_lstm.parameters():
    params += len(p.view(-1))
print (params)

1680299


# DNN + LSTM

In [22]:
for i in range(len(data[0])-48):
    train_inputs.append([train_from_dong_t[dong][i:i+48], train_to_dong_t[dong][i:i+48], train_node_t[dong][i:i+48]])
    train_target.append(data[dong][i+48])
for i in range(len(target_t[0])-48):
    test_inputs.append([test_from_dong_t[dong][i:i+48], test_to_dong_t[dong][i:i+48], test_node_t[dong][i:i+48]])
    test_target.append(target_t[dong][i+48])

NameError: name 'data' is not defined

In [21]:
def denormalize(x, min_val, max_val):
    return x * (max_val - min_val) + min_val

class DNN_LSTM(nn.Module):
    def __init__(self):
        super(DNN_LSTM, self).__init__()
        self.dnn_node = nn.Sequential(
                            nn.Linear(139, 256),
                            nn.ReLU(),
                            nn.Linear(256, 128),
                            nn.ReLU(),
                            nn.Linear(128, 64),
                            nn.ReLU(),
                            nn.Linear(64, 32),
                            nn.ReLU())
        self.dnn_from_edge = nn.Sequential(
                            nn.Linear(139, 256),
                            nn.ReLU(),
                            nn.Linear(256, 128),
                            nn.ReLU(),
                            nn.Linear(128, 64),
                            nn.ReLU(),
                            nn.Linear(64, 32),
                            nn.ReLU())
        self.dnn_to_edge = nn.Sequential(
                            nn.Linear(139, 256),
                            nn.ReLU(),
                            nn.Linear(256, 128),
                            nn.ReLU(),
                            nn.Linear(128, 64),
                            nn.ReLU(),
                            nn.Linear(64, 32),
                            nn.ReLU())
        self.lstm = nn.LSTM(input_size=96, hidden_size=100, num_layers=1)
        self.lstm_act = nn.ReLU()
        self.lstm_fc = nn.Linear(100, 139)
        self.device = args.device
    def forward(self, x):
        self.lstm_input_tensor = torch.zeros([1, 48, 96]).to(self.device)
        for i in range(48):
            node = self.dnn_node(x[0][i].type(torch.FloatTensor).to(self.device))
            from_edge = self.dnn_from_edge(x[1][i].type(torch.FloatTensor).to(self.device))
            to_edge = self.dnn_to_edge(x[2][i].type(torch.FloatTensor).to(self.device))
        h_0 = Variable(torch.randn([self.lstm_num_layers, 48*self.lstm_sequence_length, self.lstm_hidden_size], requires_grad=True)).to(self.device)
        c_0 = Variable(torch.randn([self.lstm_num_layers, 48*self.lstm_sequence_length, self.lstm_hidden_size], requires_grad=True)).to(self.device)
        lstm_output, (h_out, c_out) = self.lstm(self.lstm_input_tensor, (h_0, c_0))    
        output = self.lstm_fc(self.lstm_act(lstm_output))
        return output

In [20]:
model = DNN_LSTM()
params = 0
for p in model.parameters():
    params += len(p.view(-1))
print(params)

330455


In [14]:
dnn_train_inputs, dnn_train_target = [], []
dnn_test_inputs, dnn_test_target = [], []
for i in range(len(train_dataset)):
    dnn_train_inputs.append(train_dataset[i][0][0])
    dnn_train_target.append(train_dataset[i][1][0])
for i in range(len(test_dataset)):
    dnn_test_inputs.append(test_dataset[i][0][0])
    dnn_train_target.append(train_dataset[i][1][1])
dnn_train_dataset = list(zip(dnn_train_inputs, dnn_train_target))
dnn_test_dataset = list(zip(dnn_test_inputs, dnn_train_target))
dnn_train_loader = DataLoader(dnn_train_dataset, shuffle=True)
dnn_test_loader = DataLoader(dnn_test_dataset, shuffle=True)

In [19]:
# multi dong modeling --> 수정필요
num_epochs = 50 
learning_rate = 1e-3
val_results = torch.zeros([len(test_loader)])

# model ready
model = DNN_LSTM()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
mse_criterion = nn.MSELoss()
model.to('cuda:0')

model.train()

for epoch in range(num_epochs):
    for i, inputs in enumerate(dnn_train_loader):
        train_input, train_target = inputs[0], inputs[1].to('cuda:0')
        output = model(train_input)
        loss = mse_criterion(output, train_target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

model.eval()
for i, inputs in enumerate(test_loader):
    test_input, test_target = inputs[0], inputs[1].to('cuda:0')
    output = model(test_input)
    val_results[dong][i] = mse_criterion(output, test_target)

RuntimeError: mat1 dim 1 must match mat2 dim 0

# Flu data reconstruction

In [5]:
flu = np.load('./data/raw_data/flu/flu_np.npy', allow_pickle=True)

In [6]:
flu_seoul = flu[flu[:,2]=='seoul']

In [7]:
flu_seoul_lst = flu_seoul.tolist()
cols = np.unique(flu_seoul[:,3])
print(cols)
len(cols)

['dobong' 'dongdaemun' 'dongjak' 'dongjak ' 'eunpyeong' 'gangbuk'
 'gangdong' 'gangnam' 'gangseo' 'geumcheon' 'guro' 'gwanak' 'gwangjin'
 'gwangmyeong' 'jongno ' 'jungnang' 'mapo' 'nowon' 'seocho' 'seudaemun'
 'songpa' 'sungbuk' 'yangcheon' 'yeongdeungpo' 'yongsan' 'yongsan ']


26

In [8]:
np.where(flu_seoul[:,3]=='yongsan ')
flu_seoul_lst[547][3] = 'yongsan'
np.where(flu_seoul[:,3]=='dongjak ')
flu_seoul_lst[447][3] = 'dongjak'
np.where(flu_seoul[:,3]=='jongno ')
flu_seoul_lst[545][3] = 'jongno'
np.where(flu_seoul[:,3]=='gwangmyeong')
del flu_seoul_lst[672]
flu_seoul_lst[672]

['2019_2020', 78, 'seoul', 'geumcheon', '2020-01-17', 'FluA', 'Not Done', 'ND']

In [9]:
flu_seoul = np.array(flu_seoul_lst)
cols = np.unique(flu_seoul[:,3])
print(cols)
len(cols)

['dobong' 'dongdaemun' 'dongjak' 'eunpyeong' 'gangbuk' 'gangdong'
 'gangnam' 'gangseo' 'geumcheon' 'guro' 'gwanak' 'gwangjin' 'jongno'
 'jungnang' 'mapo' 'nowon' 'seocho' 'seudaemun' 'songpa' 'sungbuk'
 'yangcheon' 'yeongdeungpo' 'yongsan']


23

In [10]:
cols = np.append(cols, 'junggu')
cols = np.append(cols, 'sungdong')

In [11]:
cols = sorted(cols)

In [16]:
import datetime

In [17]:
idx_17_18 = [datetime.datetime(year=2017, month=11, day=1)+datetime.timedelta(days=i) for i in range(151)]
idx_18_19 = [datetime.datetime(year=2018, month=11, day=1)+datetime.timedelta(days=i) for i in range(151)]

idx_19_20 = [datetime.datetime(year=2019, month=11, day=1)+datetime.timedelta(days=i) for i in range(152)]

In [18]:
idx = idx_17_18 + idx_18_19 + idx_19_20

In [19]:
def test_check(data):
    if data[4] == 'Not Done':
        if data[6] =='Not Done' or data[6] == 'No Test':
            return None
        return data[6].split('-')
    else:
        return data[4].split('-')

In [20]:
flu_df = pd.DataFrame(data = np.zeros([len(idx), len(cols)]), index=idx, columns=cols)
for i in range(len(flu_seoul)):
    date = test_check(flu_seoul[i,:])
    if date ==None:
        continue
    year, mon, day = int(date[0]), int(date[1]), int(date[2])
    date = datetime.datetime(year=year, month=mon, day=day)
    sgg = flu_seoul[i,:][3]
    if date not in idx:
        continue
    for k in range(5):
        curr_date = date+datetime.timedelta(days=k)
        if curr_date not in idx:
            continue
        flu_df.loc[date+datetime.timedelta(days=k),sgg] += 1

In [40]:
flu_df.iloc[370]

dobong           0.0
dongdaemun       0.0
dongjak          0.0
eunpyeong        0.0
gangbuk          0.0
gangdong         0.0
gangnam          1.0
gangseo          0.0
geumcheon        9.0
guro             3.0
gwanak           0.0
gwangjin         0.0
jongno           0.0
junggu           0.0
jungnang         0.0
mapo             0.0
nowon            0.0
seocho           1.0
seudaemun        0.0
songpa           0.0
sungbuk          0.0
sungdong         0.0
yangcheon        1.0
yeongdeungpo    12.0
yongsan          0.0
Name: 2020-01-08 00:00:00, dtype: float64

In [43]:
flu_df.to_numpy()

array([ 0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  9.,  3.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  1., 12.,  0.])

In [44]:
np.save('./data/raw_data/flu/flu_seoul.npy',flu_df.to_numpy())

In [49]:
np.load('./data/raw_data/flu/flu_seoul.npy')[373]

array([0., 0., 1., 1., 0., 0., 0., 0., 9., 4., 1., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 1., 5., 0.])