In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from matplotlib import font_manager, rc
# Register the font at malgun.ttf so Korean text renders in plots.
# The path is Windows-specific, so check that the file exists first; on other
# machines fall back to matplotlib's current default family instead of letting
# the whole import cell fail.
import os

MALGUN_FONT_PATH = "c:/Windows/Fonts/malgun.ttf"
if os.path.isfile(MALGUN_FONT_PATH):
    font_name = font_manager.FontProperties(fname=MALGUN_FONT_PATH).get_name()
    rc('font', family=font_name)
else:
    # Keep `font_name` defined (as a plain family string) for any later cell
    # that refers to it.
    font_name = plt.rcParams['font.family'][0]
    print("Font file not found: %s (using default font family %r)"
          % (MALGUN_FONT_PATH, font_name))
%matplotlib inline

import glob
import sklearn
import torch
import torch.nn as nn

# 1. Data Load

In [None]:
# Read ./data/total.csv into a DataFrame.
csv_path = './data/total.csv'
df = pd.read_csv(csv_path)

In [5]:
# Drop the 'Unnamed: 0' column (presumably an index that was written out when
# the CSV was saved -- confirm against the file).
# Rebinding instead of inplace=True, together with errors='ignore', keeps this
# cell safe to re-run: a second run is a no-op rather than a KeyError.
df = df.drop(columns='Unnamed: 0', errors='ignore')

In [8]:
# Use the 'Date' column as the index.
# The guard makes the cell re-runnable: once 'Date' is already the index the
# column no longer exists, and calling set_index('Date') again would raise
# KeyError. On a first run a genuinely missing 'Date' column still raises.
if df.index.name != 'Date':
    df = df.set_index('Date')
df.head()

Unnamed: 0_level_0,roam,g_confirmed,g_deaths,g_recovered,g_dayconfirmed,g_daydeaths,g_dayrecovered,k_confirmed,k_dayconfirmed
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2020-01-22,23527,555,17,28,0.0,0.0,0.0,1,1
2020-01-23,21559,654,18,30,99.0,1.0,2.0,1,0
2020-01-24,20726,941,26,36,287.0,8.0,6.0,2,1
2020-01-25,18496,1434,42,39,493.0,16.0,3.0,2,0
2020-01-26,26214,2118,56,52,684.0,14.0,13.0,3,1


# 2. Data Preprocessing

In [15]:
# Show the column labels of df.
df.columns

Index(['roam', 'g_confirmed', 'g_deaths', 'g_recovered', 'g_dayconfirmed',
       'g_daydeaths', 'g_dayrecovered', 'k_confirmed', 'k_dayconfirmed'],
      dtype='object')

In [16]:
# Take the frame's values as a NumPy array and cast them to float64.
raw_values = df.values
all_data = raw_values.astype('float64')

In [17]:
# Peek at the first five rows of the float array.
all_data[:5]

array([[2.3527e+04, 5.5500e+02, 1.7000e+01, 2.8000e+01, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 1.0000e+00, 1.0000e+00],
       [2.1559e+04, 6.5400e+02, 1.8000e+01, 3.0000e+01, 9.9000e+01,
        1.0000e+00, 2.0000e+00, 1.0000e+00, 0.0000e+00],
       [2.0726e+04, 9.4100e+02, 2.6000e+01, 3.6000e+01, 2.8700e+02,
        8.0000e+00, 6.0000e+00, 2.0000e+00, 1.0000e+00],
       [1.8496e+04, 1.4340e+03, 4.2000e+01, 3.9000e+01, 4.9300e+02,
        1.6000e+01, 3.0000e+00, 2.0000e+00, 0.0000e+00],
       [2.6214e+04, 2.1180e+03, 5.6000e+01, 5.2000e+01, 6.8400e+02,
        1.4000e+01, 1.3000e+01, 3.0000e+00, 1.0000e+00]])

In [22]:
# Print the number of rows and what 30% of that count is
# (the candidate size for the test set).
n_samples = len(all_data)
print('데이터 길이: %s' % n_samples)
test_len = n_samples * 0.3
print('test_set 30%% 지정 : %s' % test_len)

데이터 길이: 70
test_set 30% 지정 : 21.0


In [23]:
# Hold out the last 30% of rows as the test set.
# The size is derived from the data instead of being hard-coded (21 for the
# 70 rows shown in this notebook), and the split uses an explicit index:
# `all_data[:-n]` would return an EMPTY training set when n == 0.
TEST_RATIO = 0.3
test_data_size = int(round(len(all_data) * TEST_RATIO))
split_index = len(all_data) - test_data_size

train_data = all_data[:split_index]
test_data = all_data[split_index:]

In [24]:
# Report the size of each split and preview the first ten test rows.
n_train, n_test = len(train_data), len(test_data)
print("length of train data : %s" % n_train)
print("length of test data : %s" % n_test)
test_preview = test_data[:10]
print(test_preview)

length of train data : 49
length of test data : 21
[[2.19100e+03 1.25875e+05 4.61500e+03 6.69950e+04 7.25500e+03 3.53000e+02
  2.59900e+03 7.75500e+03 2.42000e+02]
 [2.71600e+03 1.28352e+05 4.72000e+03 6.83160e+04 2.47700e+03 1.05000e+02
  1.32100e+03 7.86900e+03 1.14000e+02]
 [2.49100e+03 1.45205e+05 5.40400e+03 7.02430e+04 1.68530e+04 6.84000e+02
  1.92700e+03 7.97900e+03 1.10000e+02]
 [2.84000e+03 1.56101e+05 5.81900e+03 7.26160e+04 1.08960e+04 4.15000e+02
  2.37300e+03 8.08600e+03 1.07000e+02]
 [2.96500e+03 1.67454e+05 6.44000e+03 7.60260e+04 1.13530e+04 6.21000e+02
  3.41000e+03 8.16200e+03 7.60000e+01]
 [2.45100e+03 1.81574e+05 7.12600e+03 7.80790e+04 1.41200e+04 6.86000e+02
  2.05300e+03 8.23600e+03 7.40000e+01]
 [2.11500e+03 1.97102e+05 7.90500e+03 8.08310e+04 1.55280e+04 7.79000e+02
  2.75200e+03 8.32000e+03 8.40000e+01]
 [1.94400e+03 2.14821e+05 8.73300e+03 8.33030e+04 1.77190e+04 8.28000e+02
  2.47200e+03 8.41300e+03 9.30000e+01]
 [2.49100e+03 2.42570e+05 9.86700e+03 8.49660

In [25]:
# Scale the training data into the range [-1, 1].

from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler(feature_range=(-1, 1))
# NOTE(review): reshape(-1, 1) flattens every feature column into one single
# column, so the scaler learns ONE min/max shared by all features rather than
# one per feature. Kept as-is here; revisit together with the model input shape.
train_column = train_data.reshape(-1, 1)
train_data_normalized = scaler.fit_transform(train_column)

In [26]:
print(train_data_normalized[:5])
print(train_data_normalized[-5:])

[[-0.60332153]
 [-0.99064239]
 [-0.99971337]
 [-0.9995279 ]
 [-1.        ]]
[[-0.91519137]
 [-0.99538021]
 [-0.96779632]
 [-0.87332659]
 [-0.99940988]]


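## Issue
- 참고한 예제 코드는 feature 없이 날짜와 y값(단일 시계열)만 사용했다.
- 현재 데이터는 feature가 9개이므로, 위 셀의 `reshape(-1, 1)`은 모든 feature를 하나의 열로 펼쳐서 동일한 min/max로 스케일링하게 된다. 따라서 feature별 스케일링과 모델 입력 형태에 맞게 코드 수정이 필요하다.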

In [27]:
# Convert the scaled array to a FloatTensor, then flatten it to one dimension.
scaled_tensor = torch.FloatTensor(train_data_normalized)
train_data_normalized = scaled_tensor.view(-1)

In [28]:
# Show the first five values of the flattened tensor.
train_data_normalized[:5]

tensor([-0.6033, -0.9906, -0.9997, -0.9995, -1.0000])