In [None]:

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

# Number of bytes requested from the HTTP response per read() call in the download loop.
CHUNK_SIZE = 40960
# Comma-separated list of '<directory>:<url-encoded download URL>' entries.
# NOTE(review): the URL is a signed link carrying X-Goog-Date=20240425T220816Z and
# X-Goog-Expires=259200, so it is presumably only valid for a limited window after
# that date — regenerate it if the download fails.
DATA_SOURCE_MAPPING = 'london-bike-sharing-dataset:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F376751%2F731448%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240425%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240425T220816Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D1211cd1ffce6f92c244516cdb8b6de8187bf3dc4d4914920fb8fc2ed9ec1e89f05946b7349f644e4e419d983a947820352e710c612cc1e5f72e1cf445f220878dac261169f723769267eb89978aa02eef3cb712cd76fd6fb5751f3df93508c85456cb67ba54128630db8f36b1527bf17d061bd85eb19cc5cf75f7152e44e2635c16fa25db452c2002b7cdf0cb7c53d645d913a4ba6fb2ce0f20de86716cecd60ffd8227c38065b270393b8500ad06935c1486cff9c4822fb698dc8d4644b48aa23eb4fb7a4bdeec467649aafbb6b8ec8a5a77963a42ac214696e098ab8b60e7f1f29d1e336bbf3284b4103a89b6920735db4ba46b5d840d3524c76a76b6cee50'

# Directory the downloaded datasets are extracted into.
KAGGLE_INPUT_PATH='/kaggle/input'
# Working directory created alongside the input directory.
KAGGLE_WORKING_PATH='/kaggle/working'
# NOTE(review): not referenced by any code visible in this notebook.
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
  os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
  pass
try:
  os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
  pass

# Download every configured data source into a temporary file and unpack it under
# KAGGLE_INPUT_PATH/<directory>. A failed download is reported and skipped so the
# remaining sources are still processed.
for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    # Each entry is '<directory>:<url-encoded URL>'. Split on the first ':' only so a
    # literal ':' in the URL part cannot break the two-value unpacking.
    directory, download_url_encoded = data_source_mapping.split(':', 1)
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            content_length = fileres.headers['content-length']
            # The header can be missing; fall back to 0 and skip the progress bar
            # instead of crashing on int(None).
            if content_length and content_length.isdigit():
                total_length = int(content_length)
            else:
                total_length = 0
            print(f'Downloading {directory}, {content_length} bytes compressed')
            dl = 0
            # read() returns b'' once the response is exhausted, which ends the loop.
            for data in iter(lambda: fileres.read(CHUNK_SIZE), b''):
                dl += len(data)
                tfile.write(data)
                if total_length > 0:
                    # Clamp to the bar width in case more bytes arrive than announced.
                    done = min(50, int(50 * dl / total_length))
                    progress = f"[{'=' * done}{' ' * (50-done)}] "
                else:
                    progress = ''
                sys.stdout.write(f"\r{progress}{dl} bytes downloaded")
                sys.stdout.flush()
            # Push buffered bytes to disk: the tar branch re-opens the file by name and
            # would otherwise see a truncated archive.
            tfile.flush()
            if filename.endswith('.zip'):
                with ZipFile(tfile) as zfile:
                    zfile.extractall(destination_path)
            else:
                # Bind the archive to `tar`, not `tarfile`: reusing the module name would
                # replace the imported module with a TarFile object for later iterations.
                with tarfile.open(tfile.name) as tar:
                    tar.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError:
        # HTTPError is a subclass of OSError, so it has to be handled first.
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
    except OSError:
        print(f'Failed to load {download_url} to path {destination_path}')

print('Data source import complete.')


In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
%matplotlib inline

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the input tree and print the full path of every file found.
for folder, _subdirs, files_in_folder in os.walk('/kaggle/input'):
    for file_name in files_in_folder:
        print(os.path.join(folder, file_name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the merged CSV; the 'timestamp' column is parsed as dates while reading.
df = pd.read_csv(
    '/kaggle/input/london-bike-sharing-dataset/london_merged.csv',
    parse_dates=['timestamp'],
)
# Preview the first rows.
df.head()

In [None]:
# Display the 'timestamp' column (parsed as dates when the CSV was read).
df['timestamp']

In [None]:
# (rows, columns) of the loaded frame.
df.shape

In [None]:
# 17,000 rows for training, the following 414 rows for testing.
# Only the column at position 1 is kept, as a one-column DataFrame.
# NOTE(review): presumably position 1 is `cnt`, the column plotted in a later cell — confirm.
target_column = slice(1, 2)

train = df.iloc[:17000, target_column]
test = df.iloc[17000:17414, target_column]

In [None]:
# Shapes of the training and test splits, one per line.
print(train.shape, test.shape, sep='\n')

In [None]:
# Draw the first 17,000 values and the remaining values of `cnt` on the same axes.
cnt_series = df['cnt']
cnt_series.iloc[:17000].plot(figsize=(15,4), legend=True)
cnt_series.iloc[17000:].plot(figsize=(15,4), legend=True)
plt.legend(['train','test'])
plt.title('bike share demand')
plt.show()

In [None]:
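# Data preprocessing for time-series analysis.
# No separate explanatory variables are needed: the model learns from the values observed a few hours earlier. Next: how to estimate the order (number of lags).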

In [None]:
# Estimate the order (number of lags) from the partial autocorrelation function.
# The function is imported under an alias so that storing its result in `pacf` does not
# overwrite the function itself; the cell can therefore be re-run safely.
from statsmodels.tsa.stattools import pacf as compute_pacf
pacf = compute_pacf(df['cnt'], nlags=20, method='ols')
print(pacf)

In [None]:
from statsmodels.graphics.tsaplots import plot_pacf
# plot_pacf computes the partial autocorrelations itself, so it needs the raw series;
# passing already-computed PACF values would plot the PACF of the PACF.
plot_pacf(df['cnt'], lags=9, method='ols', title='pa')
# Actually call show(); a bare `.show` attribute access does nothing.
plt.show()

In [None]:
# Preprocessing: rescale the data to the [0, 1] range with MinMaxScaler.

from sklearn.preprocessing import MinMaxScaler

# The scaler is fitted on the training split only; `sc` is reused by later cells.
sc = MinMaxScaler(feature_range=(0, 1))
sc.fit(train)
train_scaled = sc.transform(train)

In [None]:
# Display the scaled training array returned by the scaler.
train_scaled

In [None]:
# Shape of the scaled training array.
train_scaled.shape

In [None]:
# Time-series deep learning uses the series' own past as the independent variable. Here we try shifting the series by one hour.

In [None]:
# Build one-step-ahead pairs from the scaled series: the input for step t is the
# single value at t-1 (kept as a length-1 array) and the target is the value at t.
x_train = np.array([train_scaled[t - 1:t, 0] for t in range(1, 17000)])
y_train = np.array([train_scaled[t, 0] for t in range(1, 17000)])

In [None]:
# Shape of the training inputs before the 3-D reshape.
x_train.shape

In [None]:
# Time-series deep learning requires a 3-D input array, so reshape the inputs into three dimensions.

In [None]:
# Append a trailing axis of length 1 so the 2-D inputs become 3-D.
n_samples, n_steps = x_train.shape[0], x_train.shape[1]
x_train = x_train.reshape(n_samples, n_steps, 1)
x_train.shape

In [None]:
from keras.models import Sequential
# Import layers from the public `keras.layers` namespace: the `keras.layers.core`
# submodule is not importable in current Keras releases, while `keras.layers`
# exposes the same classes in both old and new versions.
from keras.layers import Dense, Activation
from keras.layers import SimpleRNN

In [None]:
# Simple recurrent network: a 6-unit SimpleRNN over inputs of shape (1, 1),
# followed by a single linear output unit.
rnn = Sequential([
    SimpleRNN(units=6, activation='relu', input_shape=(1,1)),
    Dense(units=1, activation='linear'),
])

In [None]:
# summary() prints the layer table itself and returns None, so wrapping it in print()
# only appends a stray "None" line.
rnn.summary()

In [None]:
# Mean-squared-error loss with the Adam optimizer; MSE is also reported as a metric.
rnn.compile(optimizer='adam', loss='mse', metrics=['mse'])

In [None]:
# Two passes over the training pairs with one sample per batch.
rnn.fit(
    x_train,
    y_train,
    epochs=2,
    batch_size=1,
)

In [None]:
# Build the test-time inputs one row behind the targets. The prediction for row k of
# `test` must come from the preceding observation, so the window starts at the last
# training row (position 16999) and spans 415 rows. Scaling `test` itself would feed
# the model the very value it is later scored against (target leakage).
inputs = sc.transform(df.iloc[17000 - 1:17414, 1:2])
inputs.shape

In [None]:
# One length-1 window per test step, taken from consecutive rows of `inputs`,
# then a trailing axis of length 1 is added so the array becomes 3-D.
x_test = np.array([inputs[k - 1:k, 0] for k in range(1, 415)])
x_test = x_test.reshape(x_test.shape[0], x_test.shape[1], 1)
x_test.shape

In [None]:
# `rnn` is rebound from the trained model to its predictions below (later cells read
# `rnn` as the prediction array). Keep a separate handle on the model so that
# re-running this cell predicts again instead of failing on an array without `.predict`.
if hasattr(rnn, 'predict'):
    rnn_model = rnn
# Predict in the scaled space, then map back to the original units with the scaler.
rnn = sc.inverse_transform(rnn_model.predict(x_test))

In [None]:
# Wrap the test data and the RNN predictions in DataFrames for plotting.
test1 = pd.DataFrame(data=test)
rnn1 = pd.DataFrame(data=rnn)

In [None]:
# Plot the test frame on its own figure, using the axes returned by DataFrame.plot.
cnt_ax = test1.plot(figsize=(15,4), legend=True)
cnt_ax.legend(['cnt'])
cnt_ax.set_title('bike share demand')
plt.show()

In [None]:
# Plot the RNN prediction frame on its own figure, using the axes returned by DataFrame.plot.
rnn_ax = rnn1.plot(figsize=(15,4), legend=True)
rnn_ax.legend(['RNN'])
rnn_ax.set_title('bike share demand')
plt.show()

In [None]:
# Rebind `test` to a NumPy array; the cells below plot and score it in this form.
test = np.array(test)

In [None]:
# Overlay the RNN predictions (red) on the test values (black).
fig, ax = plt.subplots(figsize=(15,5))
ax.plot(test, marker='.', label='cnt', color='black')
ax.plot(rnn, marker=',', label='RNN', color='red')
ax.legend()

In [None]:
from keras.layers import LSTM

# LSTM network: a 6-unit LSTM over inputs of shape (1, 1), followed by a single
# linear output unit.
lstm = Sequential([
    LSTM(units=6, activation='relu', input_shape=(1,1)),
    Dense(units=1, activation='linear'),
])

In [None]:
# summary() prints the layer table itself and returns None, so wrapping it in print()
# only appends a stray "None" line.
lstm.summary()

In [None]:
# Mean-squared-error loss with the Adam optimizer; MSE is also reported as a metric.
lstm.compile(optimizer='adam', loss='mse', metrics=['mse'])
# Two passes over the training pairs with one sample per batch.
lstm.fit(x_train, y_train, epochs=2, batch_size=1)

In [None]:
# `lstm` is rebound from the trained model to its predictions below (later cells read
# `lstm` as the prediction array). Keep a separate handle on the model so that
# re-running this cell predicts again instead of failing on an array without `.predict`.
if hasattr(lstm, 'predict'):
    lstm_model = lstm
# Predict in the scaled space, then map back to the original units with the scaler.
lstm = sc.inverse_transform(lstm_model.predict(x_test))

In [None]:
# Overlay the LSTM predictions (green) on the test values (black).
plt.figure(figsize=(15,5))
plt.plot(test, marker='.', label='cnt', color='black')
# The legend entry must say LSTM: this curve is the LSTM output, not the RNN's.
plt.plot(lstm, marker=',', label='LSTM', color='green')
plt.legend()

In [None]:
from keras.layers import GRU

# GRU network: a 6-unit GRU over inputs of shape (1, 1), followed by a single
# linear output unit.
gru = Sequential([
    GRU(units=6, activation='relu', input_shape=(1,1)),
    Dense(units=1, activation='linear'),
])

In [None]:
# summary() prints the layer table itself and returns None, so wrapping it in print()
# only appends a stray "None" line.
gru.summary()

In [None]:
# Mean-squared-error loss with the Adam optimizer; MSE is also reported as a metric.
gru.compile(optimizer='adam', loss='mse', metrics=['mse'])
# Two passes over the training pairs with one sample per batch.
gru.fit(x_train, y_train, epochs=2, batch_size=1)

In [None]:
# `gru` is rebound from the trained model to its predictions below (later cells read
# `gru` as the prediction array). Keep a separate handle on the model so that
# re-running this cell predicts again instead of failing on an array without `.predict`.
if hasattr(gru, 'predict'):
    gru_model = gru
# Predict in the scaled space, then map back to the original units with the scaler.
gru = sc.inverse_transform(gru_model.predict(x_test))

In [None]:
# Overlay the GRU predictions (blue) on the test values (black).
fig, ax = plt.subplots(figsize=(15,5))
ax.plot(test, marker='.', label='cnt', color='black')
ax.plot(gru, marker=',', label='GRU', color='blue')
ax.legend()

In [None]:
# Compare all three prediction arrays against the test values on one figure.
fig, ax = plt.subplots(figsize=(15,5))
ax.plot(test, marker='.', label='cnt', color='black')
for predicted, model_label, line_color in (
    (rnn, 'RNN', 'red'),
    (lstm, 'LSTM', 'green'),
    (gru, 'GRU', 'blue'),
):
    ax.plot(predicted, marker=',', label=model_label, color=line_color)
ax.legend()

In [None]:
from sklearn.metrics import mean_squared_error

def RMSE(y_test, y_predict):
    """Return the root-mean-squared error between `y_test` and `y_predict`.

    Both arguments are forwarded unchanged to `mean_squared_error`; the square
    root of its result is returned.
    """
    mse = mean_squared_error(y_test, y_predict)
    return np.sqrt(mse)

In [None]:
# Root-mean-squared error between the test array and the RNN predictions.
print('RNN RMSE:', RMSE(test, rnn))

In [None]:
# Root-mean-squared error between the test array and the LSTM predictions.
print('LSTM RMSE:', RMSE(test, lstm))

In [None]:
# Root-mean-squared error between the test array and the GRU predictions.
print('GRU RMSE:', RMSE(test, gru))