# Data Time Series

In [1]:
import numpy
import matplotlib.pyplot as plt
import pandas as pd
import math
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

In [2]:
url = 'https://github.com/dhamvi01/Univariate-Time-Series-using-LSTM/blob/1bee832e0ad8741e0a759a4ff272d53f7a4e267d/airline-passengers.csv?raw=true'
df = pd.read_csv(url, index_col=0)
print(df)

         Passengers
Month              
1949-01         112
1949-02         118
1949-03         132
1949-04         129
1949-05         121
...             ...
1960-08         606
1960-09         508
1960-10         461
1960-11         390
1960-12         432

[144 rows x 1 columns]


In [3]:
# split a univariate sequence into samples
def split_sequence(sequence, n_steps):
  X, y = list(), list()
  for i in range(len(sequence)):
    # find the end of this pattern
    end_ix = i + n_steps
    # check if we are beyond the sequence
    if end_ix > len(sequence)-1:
      break
    # gather input and output parts of the pattern
    seq_x, seq_y = sequence[i:end_ix], sequence[end_ix]
    X.append(seq_x)
    y.append(seq_y)
  return array(X), array(y)

In [4]:
from numpy import array
passengers = df["Passengers"].values

# series = passengers
print(passengers.shape)
# transform to a supervised learning problem
X, y = split_sequence(passengers, 4)
print(X.shape, y.shape)
# show each sample
for i in range(len(X)):
  print(X[i], y[i])

(144,)
(140, 4) (140,)
[112 118 132 129] 121
[118 132 129 121] 135
[132 129 121 135] 148
[129 121 135 148] 148
[121 135 148 148] 136
[135 148 148 136] 119
[148 148 136 119] 104
[148 136 119 104] 118
[136 119 104 118] 115
[119 104 118 115] 126
[104 118 115 126] 141
[118 115 126 141] 135
[115 126 141 135] 125
[126 141 135 125] 149
[141 135 125 149] 170
[135 125 149 170] 170
[125 149 170 170] 158
[149 170 170 158] 133
[170 170 158 133] 114
[170 158 133 114] 140
[158 133 114 140] 145
[133 114 140 145] 150
[114 140 145 150] 178
[140 145 150 178] 163
[145 150 178 163] 172
[150 178 163 172] 178
[178 163 172 178] 199
[163 172 178 199] 199
[172 178 199 199] 184
[178 199 199 184] 162
[199 199 184 162] 146
[199 184 162 146] 166
[184 162 146 166] 171
[162 146 166 171] 180
[146 166 171 180] 193
[166 171 180 193] 181
[171 180 193 181] 183
[180 193 181 183] 218
[193 181 183 218] 230
[181 183 218 230] 242
[183 218 230 242] 209
[218 230 242 209] 191
[230 242 209 191] 172
[242 209 191 172] 194
[209 191 

In [5]:
from numpy import array
import pandas as pd

# create time series data
data = df["Passengers"].values

# define function to split a univariate sequence into samples
def split_sequence(sequence, n_steps):
    X, y = list(), list()
    for i in range(len(sequence)):
        # find the end of this pattern
        end_ix = i + n_steps
        # check if we are beyond the sequence
        if end_ix > len(sequence)-1:
            break
        # gather input and output parts of the pattern
        seq_x, seq_y = sequence[i:end_ix], sequence[end_ix]
        X.append(seq_x)
        y.append(seq_y)
    return array(X), array(y)

# transform to a supervised learning problem
X, y = split_sequence(data, 2)

# create a dataframe with labeled data
X_labeled = pd.DataFrame(X, columns=["xt-2", "xt-1",])
y_labeled = pd.DataFrame(y, columns=["xt"])
df = pd.concat([X_labeled, y_labeled], axis=1)

# display dataframe as table
display(df)

Unnamed: 0,xt-2,xt-1,xt
0,112,118,132
1,118,132,129
2,132,129,121
3,129,121,135
4,121,135,148
...,...,...,...
137,535,622,606
138,622,606,508
139,606,508,461
140,508,461,390


In [6]:
y_max = df['xt'].max()
y_min = df['xt'].min()

print("Nilai xt terbesar: ", y_max)
print("Nilai xt terkecil: ", y_min)


Nilai xt terbesar:  622
Nilai xt terkecil:  104


In [7]:
from sklearn.preprocessing import MinMaxScaler

# inisialisasi scaler
scaler = MinMaxScaler()

# ambil kolom yang akan dinormalisasi
cols_to_normalize = ['xt-2', 'xt-1', 'xt']

# normalisasi data
df[cols_to_normalize] = scaler.fit_transform(df[cols_to_normalize])

# tampilkan hasil normalisasi
print(df)


         xt-2      xt-1        xt
0    0.015444  0.027027  0.054054
1    0.027027  0.054054  0.048263
2    0.054054  0.048263  0.032819
3    0.048263  0.032819  0.059846
4    0.032819  0.059846  0.084942
..        ...       ...       ...
137  0.832046  1.000000  0.969112
138  1.000000  0.969112  0.779923
139  0.969112  0.779923  0.689189
140  0.779923  0.689189  0.552124
141  0.689189  0.552124  0.633205

[142 rows x 3 columns]


In [8]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# inisialisasi scaler
scaler = MinMaxScaler()

# ambil kolom yang akan dinormalisasi
cols_to_normalize = ['xt-2', 'xt-1', 'xt']

# normalisasi data
df[cols_to_normalize] = scaler.fit_transform(df[cols_to_normalize])

# split data menjadi training dan testing
train_data, test_data = train_test_split(df, test_size=0.2)

# tampilkan sebagian data training dan testing
display('Data Training', train_data)
print("\n")
display('Data Testing', test_data)

# tampilkan jumlah data training dan testing
print('Jumlah data training:', len(train_data))
print('Jumlah data testing:', len(test_data))

'Data Training'

Unnamed: 0,xt-2,xt-1,xt
111,0.471042,0.500000,0.638996
106,0.388031,0.447876,0.455598
79,0.469112,0.401544,0.328185
10,0.000000,0.027027,0.021236
80,0.401544,0.328185,0.256757
...,...,...,...
17,0.086873,0.127413,0.127413
94,0.322394,0.389961,0.407336
45,0.167954,0.131274,0.173745
52,0.241313,0.268340,0.308880






'Data Testing'

Unnamed: 0,xt-2,xt-1,xt
129,0.584942,0.498069,0.581081
101,0.6139,0.696911,0.700772
136,0.710425,0.832046,1.0
46,0.131274,0.173745,0.177606
8,0.061776,0.028958,0.0
75,0.318533,0.320463,0.407336
63,0.237452,0.250965,0.30888
140,0.779923,0.689189,0.552124
95,0.389961,0.407336,0.380309
110,0.498069,0.471042,0.5


Jumlah data training: 113
Jumlah data testing: 29


In [9]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# inisialisasi model
model = LinearRegression()

# pisahkan fitur dan label pada data training dan testing
X_train = train_data[['xt-2', 'xt-1']]
y_train = train_data['xt']

X_test = test_data[['xt-2', 'xt-1']]
y_test = test_data['xt']

# training model pada data training
model.fit(X_train, y_train)

# evaluasi model pada data testing
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)

# tampilkan nilai error
print('Nilai error:', mse)

Nilai error: 0.0045728787008920075


In [10]:
from sklearn.metrics import mean_squared_error
import math

# Menghitung RMSE
rmse = math.sqrt(mean_squared_error(y_test, y_pred))
print('RMSE:', rmse)

RMSE: 0.0676230633799742


In [11]:
from sklearn.neighbors import KNeighborsRegressor

# inisialisasi model KNN
k = 3 # jumlah tetangga terdekat yang dipertimbangkan
knn = KNeighborsRegressor(n_neighbors=k)

# training model
knn.fit(X_train, y_train)

# prediksi data testing
y_pred = knn.predict(X_test)

# evaluasi model menggunakan RMSE
from sklearn.metrics import mean_squared_error
import math
rmse = math.sqrt(mean_squared_error(y_test, y_pred))
print('RMSE:', rmse)

RMSE: 0.09028230124689367
