In [1]:
import numpy as np
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
import seaborn as sns
import math
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from sklearn import metrics
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from statsmodels.tsa.stattools import adfuller
from keras.models import Sequential
from keras.layers import SimpleRNN, Dense, Dropout
from keras.optimizers import SGD
import warnings
warnings.filterwarnings('ignore')

In [2]:
df = pd.read_excel('Data c&w.xlsx')

In [3]:
df.tail()

Unnamed: 0,Year,DJF,JFM,FMA,MAM,AMJ,MJJ,JJA,JAS,ASO,SON,OND,NDJ
70,2020,0.5,0.5,0.4,0.2,-0.1,-0.3,-0.4,-0.6,-0.9,-1.2,-1.3,-1.2
71,2021,-1.0,-0.9,-0.8,-0.7,-0.5,-0.4,-0.4,-0.5,-0.7,-0.8,-1.0,-1.0
72,2022,-1.0,-0.9,-1.0,-1.1,-1.0,-0.9,-0.8,-0.9,-1.0,-1.0,-0.9,-0.8
73,2023,-0.7,-0.4,-0.1,0.2,0.5,0.8,1.1,1.3,1.6,1.8,1.9,2.0
74,2024,1.8,1.5,1.1,0.7,0.4,0.2,0.0,-0.1,,,,


In [4]:
df.dropna(inplace=True)

In [5]:
# Drop the 'Year' column, as it is not needed for time series prediction
data = df.drop(columns=['Year'])

In [6]:
#### Replace all column names by overwritting on it

cols = ['January','February','March','April','May','June','July','August','September','October','November','December']

data.columns = cols

data

Unnamed: 0,January,February,March,April,May,June,July,August,September,October,November,December
0,-1.5,-1.3,-1.2,-1.2,-1.1,-0.9,-0.5,-0.4,-0.4,-0.4,-0.6,-0.8
1,-0.8,-0.5,-0.2,0.2,0.4,0.6,0.7,0.9,1.0,1.2,1.0,0.8
2,0.5,0.4,0.3,0.3,0.2,0.0,-0.1,0.0,0.2,0.1,0.0,0.1
3,0.4,0.6,0.6,0.7,0.8,0.8,0.7,0.7,0.8,0.8,0.8,0.8
4,0.8,0.5,0.0,-0.4,-0.5,-0.5,-0.6,-0.8,-0.9,-0.8,-0.7,-0.7
...,...,...,...,...,...,...,...,...,...,...,...,...
69,0.7,0.7,0.7,0.7,0.5,0.5,0.3,0.1,0.2,0.3,0.5,0.5
70,0.5,0.5,0.4,0.2,-0.1,-0.3,-0.4,-0.6,-0.9,-1.2,-1.3,-1.2
71,-1.0,-0.9,-0.8,-0.7,-0.5,-0.4,-0.4,-0.5,-0.7,-0.8,-1.0,-1.0
72,-1.0,-0.9,-1.0,-1.1,-1.0,-0.9,-0.8,-0.9,-1.0,-1.0,-0.9,-0.8


In [7]:
# Mengonversi kolom tanggal menjadi objek tanggal (jika perlu)
if 'Date' in df.columns:
   data['Date'] = pd.to_datetime(df['Date'], errors='coerce')

# Tetapkan kolom tanggal sebagai indeks jika ada
if 'Date' in df.columns:
    data.set_index('Date', inplace=True)

# Visualisasikan baris pertama DataFrame untuk memahami strukturnya
print("Baris pertama DataFrame:")
print(data)

Baris pertama DataFrame:
    January  February  March  April  May  June  July  August  September  \
0      -1.5      -1.3   -1.2   -1.2 -1.1  -0.9  -0.5    -0.4       -0.4   
1      -0.8      -0.5   -0.2    0.2  0.4   0.6   0.7     0.9        1.0   
2       0.5       0.4    0.3    0.3  0.2   0.0  -0.1     0.0        0.2   
3       0.4       0.6    0.6    0.7  0.8   0.8   0.7     0.7        0.8   
4       0.8       0.5    0.0   -0.4 -0.5  -0.5  -0.6    -0.8       -0.9   
..      ...       ...    ...    ...  ...   ...   ...     ...        ...   
69      0.7       0.7    0.7    0.7  0.5   0.5   0.3     0.1        0.2   
70      0.5       0.5    0.4    0.2 -0.1  -0.3  -0.4    -0.6       -0.9   
71     -1.0      -0.9   -0.8   -0.7 -0.5  -0.4  -0.4    -0.5       -0.7   
72     -1.0      -0.9   -1.0   -1.1 -1.0  -0.9  -0.8    -0.9       -1.0   
73     -0.7      -0.4   -0.1    0.2  0.5   0.8   1.1     1.3        1.6   

    October  November  December  
0      -0.4      -0.6      -0.8  
1     

In [8]:
# Setting 100 percent data for training
training_data_len = math.ceil(len(df) * .8)
training_data_len 

#Splitting the dataset
train_data = data[:training_data_len].iloc[:,:1] 
test_data = data[training_data_len:].iloc[:,:1]
print(train_data.shape, test_data.shape)

(60, 1) (14, 1)


In [9]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 74 entries, 0 to 73
Data columns (total 12 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   January    74 non-null     float64
 1   February   74 non-null     float64
 2   March      74 non-null     float64
 3   April      74 non-null     float64
 4   May        74 non-null     float64
 5   June       74 non-null     float64
 6   July       74 non-null     float64
 7   August     74 non-null     float64
 8   September  74 non-null     float64
 9   October    74 non-null     float64
 10  November   74 non-null     float64
 11  December   74 non-null     float64
dtypes: float64(12)
memory usage: 7.5 KB


In [10]:
# Selecting Month values
dataset_train = train_data.January.values 
# Reshaping 1D to 2D array
dataset_train = np.reshape(dataset_train, (-1,1)) 
dataset_train.shape

(60, 1)

In [11]:
scaler = MinMaxScaler(feature_range=(0,1))
# scaling dataset
scaled_train = scaler.fit_transform(dataset_train)

print(scaled_train[:5])

[[0.075]
 [0.25 ]
 [0.575]
 [0.55 ]
 [0.65 ]]


In [12]:
# Selecting Open Price values
dataset_test = test_data.January.values 
# Reshaping 1D to 2D array
dataset_test = np.reshape(dataset_test, (-1,1))  
# Normalizing values between 0 and 1
scaled_test = scaler.fit_transform(dataset_test)  
print(*scaled_test[:5])

[0.74358974] [0.] [0.12820513] [0.25641026] [0.25641026]


In [13]:
X_train = []
y_train = []
for i in range(50, len(scaled_train)):
    X_train.append(scaled_train[i-50:i, 0])
    y_train.append(scaled_train[i, 0])
    if i <= 51:
        print(X_train)
        print(y_train)
        print()

[array([0.075, 0.25 , 0.575, 0.55 , 0.65 , 0.275, 0.175, 0.4  , 0.9  ,
       0.6  , 0.425, 0.45 , 0.4  , 0.35 , 0.725, 0.3  , 0.8  , 0.35 ,
       0.3  , 0.725, 0.575, 0.1  , 0.275, 0.9  , 0.   , 0.325, 0.05 ,
       0.625, 0.625, 0.45 , 0.6  , 0.375, 0.45 , 1.   , 0.3  , 0.2  ,
       0.325, 0.75 , 0.65 , 0.025, 0.475, 0.55 , 0.875, 0.475, 0.475,
       0.7  , 0.225, 0.325, 1.   , 0.075])]
[0.025000000000000022]

[array([0.075, 0.25 , 0.575, 0.55 , 0.65 , 0.275, 0.175, 0.4  , 0.9  ,
       0.6  , 0.425, 0.45 , 0.4  , 0.35 , 0.725, 0.3  , 0.8  , 0.35 ,
       0.3  , 0.725, 0.575, 0.1  , 0.275, 0.9  , 0.   , 0.325, 0.05 ,
       0.625, 0.625, 0.45 , 0.6  , 0.375, 0.45 , 1.   , 0.3  , 0.2  ,
       0.325, 0.75 , 0.65 , 0.025, 0.475, 0.55 , 0.875, 0.475, 0.475,
       0.7  , 0.225, 0.325, 1.   , 0.075]), array([0.25 , 0.575, 0.55 , 0.65 , 0.275, 0.175, 0.4  , 0.9  , 0.6  ,
       0.425, 0.45 , 0.4  , 0.35 , 0.725, 0.3  , 0.8  , 0.35 , 0.3  ,
       0.725, 0.575, 0.1  , 0.275, 0.9  , 0.  

In [14]:
X_test = []
y_test = []
for i in range(50, len(scaled_test)):
    X_test.append(scaled_test[i-50:i, 0])
    y_test.append(scaled_test[i, 0])

In [15]:
# The data is converted to Numpy array
X_train, y_train = np.array(X_train), np.array(y_train)

#Reshaping
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1],1))
y_train = np.reshape(y_train, (y_train.shape[0],1))
print("X_train :",X_train.shape,"y_train :",y_train.shape)

X_train : (10, 50, 1) y_train : (10, 1)


In [16]:
# The data is converted to numpy array
X_test, y_test = np.array(X_test), np.array(y_test)

# Jika X_test hanya 1D, tambahkan dimensi kedua
if len(X_test.shape) == 1:
    X_test = np.reshape(X_test, (X_test.shape[0], 1, 1))  # Misalnya 1 fitur per sampel
elif len(X_test.shape) == 2:
    X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))  # Tambahkan dimensi ketiga

# Reshape y_test hanya jika diperlukan (misalnya 2D ke 1D)
y_test = np.reshape(y_test, (y_test.shape[0], 1))

print("X_test:", X_test.shape, "y_test:", y_test.shape)

X_test: (0, 1, 1) y_test: (0, 1)


NameError: name 'X' is not defined

In [38]:
X_test

array([], shape=(0, 1, 1), dtype=float64)