In [62]:
import pandas as pd 
import numpy as np
import warnings
warnings.filterwarnings('ignore')

df = pd.read_csv("../Datasets/Manipulated Data/train.csv")
df.head(3)

Unnamed: 0,period,target,gunluk_vaka,toplam_vaka,gunluk_olum,toplam_olum,gasoline_imports,gasoline_exports,crude_imports,crude_exports,...,jet_output,propane_output,residual_fuel_output,conventional_gasoline_spot_price,crude_brent_spot_price,gasoline_future_price1,gasoline_future_price2,gasoline_future_price3,gasoline_future_price4,gasoline_demand
0,2010-10-15,10639,0.0,0.0,0.0,0.0,779,200,8600,33,...,1296,1044,414,2.202,83.11,2.139,2.119,2.128,2.146,9460
1,2010-10-22,10036,0.0,0.0,0.0,0.0,999,200,9463,33,...,1345,1021,473,2.146,81.43,2.077,2.063,2.076,2.097,10639
2,2010-10-29,9886,0.0,0.0,0.0,0.0,871,200,8578,33,...,1309,1032,401,2.122,82.25,2.098,2.072,2.085,2.106,10036


Function for Machine Learning

## LSTM

In [63]:
train = df.iloc[:-52,:]
test = df.iloc[-52:,:]

In [28]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
scaled_train = scaler.fit_transform(train[['target']])
scaled_test = scaler.transform(test[['target']])

scaled_test.shape, scaled_train.shape

((52, 1), (441, 1))

In [29]:
from keras.preprocessing.sequence import TimeseriesGenerator

n_input = 52
n_features = 1

train_generator = TimeseriesGenerator(scaled_train, scaled_train, length=n_input, batch_size=1)

In [49]:
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout

model = Sequential()

model.add(LSTM(128, activation='relu', input_shape=(n_input, n_features)))
model.add(Dense(1))

model.compile(optimizer='adam', loss='mse')
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_1 (LSTM)               (None, 128)               66560     
                                                                 
 dense_2 (Dense)             (None, 1)                 129       
                                                                 
Total params: 66,689
Trainable params: 66,689
Non-trainable params: 0
_________________________________________________________________


In [50]:
import time
start = time.time()

model.fit(train_generator,epochs=25, verbose=0)

end = time.time()
total_time = end - start
minutes = int(total_time // 60)
seconds = int(total_time % 60)

print(f"Toplam Eğitim Süresi: {minutes} dakika {seconds} saniye")

Toplam Eğitim Süresi: 2 dakika 33 saniye


In [51]:
first_eval_batch = scaled_train[-52:]
first_eval_batch = first_eval_batch.reshape((1, n_input, n_features))

model.predict(first_eval_batch)



array([[0.78865284]], dtype=float32)

In [52]:
def lstm_prediction(model, scaled_train, n_input, n_predictions):
    import warnings
    warnings.filterwarnings("ignore")
    
    test_predictions = []

    first_eval_batch = scaled_train[-n_input:]
    current_batch = first_eval_batch.reshape((1, n_input, n_features))

    for i in range(n_predictions):
        
        # get the prediction value for the first batch
        current_pred = model.predict(current_batch, verbose=0)[0]
        
        # append the prediction into the array
        test_predictions.append(current_pred) 
        
        # use the prediction to update the batch and remove the first value
        current_batch = np.append(current_batch[:,1:,:],[[current_pred]],axis=1)
        
    true_predictions = scaler.inverse_transform(test_predictions)
    
    return true_predictions

In [53]:
lstm_preds = lstm_prediction(model, scaled_train, n_input, scaled_test.shape[0])

In [54]:
test_df = test[['target']].copy()
test_df['lstm_preds'] = lstm_preds
test_df.head(3)

Unnamed: 0,target,lstm_preds
441,11328,10314.978657
442,10475,10381.036951
443,10444,10439.033264


In [55]:
import plotly.express as px

fig = px.line(test_df, x=test_df.index, y=['target','lstm_preds'])
fig.show()

## Machine Learning

In [64]:
def dataset_preparer(df):
    import pandas as pd 
    import numpy as np
    
    df['period'] = pd.to_datetime(df['period'])

    df['year'] = df['period'].dt.year
    df['month'] = df['period'].dt.month
    df['week'] = df['period'].dt.isocalendar().week
    df['day_of_year'] = df['period'].dt.dayofyear
    df['day_of_month'] = df['period'].dt.day
    df['quarter'] = df['period'].dt.quarter

    import holidays
    from datetime import datetime, timedelta

    def haftada_tatil_gunu_sayisi(tarih):

        tatil_takvimi = holidays.Turkey()

        hafta_numarasi = tarih.isocalendar()[1]
        
        baslangic_tarihi = tarih - timedelta(days=tarih.weekday())
        bitis_tarihi = baslangic_tarihi + timedelta(days=6)
        
        tatil_sayisi = 0
        for gun in range((bitis_tarihi - baslangic_tarihi).days + 1):
            kontrol_tarihi = baslangic_tarihi + timedelta(days=gun)
            if kontrol_tarihi in tatil_takvimi:
                tatil_sayisi += 1
        
        return tatil_sayisi
    
    df['Haftada_Tatil_Sayisi'] = df['period'].apply(haftada_tatil_gunu_sayisi)
    
    print("Veri Setini Standartlaştırmayı Unutmayınız...")
    
    return df

In [65]:
train = dataset_preparer(train)
test = dataset_preparer(test)

Veri Setini Standartlaştırmayı Unutmayınız...
Veri Setini Standartlaştırmayı Unutmayınız...


In [68]:
X_train = train.drop(['period','target'], axis=1)
y_train = train['target']

X_test = test.drop(['period','target'], axis=1)
y_test = test['target']

In [69]:
from sklearn.ensemble import RandomForestRegressor
model = RandomForestRegressor(n_estimators=100, max_depth=20,
                            max_features=7, min_samples_leaf=1,
                            min_samples_split=2, bootstrap=0)
model.fit(X_train, y_train)

In [72]:
rf_pred = model.predict(X_test)

In [73]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error

rmse = np.sqrt(mean_squared_error(y_test, rf_pred))
mape = mean_absolute_percentage_error(y_test, rf_pred)
mae = mean_absolute_error(y_test, rf_pred)

print(f"""Tuned Random Forest Regressor
---------------------------
• RMSE: {rmse}
• MAE: {mae}
• MAPE: {mape}
        """)

Tuned Random Forest Regressor
---------------------------
• RMSE: 692.4703600630584
• MAE: 461.2080079298721
• MAPE: 0.05059273763536493
        


In [None]:
train = df.iloc[:-52,:]
test = df.iloc[-52:,:]

train = dataset_preparer(train)
test = dataset_preparer(test)

train['lstm_preds'] = 