In [1]:
import pandas as pd
import numpy as np
from prophet import Prophet
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_percentage_error

# Read The Data

In [2]:
# Load the training set from train.csv and preview its first rows.
data = pd.read_csv("train.csv")
data.head()

Unnamed: 0,Tarih,Dağıtılan Enerji (MWh)
0,2018-01-01 00:00:00,1593.944216
1,2018-01-01 01:00:00,1513.933887
2,2018-01-01 02:00:00,1402.612637
3,2018-01-01 03:00:00,1278.527266
4,2018-01-01 04:00:00,1220.697701


In [3]:
# Load the frame covering the forecast horizon and discard the
# 'Unnamed: 0' column that comes in with the CSV.
future_data = pd.read_csv("future.csv").drop(columns='Unnamed: 0')
future_data.head()

Unnamed: 0,Tarih,Dağıtılan Enerji (MWh)
0,2022-08-01 00:00:00,
1,2022-08-01 01:00:00,
2,2022-08-01 02:00:00,
3,2022-08-01 03:00:00,
4,2022-08-01 04:00:00,


In [4]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40152 entries, 0 to 40151
Data columns (total 2 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Tarih                   40152 non-null  object 
 1   Dağıtılan Enerji (MWh)  40152 non-null  float64
dtypes: float64(1), object(1)
memory usage: 627.5+ KB


## Holidays

In [5]:
# Build the holidays frame handed to Prophet: one row per flagged calendar
# day, with the date in `ds` and a single shared label in `holiday`.
calendar_raw = pd.read_csv('Calendar.csv', parse_dates=['CALENDAR_DATE'])

# A day qualifies when either the Ramadan flag or the public-holiday flag is 'Y'.
is_flagged = calendar_raw['RAMADAN_FLAG'].eq('Y') | calendar_raw['PUBLIC_HOLIDAY_FLAG'].eq('Y')

hol = (
    calendar_raw.loc[is_flagged, ['CALENDAR_DATE']]
    .rename(columns={'CALENDAR_DATE': 'ds'})
    .assign(holiday='TR-Holidays')
)

In [6]:
hol

Unnamed: 0,ds,holiday
2,2024-12-29,TR-Holidays
3,2024-12-28,TR-Holidays
9,2024-12-22,TR-Holidays
10,2024-12-21,TR-Holidays
16,2024-12-15,TR-Holidays
...,...,...
11314,1994-01-09,TR-Holidays
11315,1994-01-08,TR-Holidays
11321,1994-01-02,TR-Holidays
11322,1994-01-01,TR-Holidays


## Target Transformations

In [8]:
# Model the target on a log1p scale; forecasts are mapped back with np.expm1 later on.
# NOTE: the column is overwritten in place, so running this cell a second time
# applies the log twice — reload `data` before re-running.
data['Dağıtılan Enerji (MWh)'] = np.log1p(data['Dağıtılan Enerji (MWh)'])

In [9]:
data

Unnamed: 0,Tarih,Dağıtılan Enerji (MWh)
0,2018-01-01 00:00:00,7.374594
1,2018-01-01 01:00:00,7.323127
2,2018-01-01 02:00:00,7.246805
3,2018-01-01 03:00:00,7.154246
4,2018-01-01 04:00:00,7.107997
...,...,...
40147,2022-07-31 19:00:00,7.821438
40148,2022-07-31 20:00:00,7.816400
40149,2022-07-31 21:00:00,7.818437
40150,2022-07-31 22:00:00,7.782213


# Feature Engineering

In [10]:
# Parse the timestamp column into datetime values.
data['Tarih'] = pd.to_datetime(data['Tarih'])

In [11]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40152 entries, 0 to 40151
Data columns (total 2 columns):
 #   Column                  Non-Null Count  Dtype         
---  ------                  --------------  -----         
 0   Tarih                   40152 non-null  datetime64[ns]
 1   Dağıtılan Enerji (MWh)  40152 non-null  float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 627.5 KB


In [12]:
# Prophet expects the timestamp column to be called `ds` and the target `y`.
# The labels are applied by position, so the frame must hold exactly these
# two columns in this order.
columns = ["ds","y"]
data = data.set_axis(columns, axis="columns")

In [13]:
data

Unnamed: 0,ds,y
0,2018-01-01 00:00:00,7.374594
1,2018-01-01 01:00:00,7.323127
2,2018-01-01 02:00:00,7.246805
3,2018-01-01 03:00:00,7.154246
4,2018-01-01 04:00:00,7.107997
...,...,...
40147,2022-07-31 19:00:00,7.821438
40148,2022-07-31 20:00:00,7.816400
40149,2022-07-31 21:00:00,7.818437
40150,2022-07-31 22:00:00,7.782213


# Train | Test Split

In [17]:
# Chronological hold-out: every row after the cutoff timestamp goes to the
# test set, everything up to and including it stays in the training set.
split_cutoff = '2022-07-23 23:00:00'
after_cutoff = data['ds'] > split_cutoff
up_to_cutoff = data['ds'] <= split_cutoff

X_test = data.loc[after_cutoff].copy()
X_train = data.loc[up_to_cutoff].copy()

In [15]:
# X_train, X_test = train_test_split(data, test_size=0.005 , shuffle= False , random_state= 53) # Shuffle must be False for time-series models.

In [18]:
X_train

Unnamed: 0,ds,y
0,2018-01-01 00:00:00,7.374594
1,2018-01-01 01:00:00,7.323127
2,2018-01-01 02:00:00,7.246805
3,2018-01-01 03:00:00,7.154246
4,2018-01-01 04:00:00,7.107997
...,...,...
39955,2022-07-23 19:00:00,7.829668
39956,2022-07-23 20:00:00,7.808175
39957,2022-07-23 21:00:00,7.797434
39958,2022-07-23 22:00:00,7.743247


In [19]:
X_test

Unnamed: 0,ds,y
39960,2022-07-24 00:00:00,7.628562
39961,2022-07-24 01:00:00,7.566047
39962,2022-07-24 02:00:00,7.509662
39963,2022-07-24 03:00:00,7.467384
39964,2022-07-24 04:00:00,7.443561
...,...,...
40147,2022-07-31 19:00:00,7.821438
40148,2022-07-31 20:00:00,7.816400
40149,2022-07-31 21:00:00,7.818437
40150,2022-07-31 22:00:00,7.782213


# Scaling

In [None]:
# from sklearn.preprocessing import MinMaxScaler

In [None]:
# scaler = MinMaxScaler()
# X_train['y'] = scaler.fit_transform(X_train['y'].values.reshape(-1,1))
# X_test['y'] = scaler.transform(X_test['y'].values.reshape(-1,1))

In [None]:
# X_train

In [None]:
# X_test

# Prophet Modelling

In [20]:
# Fit Prophet on the training window only, passing the calendar-derived
# holiday frame. `y` is on the log1p scale at this point, so the model's
# forecasts are on that scale too.
m = Prophet(holidays= hol)
m.fit(X_train)

01:25:00 - cmdstanpy - INFO - Chain [1] start processing
01:25:22 - cmdstanpy - INFO - Chain [1] done processing


<prophet.forecaster.Prophet at 0x1fac1cb7a30>

In [21]:
# Forecast the held-out timestamps and keep only the point estimate and the
# bounds of its uncertainty interval (all still on the log1p scale).
forecast_cols = ['ds' , 'yhat_lower' , 'yhat' , 'yhat_upper']
holdout_forecast = m.predict(X_test)
y_pred = holdout_forecast[forecast_cols]
y_pred

Unnamed: 0,ds,yhat_lower,yhat,yhat_upper
0,2022-07-24 00:00:00,7.383651,7.491587,7.597160
1,2022-07-24 01:00:00,7.303973,7.408600,7.528606
2,2022-07-24 02:00:00,7.238785,7.344494,7.449930
3,2022-07-24 03:00:00,7.187943,7.295285,7.403017
4,2022-07-24 04:00:00,7.157837,7.259857,7.372528
...,...,...,...,...
187,2022-07-31 19:00:00,7.616603,7.715136,7.822179
188,2022-07-31 20:00:00,7.610185,7.721206,7.827360
189,2022-07-31 21:00:00,7.607065,7.706397,7.816056
190,2022-07-31 22:00:00,7.552137,7.657646,7.757378


In [25]:
# Undo the log1p transform so the forecast columns are back on the
# original scale of the target.
holdout_value_cols = ['yhat_lower' , 'yhat' , 'yhat_upper']
y_pred[holdout_value_cols] = np.expm1(y_pred[holdout_value_cols])
y_pred

Unnamed: 0,ds,yhat_lower,yhat,yhat_upper
0,2022-07-24 00:00:00,1608.455073,1791.894327,1991.529206
1,2022-07-24 01:00:00,1485.192440,1649.113758,1859.510647
2,2022-07-24 02:00:00,1391.400797,1546.651096,1718.742197
3,2022-07-24 03:00:00,1322.378547,1472.336230,1639.928072
4,2022-07-24 04:00:00,1283.130946,1421.052718,1590.652251
...,...,...,...,...
187,2022-07-31 19:00:00,2030.648651,2241.027138,2494.335701
188,2022-07-31 20:00:00,2017.652428,2254.677487,2507.299737
189,2022-07-31 21:00:00,2011.363089,2221.519721,2479.104605
190,2022-07-31 22:00:00,1903.809498,2115.768382,2337.763253


In [26]:
# Map the held-out actuals back from the log1p scale so they can be
# compared with the back-transformed forecasts.
X_test = X_test.assign(y=np.expm1(X_test['y']))
X_test['y']

39960    2055.091859
39961    1930.490383
39962    1824.596354
39963    1749.023063
39964    1707.824152
            ...     
40147    2492.489498
40148    2479.958245
40149    2485.016884
40150    2396.574255
40151    2285.252844
Name: y, Length: 192, dtype: float64

In [27]:
y_pred['yhat_upper']

0      1991.529206
1      1859.510647
2      1718.742197
3      1639.928072
4      1590.652251
          ...     
187    2494.335701
188    2507.299737
189    2479.104605
190    2337.763253
191    2175.308420
Name: yhat_upper, Length: 192, dtype: float64

In [28]:
def eval_metrics(y_true , y_pred):
    """Print MAPE, R2, MAE and RMSE for a set of predictions.

    Parameters
    ----------
    y_true : array-like
        Observed values.
    y_pred : array-like
        Predicted values, in the same order as ``y_true``.

    Returns
    -------
    None
        The four scores are only printed.
    """
    from sklearn import metrics as skm

    # Mean absolute percentage error
    mape = skm.mean_absolute_percentage_error(y_true, y_pred)

    # Coefficient of determination
    r2 = skm.r2_score(y_true, y_pred)

    # Mean absolute error
    mae = skm.mean_absolute_error(y_true, y_pred)

    # Root mean squared error: square root of the MSE
    mse = skm.mean_squared_error(y_true, y_pred)
    rmse = mse ** 0.5

    report = f"""
          Mape Score : {mape}
          R2 Score : {r2}
          MAE Score : {mae}
          RMSE Score : {rmse}
          """
    print(report)
    
def eval_plot(y_true , y_pred):
    """Plot actual and predicted values on a single chart.

    The two inputs are paired by position rather than by index label, so a
    slice of a test frame can be compared with a forecast that carries a
    fresh 0..n-1 index. The function only uses its arguments; it does not
    read any notebook-level frame.

    Parameters
    ----------
    y_true : array-like or pandas.Series
        Observed values. When a Series is given, its index is used for the
        rows of the plotted frame.
    y_pred : array-like or pandas.Series
        Predicted values; must have the same length as ``y_true``.

    Returns
    -------
    object
        Whatever ``DataFrame.plot()`` returns for the comparison frame.

    Raises
    ------
    ValueError
        If ``y_true`` and ``y_pred`` differ in length.
    """
    real = np.asarray(y_true).ravel()
    pred = np.asarray(y_pred).ravel()
    if len(real) != len(pred):
        raise ValueError(
            f"y_true and y_pred must have the same length, got {len(real)} and {len(pred)}"
        )

    # Plain arrays are used on purpose: building the frame from Series with
    # different index labels would align on labels and fill the gaps with NaN.
    row_labels = y_true.index if isinstance(y_true, pd.Series) else None
    compare = pd.DataFrame({'Real Values': real, 'Predicts': pred}, index=row_labels)
    return compare.plot()

def eval_df (y_true , y_pred):
    """Print and return a side-by-side table of actual and predicted values.

    The inputs are paired by position, not by index label. The result keeps
    the index of ``y_true`` when it is a Series, so rows can still be traced
    back to the frame they came from. The function only uses its arguments.

    Parameters
    ----------
    y_true : array-like or pandas.Series
        Observed values.
    y_pred : array-like or pandas.Series
        Predicted values; must have the same length as ``y_true``.

    Returns
    -------
    pandas.DataFrame
        Frame with columns ``'Real Values'`` and ``'Predicts'``. The frame
        is also printed; end the calling line with ``;`` in a notebook to
        avoid showing it twice.

    Raises
    ------
    ValueError
        If ``y_true`` and ``y_pred`` differ in length.
    """
    real = pd.Series(y_true)
    pred = np.asarray(y_pred).ravel()
    if len(real) != len(pred):
        raise ValueError(
            f"y_true and y_pred must have the same length, got {len(real)} and {len(pred)}"
        )

    # Raw arrays avoid label alignment: two Series with different index
    # labels would otherwise be reindexed and come out as NaN.
    compare = pd.DataFrame(
        {'Real Values': real.to_numpy(), 'Predicts': pred},
        index=real.index,
    )
    print(compare)
    return compare

def create_submission(future_preds, num):
    """Write a submission CSV and publish the frame as a global variable.

    The timestamps are read from the notebook-level ``future_data['ds']``
    column, so ``future_data`` must already carry a ``ds`` column when this
    is called.

    Parameters
    ----------
    future_preds : array-like or pandas.Series
        Predicted values to store in the 'Dağıtılan Enerji (MWh)' column.
    num : int or str
        Suffix used in both the file name (``submission<num>.csv``) and the
        name of the global variable (``submission<num>``).

    Returns
    -------
    None
    """
    tag = f'submission{num}'
    submission_df = pd.DataFrame(
        {'Tarih': future_data['ds'], 'Dağıtılan Enerji (MWh)': future_preds}
    )
    submission_df.to_csv(f'{tag}.csv', index=False)
    # Make the frame reachable from later cells under the same tag.
    globals()[tag] = submission_df

In [29]:
# NOTE(review): the scores below are computed against `yhat_upper`, not the
# point forecast `yhat` — confirm this is intentional.
eval_metrics(X_test['y'] , y_pred['yhat_upper'])


          Mape Score : 0.040082462439916595
          R2 Score : 0.9209381564004899
          MAE Score : 93.10488408800313
          RMSE Score : 118.4301171631108
          


In [None]:
# eval_plot(X_test['y'] , y_pred['yhat'])

In [None]:
# eval_df(X_test['y'] , y_pred['yhat'])

# Future Prediction

In [30]:
data # Full data

Unnamed: 0,ds,y
0,2018-01-01 00:00:00,7.374594
1,2018-01-01 01:00:00,7.323127
2,2018-01-01 02:00:00,7.246805
3,2018-01-01 03:00:00,7.154246
4,2018-01-01 04:00:00,7.107997
...,...,...
40147,2022-07-31 19:00:00,7.821438
40148,2022-07-31 20:00:00,7.816400
40149,2022-07-31 21:00:00,7.818437
40150,2022-07-31 22:00:00,7.782213


In [31]:
# Give the future frame the same `ds` / `y` column names as the training data.
future_column_names = {'Tarih' : 'ds' , 'Dağıtılan Enerji (MWh)' :'y'}
future_data = future_data.rename(columns=future_column_names)
future_data

Unnamed: 0,ds,y
0,2022-08-01 00:00:00,
1,2022-08-01 01:00:00,
2,2022-08-01 02:00:00,
3,2022-08-01 03:00:00,
4,2022-08-01 04:00:00,
...,...,...
739,2022-08-31 19:00:00,
740,2022-08-31 20:00:00,
741,2022-08-31 21:00:00,
742,2022-08-31 22:00:00,


In [None]:
# Refit on the full history (train + hold-out) before forecasting the future.
# NOTE(review): this cell has no execution count while the prediction cell
# after it does — make sure it is run first, otherwise `m` is still the model
# fitted on X_train only.
m = Prophet(holidays= hol)
m.fit(data)

In [32]:
# Forecast the future timestamps; `y_pred` is rebound here and now holds
# the future forecast instead of the hold-out one (log1p scale).
future_forecast_cols = ['ds' , 'yhat_lower' , 'yhat' , 'yhat_upper']
future_forecast = m.predict(future_data)
y_pred = future_forecast[future_forecast_cols]
y_pred

Unnamed: 0,ds,yhat_lower,yhat,yhat_upper
0,2022-08-01 00:00:00,7.432377,7.542560,7.644083
1,2022-08-01 01:00:00,7.366253,7.468299,7.575319
2,2022-08-01 02:00:00,7.306517,7.412935,7.525720
3,2022-08-01 03:00:00,7.267732,7.372409,7.478258
4,2022-08-01 04:00:00,7.236287,7.345529,7.445374
...,...,...,...,...
739,2022-08-31 19:00:00,7.656242,7.761674,7.874916
740,2022-08-31 20:00:00,7.658997,7.765974,7.871608
741,2022-08-31 21:00:00,7.643165,7.748903,7.851959
742,2022-08-31 22:00:00,7.582293,7.697412,7.811432


In [33]:
# Undo the log1p transform on the future forecast columns.
future_value_cols = ['yhat_lower' , 'yhat' , 'yhat_upper']
y_pred[future_value_cols] = np.expm1(y_pred[future_value_cols])
y_pred

Unnamed: 0,ds,yhat_lower,yhat,yhat_upper
0,2022-08-01 00:00:00,1688.818916,1885.653129,2087.252822
1,2022-08-01 01:00:00,1580.696617,1750.625182,1948.481145
2,2022-08-01 02:00:00,1488.978832,1656.284051,1854.147547
3,2022-08-01 03:00:00,1432.295811,1590.462450,1768.156952
4,2022-08-01 04:00:00,1387.927552,1548.253614,1710.926228
...,...,...,...,...
739,2022-08-31 19:00:00,2112.798223,2347.833541,2629.464286
740,2022-08-31 20:00:00,2118.629580,2357.956081,2620.776816
741,2022-08-31 21:00:00,2085.336211,2318.027826,2569.766705
742,2022-08-31 22:00:00,1962.125436,2201.639796,2467.662746


In [34]:
y_pred['yhat_upper']

0      2087.252822
1      1948.481145
2      1854.147547
3      1768.156952
4      1710.926228
          ...     
739    2629.464286
740    2620.776816
741    2569.766705
742    2467.662746
743    2261.835051
Name: yhat_upper, Length: 744, dtype: float64

In [None]:
# Writes submission4.csv. NOTE(review): the submitted values are `yhat_upper`,
# not the point forecast `yhat` — confirm this is intentional.
create_submission(y_pred['yhat_upper'] , 4)

In [None]:
# Done