In [92]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import xgboost as xgb
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error

In [93]:
df_energy = pd.read_csv('dataset/energy_dataset.csv')
df_weather = pd.read_csv('dataset/weather_features.csv')

# Energy info

In [94]:
df_energy.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 35064 entries, 0 to 35063
Data columns (total 29 columns):
 #   Column                                       Non-Null Count  Dtype  
---  ------                                       --------------  -----  
 0   time                                         35064 non-null  object 
 1   generation biomass                           35045 non-null  float64
 2   generation fossil brown coal/lignite         35046 non-null  float64
 3   generation fossil coal-derived gas           35046 non-null  float64
 4   generation fossil gas                        35046 non-null  float64
 5   generation fossil hard coal                  35046 non-null  float64
 6   generation fossil oil                        35045 non-null  float64
 7   generation fossil oil shale                  35046 non-null  float64
 8   generation fossil peat                       35046 non-null  float64
 9   generation geothermal                        35046 non-null  float64
 10

In [95]:
df_energy.describe()

Unnamed: 0,generation biomass,generation fossil brown coal/lignite,generation fossil coal-derived gas,generation fossil gas,generation fossil hard coal,generation fossil oil,generation fossil oil shale,generation fossil peat,generation geothermal,generation hydro pumped storage aggregated,...,generation waste,generation wind offshore,generation wind onshore,forecast solar day ahead,forecast wind offshore eday ahead,forecast wind onshore day ahead,total load forecast,total load actual,price day ahead,price actual
count,35045.0,35046.0,35046.0,35046.0,35046.0,35045.0,35046.0,35046.0,35046.0,0.0,...,35045.0,35046.0,35046.0,35064.0,0.0,35064.0,35064.0,35028.0,35064.0,35064.0
mean,383.51354,448.059208,0.0,5622.737488,4256.065742,298.319789,0.0,0.0,0.0,,...,269.452133,0.0,5464.479769,1439.066735,,5471.216689,28712.129962,28696.939905,49.874341,57.884023
std,85.353943,354.56859,0.0,2201.830478,1961.601013,52.520673,0.0,0.0,0.0,,...,50.195536,0.0,3213.691587,1677.703355,,3176.312853,4594.100854,4574.98795,14.6189,14.204083
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,...,0.0,0.0,0.0,0.0,,237.0,18105.0,18041.0,2.06,9.33
25%,333.0,0.0,0.0,4126.0,2527.0,263.0,0.0,0.0,0.0,,...,240.0,0.0,2933.0,69.0,,2979.0,24793.75,24807.75,41.49,49.3475
50%,367.0,509.0,0.0,4969.0,4474.0,300.0,0.0,0.0,0.0,,...,279.0,0.0,4849.0,576.0,,4855.0,28906.0,28901.0,50.52,58.02
75%,433.0,757.0,0.0,6429.0,5838.75,330.0,0.0,0.0,0.0,,...,310.0,0.0,7398.0,2636.0,,7353.0,32263.25,32192.0,60.53,68.01
max,592.0,999.0,0.0,20034.0,8359.0,449.0,0.0,0.0,0.0,,...,357.0,0.0,17436.0,5836.0,,17430.0,41390.0,41015.0,101.99,116.8


In [96]:
df_energy = df_energy.fillna(0)

In [97]:
df_energy_filtered = df_energy[df_energy.columns[(df_energy != 0).any()]]
df_energy_filtered.columns

Index(['time', 'generation biomass', 'generation fossil brown coal/lignite',
       'generation fossil gas', 'generation fossil hard coal',
       'generation fossil oil', 'generation hydro pumped storage consumption',
       'generation hydro run-of-river and poundage',
       'generation hydro water reservoir', 'generation nuclear',
       'generation other', 'generation other renewable', 'generation solar',
       'generation waste', 'generation wind onshore',
       'forecast solar day ahead', 'forecast wind onshore day ahead',
       'total load forecast', 'total load actual', 'price day ahead',
       'price actual'],
      dtype='object')

# Weather info

In [98]:
df_weather.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 178396 entries, 0 to 178395
Data columns (total 17 columns):
 #   Column               Non-Null Count   Dtype  
---  ------               --------------   -----  
 0   dt_iso               178396 non-null  object 
 1   city_name            178396 non-null  object 
 2   temp                 178396 non-null  float64
 3   temp_min             178396 non-null  float64
 4   temp_max             178396 non-null  float64
 5   pressure             178396 non-null  int64  
 6   humidity             178396 non-null  int64  
 7   wind_speed           178396 non-null  int64  
 8   wind_deg             178396 non-null  int64  
 9   rain_1h              178396 non-null  float64
 10  rain_3h              178396 non-null  float64
 11  snow_3h              178396 non-null  float64
 12  clouds_all           178396 non-null  int64  
 13  weather_id           178396 non-null  int64  
 14  weather_main         178396 non-null  object 
 15  weather_descripti

In [99]:
unique_cities = df_weather['city_name'].unique()
print(unique_cities)

['Valencia' 'Madrid' 'Bilbao' ' Barcelona' 'Seville']


In [100]:
valencia_weather = df_weather.loc[df_weather['city_name']=='Valencia']

In [101]:
valencia_weather.describe()

Unnamed: 0,temp,temp_min,temp_max,pressure,humidity,wind_speed,wind_deg,rain_1h,rain_3h,snow_3h,clouds_all,weather_id
count,35145.0,35145.0,35145.0,35145.0,35145.0,35145.0,35145.0,35145.0,35145.0,35145.0,35145.0,35145.0
mean,290.78078,290.222277,291.355025,1015.973794,65.145113,2.692815,160.75382,0.035924,0.000226,0.000154,20.820999,781.228283
std,7.231284,7.087512,7.519891,11.927677,19.689276,2.581825,120.436402,0.267955,0.003531,0.011898,25.658433,79.759832
min,268.830656,268.830656,268.830656,969.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,200.0
25%,285.15,285.15,285.65,1012.0,51.0,1.0,50.0,0.0,0.0,0.0,0.0,800.0
50%,290.17,290.15,291.15,1017.0,67.0,2.0,130.0,0.0,0.0,0.0,20.0,800.0
75%,296.15,295.37,297.15,1021.0,82.0,4.0,280.0,0.0,0.0,0.0,20.0,801.0
max,311.15,311.15,314.82,1087.0,100.0,133.0,360.0,12.0,0.1,1.125,100.0,804.0


In [102]:
def kelvin_to_celsius(kelvin):
    celsius = kelvin - 273.15
    return celsius

# Example usage:
kelvin_temperature = 298.15  # Example temperature in Kelvin
celsius_temperature = kelvin_to_celsius(kelvin_temperature)
print(f"{kelvin_temperature} Kelvin is equal to {celsius_temperature} degrees Celsius.")

298.15 Kelvin is equal to 25.0 degrees Celsius.


In [103]:
# Apply the function to the column
valencia_weather['temp_C'] = valencia_weather['temp'].apply(kelvin_to_celsius)

valencia_weather

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  valencia_weather['temp_C'] = valencia_weather['temp'].apply(kelvin_to_celsius)


Unnamed: 0,dt_iso,city_name,temp,temp_min,temp_max,pressure,humidity,wind_speed,wind_deg,rain_1h,rain_3h,snow_3h,clouds_all,weather_id,weather_main,weather_description,weather_icon,temp_C
0,2015-01-01 00:00:00+01:00,Valencia,270.475,270.475,270.475,1001,77,1,62,0.0,0.0,0.0,0,800,clear,sky is clear,01n,-2.675
1,2015-01-01 01:00:00+01:00,Valencia,270.475,270.475,270.475,1001,77,1,62,0.0,0.0,0.0,0,800,clear,sky is clear,01n,-2.675
2,2015-01-01 02:00:00+01:00,Valencia,269.686,269.686,269.686,1002,78,0,23,0.0,0.0,0.0,0,800,clear,sky is clear,01n,-3.464
3,2015-01-01 03:00:00+01:00,Valencia,269.686,269.686,269.686,1002,78,0,23,0.0,0.0,0.0,0,800,clear,sky is clear,01n,-3.464
4,2015-01-01 04:00:00+01:00,Valencia,269.686,269.686,269.686,1002,78,0,23,0.0,0.0,0.0,0,800,clear,sky is clear,01n,-3.464
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35140,2018-12-31 19:00:00+01:00,Valencia,285.640,285.150,286.150,1028,62,2,140,0.0,0.0,0.0,0,800,clear,sky is clear,01n,12.490
35141,2018-12-31 20:00:00+01:00,Valencia,283.140,282.150,284.150,1029,71,1,242,0.0,0.0,0.0,0,800,clear,sky is clear,01n,9.990
35142,2018-12-31 21:00:00+01:00,Valencia,281.660,281.150,282.150,1029,81,3,300,0.0,0.0,0.0,0,800,clear,sky is clear,01n,8.510
35143,2018-12-31 22:00:00+01:00,Valencia,280.140,279.150,281.150,1029,81,2,310,0.0,0.0,0.0,0,800,clear,sky is clear,01n,6.990


In [104]:
unique_temp = valencia_weather['temp_C'].unique()
print(unique_temp)

[-2.675 -3.464 -2.858 ...  5.51   3.48   2.51 ]


In [105]:
df_weather.head(5)

Unnamed: 0,dt_iso,city_name,temp,temp_min,temp_max,pressure,humidity,wind_speed,wind_deg,rain_1h,rain_3h,snow_3h,clouds_all,weather_id,weather_main,weather_description,weather_icon
0,2015-01-01 00:00:00+01:00,Valencia,270.475,270.475,270.475,1001,77,1,62,0.0,0.0,0.0,0,800,clear,sky is clear,01n
1,2015-01-01 01:00:00+01:00,Valencia,270.475,270.475,270.475,1001,77,1,62,0.0,0.0,0.0,0,800,clear,sky is clear,01n
2,2015-01-01 02:00:00+01:00,Valencia,269.686,269.686,269.686,1002,78,0,23,0.0,0.0,0.0,0,800,clear,sky is clear,01n
3,2015-01-01 03:00:00+01:00,Valencia,269.686,269.686,269.686,1002,78,0,23,0.0,0.0,0.0,0,800,clear,sky is clear,01n
4,2015-01-01 04:00:00+01:00,Valencia,269.686,269.686,269.686,1002,78,0,23,0.0,0.0,0.0,0,800,clear,sky is clear,01n


Joining energy and weather df

In [106]:
# Drop the 'city' column
weather_df_without_city = df_weather.drop(columns=['city_name', 'weather_id', 'weather_main', 'weather_description', 'weather_icon'])

# Group by 'dt_iso' and calculate the mean for each column
result_df = weather_df_without_city.groupby('dt_iso').mean().reset_index()

# Display the result
result_df.describe()

Unnamed: 0,temp,temp_min,temp_max,pressure,humidity,wind_speed,wind_deg,rain_1h,rain_3h,snow_3h,clouds_all
count,35064.0,35064.0,35064.0,35064.0,35064.0,35064.0,35064.0,35064.0,35064.0,35064.0,35064.0
mean,289.680183,288.396642,291.147454,1070.205996,68.217013,2.467529,166.614552,0.073212,0.000386,0.004846,24.737415
std,7.261069,7.121843,7.507959,2692.26647,14.890167,1.357751,57.493555,0.202562,0.00336,0.101103,17.306731
min,271.941863,270.55,271.941863,812.4,22.6,0.0,0.0,0.0,0.0,0.0,0.0
25%,284.06,282.95,285.35,1012.8,56.8,1.4,126.4,0.0,0.0,0.0,12.0
50%,289.025,287.75,290.55,1017.0,69.8,2.2,166.0,0.0,0.0,0.0,22.4
75%,295.086,293.61,296.75,1021.0,80.2,3.2,207.8,0.06,0.0,0.0,35.0
max,309.266,308.55,313.548,202469.0,100.0,29.8,338.0,4.15,0.463,4.3,93.6


In [107]:
# Merge DataFrames based on 'iso_dt'
merged_df = pd.merge(df_energy, result_df, left_on='time', right_on='dt_iso')

# Display the merged DataFrame
merged_df.head(5)

Unnamed: 0,time,generation biomass,generation fossil brown coal/lignite,generation fossil coal-derived gas,generation fossil gas,generation fossil hard coal,generation fossil oil,generation fossil oil shale,generation fossil peat,generation geothermal,...,temp_min,temp_max,pressure,humidity,wind_speed,wind_deg,rain_1h,rain_3h,snow_3h,clouds_all
0,2015-01-01 00:00:00+01:00,447.0,329.0,0.0,4844.0,4821.0,162.0,0.0,0.0,0.0,...,272.491463,272.491463,1016.4,82.4,2.0,135.2,0.0,0.0,0.0,0.0
1,2015-01-01 01:00:00+01:00,449.0,328.0,0.0,5196.0,4755.0,158.0,0.0,0.0,0.0,...,272.5127,272.5127,1016.2,82.4,2.0,135.8,0.0,0.0,0.0,0.0
2,2015-01-01 02:00:00+01:00,448.0,323.0,0.0,4857.0,4581.0,157.0,0.0,0.0,0.0,...,272.099137,272.099137,1016.8,82.0,2.4,119.0,0.0,0.0,0.0,0.0
3,2015-01-01 03:00:00+01:00,438.0,254.0,0.0,4314.0,4131.0,160.0,0.0,0.0,0.0,...,272.089469,272.089469,1016.6,82.0,2.4,119.2,0.0,0.0,0.0,0.0
4,2015-01-01 04:00:00+01:00,428.0,187.0,0.0,4130.0,3840.0,156.0,0.0,0.0,0.0,...,272.1459,272.1459,1016.6,82.0,2.4,118.4,0.0,0.0,0.0,0.0


In [108]:
merged_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 35064 entries, 0 to 35063
Data columns (total 41 columns):
 #   Column                                       Non-Null Count  Dtype  
---  ------                                       --------------  -----  
 0   time                                         35064 non-null  object 
 1   generation biomass                           35064 non-null  float64
 2   generation fossil brown coal/lignite         35064 non-null  float64
 3   generation fossil coal-derived gas           35064 non-null  float64
 4   generation fossil gas                        35064 non-null  float64
 5   generation fossil hard coal                  35064 non-null  float64
 6   generation fossil oil                        35064 non-null  float64
 7   generation fossil oil shale                  35064 non-null  float64
 8   generation fossil peat                       35064 non-null  float64
 9   generation geothermal                        35064 non-null  float64
 10

In [109]:
# Convert the string to a pandas datetime object
merged_df['time'] = pd.to_datetime(merged_df['time'], utc=True)

In [110]:
merged_df_2015_to_2017 = merged_df.loc[merged_df['time']<'2018-01-01']

In [111]:
max_datetime = merged_df_2015_to_2017['time'].max()

# Display the result
print("Maximum Datetime Value:", max_datetime)

Maximum Datetime Value: 2017-12-31 23:00:00+00:00


In [112]:
final_df = merged_df_2015_to_2017.drop(columns=['time','dt_iso'])

In [113]:
final_df.head(5)

Unnamed: 0,generation biomass,generation fossil brown coal/lignite,generation fossil coal-derived gas,generation fossil gas,generation fossil hard coal,generation fossil oil,generation fossil oil shale,generation fossil peat,generation geothermal,generation hydro pumped storage aggregated,...,temp_min,temp_max,pressure,humidity,wind_speed,wind_deg,rain_1h,rain_3h,snow_3h,clouds_all
0,447.0,329.0,0.0,4844.0,4821.0,162.0,0.0,0.0,0.0,0.0,...,272.491463,272.491463,1016.4,82.4,2.0,135.2,0.0,0.0,0.0,0.0
1,449.0,328.0,0.0,5196.0,4755.0,158.0,0.0,0.0,0.0,0.0,...,272.5127,272.5127,1016.2,82.4,2.0,135.8,0.0,0.0,0.0,0.0
2,448.0,323.0,0.0,4857.0,4581.0,157.0,0.0,0.0,0.0,0.0,...,272.099137,272.099137,1016.8,82.0,2.4,119.0,0.0,0.0,0.0,0.0
3,438.0,254.0,0.0,4314.0,4131.0,160.0,0.0,0.0,0.0,0.0,...,272.089469,272.089469,1016.6,82.0,2.4,119.2,0.0,0.0,0.0,0.0
4,428.0,187.0,0.0,4130.0,3840.0,156.0,0.0,0.0,0.0,0.0,...,272.1459,272.1459,1016.6,82.0,2.4,118.4,0.0,0.0,0.0,0.0


In [114]:
final_df.columns

Index(['generation biomass', 'generation fossil brown coal/lignite',
       'generation fossil coal-derived gas', 'generation fossil gas',
       'generation fossil hard coal', 'generation fossil oil',
       'generation fossil oil shale', 'generation fossil peat',
       'generation geothermal', 'generation hydro pumped storage aggregated',
       'generation hydro pumped storage consumption',
       'generation hydro run-of-river and poundage',
       'generation hydro water reservoir', 'generation marine',
       'generation nuclear', 'generation other', 'generation other renewable',
       'generation solar', 'generation waste', 'generation wind offshore',
       'generation wind onshore', 'forecast solar day ahead',
       'forecast wind offshore eday ahead', 'forecast wind onshore day ahead',
       'total load forecast', 'total load actual', 'price day ahead',
       'price actual', 'temp', 'temp_min', 'temp_max', 'pressure', 'humidity',
       'wind_speed', 'wind_deg', 'rain_1h

In [115]:
# Separar features e variável target
features = final_df.drop('price actual', axis=1)
target = final_df['price actual']

# Dividir os dados em conjuntos de treinamento e teste
X_train, x_test, Y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

LINEAR REGRESSION MODEL

In [116]:
# Criar um regressor Linear
lin_reg = LinearRegression()

# Fit the model to the data
lin_reg.fit(X_train, Y_train)

# Make predictions on the testing data
y_pred = lin_reg.predict(x_test)

# Avaliar o desempenho do modelo
r2 = r2_score(y_test, y_pred)
print(f'R2 Score: {r2}')
mae = mean_absolute_error(y_test, y_pred)
print(f'Mean Absolute Error: {mae}')
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

R2 Score: 0.6249609365904643
Mean Absolute Error: 6.2150939939348
Mean Squared Error: 80.0941619609658


In [117]:
# Criar um regressor XGBoost
regressor = xgb.XGBRegressor()

# Treinar o regressor no conjunto de treinamento
regressor.fit(X_train, Y_train)

# Fazer previsões no conjunto de teste
y_pred = regressor.predict(x_test)

# Avaliar o desempenho do modelo
r2 = r2_score(y_test, y_pred)
print(f'R2 Score: {r2}')
mae = mean_absolute_error(y_test, y_pred)
print(f'Mean Absolute Error: {mae}')
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

R2 Score: 0.8917593644728593
Mean Absolute Error: 3.284765689503985
Mean Squared Error: 23.11610666327259


In [118]:
# Criar um regressor Random Forest
random_forest = RandomForestRegressor()

# Treinar o regressor no conjunto de treinamento
random_forest.fit(X_train, Y_train)

# Fazer previsões no conjunto de teste
y_pred = random_forest.predict(x_test)

# Avaliar o desempenho do modelo
r2 = r2_score(y_test, y_pred)
print(f'R2 Score: {r2}')
mae = mean_absolute_error(y_test, y_pred)
print(f'Mean Absolute Error: {mae}')
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

R2 Score: 0.8843888943778977
Mean Absolute Error: 3.004782550845847
Mean Squared Error: 24.69016036356586


In [119]:
import pickle

# Save the model to a file
with open('your_model.pickle', 'wb') as f:
    pickle.dump(regressor, f)

In [120]:
with open('your_model.pickle', 'rb') as f:
    new_model = pickle.load(f)

In [121]:
#ESTAMOS EM JANEIRO/2018

In [122]:
merged_df_jan_2018 = merged_df.loc[merged_df['time']>='2018-01-01']
merged_df_jan_2018 = merged_df_jan_2018.loc[merged_df['time']<'2018-02-01']

In [123]:
max_datetime = merged_df_jan_2018['time'].max()

# Display the result
print("Maximum Datetime Value:", max_datetime)

Maximum Datetime Value: 2018-01-31 23:00:00+00:00


In [124]:
final_df = merged_df_jan_2018.drop(columns=['time','dt_iso'])

new_data_df = final_df.drop('price actual', axis=1)

In [125]:
new_data_df.columns

Index(['generation biomass', 'generation fossil brown coal/lignite',
       'generation fossil coal-derived gas', 'generation fossil gas',
       'generation fossil hard coal', 'generation fossil oil',
       'generation fossil oil shale', 'generation fossil peat',
       'generation geothermal', 'generation hydro pumped storage aggregated',
       'generation hydro pumped storage consumption',
       'generation hydro run-of-river and poundage',
       'generation hydro water reservoir', 'generation marine',
       'generation nuclear', 'generation other', 'generation other renewable',
       'generation solar', 'generation waste', 'generation wind offshore',
       'generation wind onshore', 'forecast solar day ahead',
       'forecast wind offshore eday ahead', 'forecast wind onshore day ahead',
       'total load forecast', 'total load actual', 'price day ahead', 'temp',
       'temp_min', 'temp_max', 'pressure', 'humidity', 'wind_speed',
       'wind_deg', 'rain_1h', 'rain_3h', 's

In [126]:
# Fazer previsão de jan/2018
y_pred_jan2018 = new_model.predict(new_data_df)

In [135]:
y_pred_jan2018

array([40.129154, 25.077103, 24.908602, 30.081833, 31.949383, 30.38437 ,
       24.724167, 31.817085, 24.81653 , 33.02489 , 33.296295, 33.22961 ,
       26.703718, 28.733408, 33.726234, 25.15155 , 24.509287, 30.293486,
       34.22684 , 33.378136, 34.3274  , 37.97991 , 33.156387, 51.322628,
       60.3444  , 57.56548 , 59.66467 , 58.17391 , 55.102585, 70.193085,
       72.19834 , 74.040535, 73.26522 , 77.32073 , 71.25635 , 74.49389 ,
       70.94134 , 62.47294 , 62.47159 , 60.70147 , 58.06317 , 73.68955 ,
       75.26919 , 75.97706 , 75.29404 , 71.198555, 71.734406, 63.587746,
       59.399628, 56.849026, 58.4125  , 54.600178, 58.181614, 54.68711 ,
       70.106445, 74.72269 , 82.85249 , 83.90794 , 78.092186, 70.00913 ,
       65.6188  , 73.39632 , 72.33461 , 62.7129  , 64.14202 , 62.328747,
       66.27797 , 72.387   , 77.57004 , 77.2317  , 61.917324, 46.367886,
       40.244175, 41.787502, 43.056908, 33.16921 , 40.0819  , 34.972755,
       33.164474, 35.08806 , 48.037712, 47.594593, 

In [128]:
#SUPONDO QUE ENTRAMOS EM FEVEREIRO

In [129]:
jan_2018 = merged_df.loc[merged_df['time']>='2018-01-01']
jan_2018 = jan_2018.loc[merged_df['time']<'2018-02-01']
jan_2018 = jan_2018['price actual']

In [136]:
jan_2018.head(5)

26305    19.02
26306    17.90
26307    20.88
26308    18.83
26309    15.80
Name: price actual, dtype: float64

In [130]:
# Avaliar o desempenho do modelo
r2 = r2_score(jan_2018, y_pred_jan2018)
print(f'R2 Score: {r2}')
mae = mean_absolute_error(jan_2018, y_pred_jan2018)
print(f'Mean Absolute Error: {mae}')
mse = mean_squared_error(jan_2018, y_pred_jan2018)
print(f'Mean Squared Error: {mse}')

R2 Score: 0.34985180081460476
Mean Absolute Error: 8.057985676898753
Mean Squared Error: 116.23347637511822


In [131]:
#MONITORAMENTO
#2015 - 2017 (BASE)
#trimestres para 2018 (1T, 2T, 3T, 4T) - novos 2160 dados
#Otimizar modelo com gridsearch e vendo feature importance