# Master file compiling all sources

In [1]:
import pandas as pd
import datetime

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning) # to avoid deprecation warnings

# Create Master file, starting with electricity consumption

In [2]:
master = pd.read_csv('0_2_conso_elec/conso_elec_quotidienne.csv')

In [3]:
master.head()

Unnamed: 0,Date,Code INSEE région,Consommation (MW),Thermique (MW),Nucléaire (MW),Eolien (MW),Solaire (MW),Hydraulique (MW),Bioénergies (MW),Ech. physiques (MW),Stockage batterie
0,2013-01-01,11,399392.0,32478.0,0.0,340.0,100.0,0.0,6333.0,360051.0,0.0
1,2013-01-01,24,103218.0,4236.0,438965.0,12262.0,357.0,1596.0,1150.0,-355429.0,0.0
2,2013-01-01,27,110467.0,10996.0,0.0,3737.0,57.0,5493.0,596.0,89409.0,0.0
3,2013-01-01,28,155860.0,20427.0,384145.0,6648.0,179.0,693.0,2523.0,-258889.0,0.0
4,2013-01-01,32,253829.0,38592.0,211166.0,25444.0,308.0,46.0,3130.0,-25001.0,0.0


In [4]:
master['Date_index'] = pd.to_datetime(master['Date'])
master['year'] = master['Date_index'].dt.year
master['month'] = master['Date_index'].dt.month

In [5]:
master.head()

Unnamed: 0,Date,Code INSEE région,Consommation (MW),Thermique (MW),Nucléaire (MW),Eolien (MW),Solaire (MW),Hydraulique (MW),Bioénergies (MW),Ech. physiques (MW),Stockage batterie,Date_index,year,month
0,2013-01-01,11,399392.0,32478.0,0.0,340.0,100.0,0.0,6333.0,360051.0,0.0,2013-01-01,2013,1
1,2013-01-01,24,103218.0,4236.0,438965.0,12262.0,357.0,1596.0,1150.0,-355429.0,0.0,2013-01-01,2013,1
2,2013-01-01,27,110467.0,10996.0,0.0,3737.0,57.0,5493.0,596.0,89409.0,0.0,2013-01-01,2013,1
3,2013-01-01,28,155860.0,20427.0,384145.0,6648.0,179.0,693.0,2523.0,-258889.0,0.0,2013-01-01,2013,1
4,2013-01-01,32,253829.0,38592.0,211166.0,25444.0,308.0,46.0,3130.0,-25001.0,0.0,2013-01-01,2013,1


# Create Oil price database

In [6]:
oil = pd.read_excel('0_1_prices/RBRTEd.xls')

In [7]:
oil.head()

Unnamed: 0,Date,Europe Brent Spot Price FOB (Dollars per Barrel)
0,1987-05-20,18.63
1,1987-05-21,18.45
2,1987-05-22,18.55
3,1987-05-25,18.6
4,1987-05-26,18.63


In [8]:
oil['Date_index'] = pd.to_datetime(oil['Date'])
oil_filtered = oil.loc[(oil['Date'] >= '2013-01-01') & (oil['Date'] < '2023-01-03')]
del oil_filtered['Date']

- merge with master dataset

In [9]:
master = master.merge(oil_filtered, on='Date_index', how='outer')

# Create Electricity price dataset and merge with master

In [10]:
electricity = pd.read_csv('0_1_prices/ten00117_linear.csv')

In [11]:
electricity.head()

Unnamed: 0,DATAFLOW,LAST UPDATE,freq,product,currency,unit,indic_en,geo,TIME_PERIOD,OBS_VALUE,OBS_FLAG
0,ESTAT:TEN00117(1.0),20/12/22 23:00:00,A,6000,EUR,KWH,MSHH,AL,2011,0.1152,
1,ESTAT:TEN00117(1.0),20/12/22 23:00:00,A,6000,EUR,KWH,MSHH,AL,2012,0.1163,
2,ESTAT:TEN00117(1.0),20/12/22 23:00:00,A,6000,EUR,KWH,MSHH,AL,2013,0.1156,
3,ESTAT:TEN00117(1.0),20/12/22 23:00:00,A,6000,EUR,KWH,MSHH,AL,2014,0.1156,
4,ESTAT:TEN00117(1.0),20/12/22 23:00:00,A,6000,EUR,KWH,MSHH,AL,2015,0.0812,


In [12]:
electricity_fr = electricity[electricity['geo'] == 'FR'] #limitation to France
electricity_fr = electricity_fr[electricity_fr['indic_en'] == 'MSHH'] # only price of electricity kept = MSHH (price to industries removed with value MSIND)
electricity_fr.drop(electricity_fr.iloc[:,0:8], axis=1, inplace=True)
del electricity_fr['OBS_FLAG'] # data in or not => flag removed
electricity_fr['TIME_PERIOD'] = electricity_fr['TIME_PERIOD'].apply(lambda x : str(x) + '-01-01') # add -01-01 in view of future merge of dataset
electricity_fr['Date_index'] = pd.to_datetime(electricity_fr['TIME_PERIOD'])
electricity_fr = electricity_fr.loc[(electricity_fr['Date_index'] >'2012-01-01')] #limitation on time
electricity_fr['year'] = electricity_fr['Date_index'].dt.year #date format


In [13]:
electricity_fr.head()

Unnamed: 0,TIME_PERIOD,OBS_VALUE,Date_index,year
181,2013-01-01,0.1524,2013-01-01,2013
182,2014-01-01,0.1585,2014-01-01,2014
183,2015-01-01,0.1676,2015-01-01,2015
184,2016-01-01,0.1685,2016-01-01,2016
185,2017-01-01,0.1704,2017-01-01,2017


- Merge with master dataset

In [14]:
master = pd.merge(master, electricity_fr, how='left', on='year')

# Create gas dataset and merge with master

In [15]:
gas = pd.read_csv('0_1_prices/prix_gaz_naturel.csv', sep=';')

In [16]:
gas.head()

Unnamed: 0,Date,PEG
0,2013-01,27.729032
1,2013-02,28.889286
2,2013-03,33.099677
3,2013-04,30.764833
4,2013-05,28.856452


In [17]:
gas['Date_index'] = pd.to_datetime(gas['Date'])
gas['year'] = gas['Date_index'].dt.year
gas['month'] = gas['Date_index'].dt.month

In [18]:
gas.head()

Unnamed: 0,Date,PEG,Date_index,year,month
0,2013-01,27.729032,2013-01-01,2013,1
1,2013-02,28.889286,2013-02-01,2013,2
2,2013-03,33.099677,2013-03-01,2013,3
3,2013-04,30.764833,2013-04-01,2013,4
4,2013-05,28.856452,2013-05-01,2013,5


In [19]:
master['gas_key'] = master['year'].astype(str) + master['month'].astype(str) # creating key year + month for future merge gas + master file
gas['gas_key'] = gas['year'].astype(str) + gas['month'].astype(str) # creating key year + month for future merge gas + master file

In [20]:
del gas['Date']
del gas['Date_index']
del gas['year']
del gas['month']

In [21]:
master = pd.merge(master, gas, on='gas_key', how='left')

In [22]:
master.head()

Unnamed: 0,Date,Code INSEE région,Consommation (MW),Thermique (MW),Nucléaire (MW),Eolien (MW),Solaire (MW),Hydraulique (MW),Bioénergies (MW),Ech. physiques (MW),Stockage batterie,Date_index_x,year,month,Europe Brent Spot Price FOB (Dollars per Barrel),TIME_PERIOD,OBS_VALUE,Date_index_y,gas_key,PEG
0,2013-01-01,11,399392.0,32478.0,0.0,340.0,100.0,0.0,6333.0,360051.0,0.0,2013-01-01,2013,1,,2013-01-01,0.1524,2013-01-01,20131,27.729032
1,2013-01-01,24,103218.0,4236.0,438965.0,12262.0,357.0,1596.0,1150.0,-355429.0,0.0,2013-01-01,2013,1,,2013-01-01,0.1524,2013-01-01,20131,27.729032
2,2013-01-01,27,110467.0,10996.0,0.0,3737.0,57.0,5493.0,596.0,89409.0,0.0,2013-01-01,2013,1,,2013-01-01,0.1524,2013-01-01,20131,27.729032
3,2013-01-01,28,155860.0,20427.0,384145.0,6648.0,179.0,693.0,2523.0,-258889.0,0.0,2013-01-01,2013,1,,2013-01-01,0.1524,2013-01-01,20131,27.729032
4,2013-01-01,32,253829.0,38592.0,211166.0,25444.0,308.0,46.0,3130.0,-25001.0,0.0,2013-01-01,2013,1,,2013-01-01,0.1524,2013-01-01,20131,27.729032


# Adding weather information

In [23]:
weather = pd.read_csv('0_0_weather/weather.csv')

In [24]:
weather.head()

Unnamed: 0.1,Unnamed: 0,code_region,time,temp_max,temp_min,hours_of_sun,precipitation,windspeed,region_nom,key
0,0,1,2013-01-01,27.6,23.4,19.26,1.1,24.2,GUADELOUPE,2013-01-011
1,1,1,2013-01-02,27.7,23.8,18.26,0.0,26.3,GUADELOUPE,2013-01-021
2,2,1,2013-01-03,26.7,24.3,15.4,1.6,21.7,GUADELOUPE,2013-01-031
3,3,1,2013-01-04,27.5,24.1,19.22,0.1,26.3,GUADELOUPE,2013-01-041
4,4,1,2013-01-05,27.2,24.4,17.07,0.9,31.0,GUADELOUPE,2013-01-051


In [25]:
# Adding a key to masterfile to merge with weather
master['key'] = master['Date'].astype(str) + master['Code INSEE région'].astype(str)

In [26]:
master.head()

Unnamed: 0,Date,Code INSEE région,Consommation (MW),Thermique (MW),Nucléaire (MW),Eolien (MW),Solaire (MW),Hydraulique (MW),Bioénergies (MW),Ech. physiques (MW),...,Date_index_x,year,month,Europe Brent Spot Price FOB (Dollars per Barrel),TIME_PERIOD,OBS_VALUE,Date_index_y,gas_key,PEG,key
0,2013-01-01,11,399392.0,32478.0,0.0,340.0,100.0,0.0,6333.0,360051.0,...,2013-01-01,2013,1,,2013-01-01,0.1524,2013-01-01,20131,27.729032,2013-01-0111
1,2013-01-01,24,103218.0,4236.0,438965.0,12262.0,357.0,1596.0,1150.0,-355429.0,...,2013-01-01,2013,1,,2013-01-01,0.1524,2013-01-01,20131,27.729032,2013-01-0124
2,2013-01-01,27,110467.0,10996.0,0.0,3737.0,57.0,5493.0,596.0,89409.0,...,2013-01-01,2013,1,,2013-01-01,0.1524,2013-01-01,20131,27.729032,2013-01-0127
3,2013-01-01,28,155860.0,20427.0,384145.0,6648.0,179.0,693.0,2523.0,-258889.0,...,2013-01-01,2013,1,,2013-01-01,0.1524,2013-01-01,20131,27.729032,2013-01-0128
4,2013-01-01,32,253829.0,38592.0,211166.0,25444.0,308.0,46.0,3130.0,-25001.0,...,2013-01-01,2013,1,,2013-01-01,0.1524,2013-01-01,20131,27.729032,2013-01-0132


In [27]:
master = pd.merge(master, weather, on='key', how='left')

In [28]:
master.head()

Unnamed: 0.1,Date,Code INSEE région,Consommation (MW),Thermique (MW),Nucléaire (MW),Eolien (MW),Solaire (MW),Hydraulique (MW),Bioénergies (MW),Ech. physiques (MW),...,key,Unnamed: 0,code_region,time,temp_max,temp_min,hours_of_sun,precipitation,windspeed,region_nom
0,2013-01-01,11,399392.0,32478.0,0.0,340.0,100.0,0.0,6333.0,360051.0,...,2013-01-0111,18300.0,11.0,2013-01-01,8.5125,3.4625,3.43875,8.6625,24.4,ILE DE FRANCE
1,2013-01-01,24,103218.0,4236.0,438965.0,12262.0,357.0,1596.0,1150.0,-355429.0,...,2013-01-0124,21960.0,24.0,2013-01-01,8.833333,3.7,3.661667,6.266667,26.733333,CENTRE VAL DE LOIRE
2,2013-01-01,27,110467.0,10996.0,0.0,3737.0,57.0,5493.0,596.0,89409.0,...,2013-01-0127,25620.0,27.0,2013-01-01,7.725,3.4,1.115,10.25,24.6625,BOURGOGNE FRANCHE COMTE
3,2013-01-01,28,155860.0,20427.0,384145.0,6648.0,179.0,693.0,2523.0,-258889.0,...,2013-01-0128,29280.0,28.0,2013-01-01,9.04,4.56,4.102,4.98,22.76,NORMANDIE
4,2013-01-01,32,253829.0,38592.0,211166.0,25444.0,308.0,46.0,3130.0,-25001.0,...,2013-01-0132,32940.0,32.0,2013-01-01,8.36,3.44,3.516,7.18,28.38,HAUTS DE FRANCE


In [29]:
del master['Unnamed: 0']
del master['key']
del master['time']
del master['region_nom']
del master['code_region']

In [30]:
master.head()

Unnamed: 0,Date,Code INSEE région,Consommation (MW),Thermique (MW),Nucléaire (MW),Eolien (MW),Solaire (MW),Hydraulique (MW),Bioénergies (MW),Ech. physiques (MW),...,TIME_PERIOD,OBS_VALUE,Date_index_y,gas_key,PEG,temp_max,temp_min,hours_of_sun,precipitation,windspeed
0,2013-01-01,11,399392.0,32478.0,0.0,340.0,100.0,0.0,6333.0,360051.0,...,2013-01-01,0.1524,2013-01-01,20131,27.729032,8.5125,3.4625,3.43875,8.6625,24.4
1,2013-01-01,24,103218.0,4236.0,438965.0,12262.0,357.0,1596.0,1150.0,-355429.0,...,2013-01-01,0.1524,2013-01-01,20131,27.729032,8.833333,3.7,3.661667,6.266667,26.733333
2,2013-01-01,27,110467.0,10996.0,0.0,3737.0,57.0,5493.0,596.0,89409.0,...,2013-01-01,0.1524,2013-01-01,20131,27.729032,7.725,3.4,1.115,10.25,24.6625
3,2013-01-01,28,155860.0,20427.0,384145.0,6648.0,179.0,693.0,2523.0,-258889.0,...,2013-01-01,0.1524,2013-01-01,20131,27.729032,9.04,4.56,4.102,4.98,22.76
4,2013-01-01,32,253829.0,38592.0,211166.0,25444.0,308.0,46.0,3130.0,-25001.0,...,2013-01-01,0.1524,2013-01-01,20131,27.729032,8.36,3.44,3.516,7.18,28.38


# Tiddy up master

In [31]:
master['Date'] = pd.to_datetime(master['Date'])
master['day'] = master['Date'].dt.day
master['day_of_week'] = master['Date'].dt.day_of_week

In [32]:
master = master.rename(columns={'Europe Brent Spot Price FOB (Dollars per Barrel)':'brent_price' , 'OBS_VALUE':'prix_kwh_elec', 'PEG':'prix_gaz'})

In [33]:
del master['Date_index_x']
del master['Date_index_y']
del master['gas_key']

In [34]:
master.head()

Unnamed: 0,Date,Code INSEE région,Consommation (MW),Thermique (MW),Nucléaire (MW),Eolien (MW),Solaire (MW),Hydraulique (MW),Bioénergies (MW),Ech. physiques (MW),...,TIME_PERIOD,prix_kwh_elec,prix_gaz,temp_max,temp_min,hours_of_sun,precipitation,windspeed,day,day_of_week
0,2013-01-01,11,399392.0,32478.0,0.0,340.0,100.0,0.0,6333.0,360051.0,...,2013-01-01,0.1524,27.729032,8.5125,3.4625,3.43875,8.6625,24.4,1,1
1,2013-01-01,24,103218.0,4236.0,438965.0,12262.0,357.0,1596.0,1150.0,-355429.0,...,2013-01-01,0.1524,27.729032,8.833333,3.7,3.661667,6.266667,26.733333,1,1
2,2013-01-01,27,110467.0,10996.0,0.0,3737.0,57.0,5493.0,596.0,89409.0,...,2013-01-01,0.1524,27.729032,7.725,3.4,1.115,10.25,24.6625,1,1
3,2013-01-01,28,155860.0,20427.0,384145.0,6648.0,179.0,693.0,2523.0,-258889.0,...,2013-01-01,0.1524,27.729032,9.04,4.56,4.102,4.98,22.76,1,1
4,2013-01-01,32,253829.0,38592.0,211166.0,25444.0,308.0,46.0,3130.0,-25001.0,...,2013-01-01,0.1524,27.729032,8.36,3.44,3.516,7.18,28.38,1,1


In [35]:
# Add a column with corresponding region names for each region code

dic_code_regions = {11 : 'IDF', 
24 : 'Centre', 
27 : 'Bourgogne', 
28 : 'Normandie', 
32 : 'Hauts-de-France', 
44 : 'Grand Est', 
52 : 'Loire',
75 : 'Aquitaine',
76 : 'Occitanie',
93 : 'Provence',
84 : 'Auvergen',
53 : 'Bretagne'}

master['Nom_region'] = master['Code INSEE région'].map(dic_code_regions)


In [36]:
master.head()

Unnamed: 0,Date,Code INSEE région,Consommation (MW),Thermique (MW),Nucléaire (MW),Eolien (MW),Solaire (MW),Hydraulique (MW),Bioénergies (MW),Ech. physiques (MW),...,prix_kwh_elec,prix_gaz,temp_max,temp_min,hours_of_sun,precipitation,windspeed,day,day_of_week,Nom_region
0,2013-01-01,11,399392.0,32478.0,0.0,340.0,100.0,0.0,6333.0,360051.0,...,0.1524,27.729032,8.5125,3.4625,3.43875,8.6625,24.4,1,1,IDF
1,2013-01-01,24,103218.0,4236.0,438965.0,12262.0,357.0,1596.0,1150.0,-355429.0,...,0.1524,27.729032,8.833333,3.7,3.661667,6.266667,26.733333,1,1,Centre
2,2013-01-01,27,110467.0,10996.0,0.0,3737.0,57.0,5493.0,596.0,89409.0,...,0.1524,27.729032,7.725,3.4,1.115,10.25,24.6625,1,1,Bourgogne
3,2013-01-01,28,155860.0,20427.0,384145.0,6648.0,179.0,693.0,2523.0,-258889.0,...,0.1524,27.729032,9.04,4.56,4.102,4.98,22.76,1,1,Normandie
4,2013-01-01,32,253829.0,38592.0,211166.0,25444.0,308.0,46.0,3130.0,-25001.0,...,0.1524,27.729032,8.36,3.44,3.516,7.18,28.38,1,1,Hauts-de-France


In [37]:
# moving col Nom_region closed to region code
_column = master.pop('Nom_region')
master.insert(2, 'Nom_region', _column)

In [38]:
master.head()

Unnamed: 0,Date,Code INSEE région,Nom_region,Consommation (MW),Thermique (MW),Nucléaire (MW),Eolien (MW),Solaire (MW),Hydraulique (MW),Bioénergies (MW),...,TIME_PERIOD,prix_kwh_elec,prix_gaz,temp_max,temp_min,hours_of_sun,precipitation,windspeed,day,day_of_week
0,2013-01-01,11,IDF,399392.0,32478.0,0.0,340.0,100.0,0.0,6333.0,...,2013-01-01,0.1524,27.729032,8.5125,3.4625,3.43875,8.6625,24.4,1,1
1,2013-01-01,24,Centre,103218.0,4236.0,438965.0,12262.0,357.0,1596.0,1150.0,...,2013-01-01,0.1524,27.729032,8.833333,3.7,3.661667,6.266667,26.733333,1,1
2,2013-01-01,27,Bourgogne,110467.0,10996.0,0.0,3737.0,57.0,5493.0,596.0,...,2013-01-01,0.1524,27.729032,7.725,3.4,1.115,10.25,24.6625,1,1
3,2013-01-01,28,Normandie,155860.0,20427.0,384145.0,6648.0,179.0,693.0,2523.0,...,2013-01-01,0.1524,27.729032,9.04,4.56,4.102,4.98,22.76,1,1
4,2013-01-01,32,Hauts-de-France,253829.0,38592.0,211166.0,25444.0,308.0,46.0,3130.0,...,2013-01-01,0.1524,27.729032,8.36,3.44,3.516,7.18,28.38,1,1


In [39]:
# Create copy of master dataset for machine learning
master_ml = master.copy()

In [40]:
del master_ml['Thermique (MW)']
del master_ml['Nucléaire (MW)']
del master_ml['Eolien (MW)']
del master_ml['Solaire (MW)']
del master_ml['Hydraulique (MW)']
del master_ml['Bioénergies (MW)']
del master_ml['Ech. physiques (MW)']
del master_ml['Stockage batterie']

In [41]:
# Remove row whith lots of missing data in the detail resulting in weird avg
master_final = master[master['Date'] != '2022-10-30']

# Feature engineering for machine learning: Add lag to datasets

In [42]:
# Function to create lag from target 'Consommation (MW)'
def lag(x):
    for i in range(15):
        x[f'lag_{i+1}'] = x['Consommation (MW)'].shift(i+1)

    x['rolling_mean_7'] = x['Consommation (MW)'].rolling(window=7).mean()
    x['rolling_mean_15'] = x['Consommation (MW)'].rolling(window=15).mean()
    x['lag_364'] = x['Consommation (MW)'].shift(364)

    for i in range(1, 8):
        x[f'lag_inversed_{i}'] = x['Consommation (MW)'].shift(-i)
        
    x = x.dropna(subset=['lag_364'])

    return x

# Function to seperate dataset by region, lag it and concat it
def concator(x):
    reg_codes = x['Code INSEE région'].unique()
    data_list = []
    lag_data_list = []
    for i in reg_codes:
        data_list.append(x[x['Code INSEE région'] == i])
    for i in range(len(data_list)):
        lag_data_list.append(lag(data_list[i]))
    final = pd.concat(lag_data_list)
    return final


In [43]:
master_lag = concator(master_final)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  x[f'lag_{i+1}'] = x['Consommation (MW)'].shift(i+1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  x[f'lag_{i+1}'] = x['Consommation (MW)'].shift(i+1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  x[f'lag_{i+1}'] = x['Consommation (MW)'].shift(i+1)
A value is trying to be set on a copy of a slice 

In [44]:
master_lag.head()

Unnamed: 0,Date,Code INSEE région,Nom_region,Consommation (MW),Thermique (MW),Nucléaire (MW),Eolien (MW),Solaire (MW),Hydraulique (MW),Bioénergies (MW),...,rolling_mean_7,rolling_mean_15,lag_364,lag_inversed_1,lag_inversed_2,lag_inversed_3,lag_inversed_4,lag_inversed_5,lag_inversed_6,lag_inversed_7
4368,2013-12-31,11,IDF,476296.0,13005.0,0.0,395.0,71.0,48.0,6514.0,...,446431.142857,467695.0,399392.0,424366.0,455574.0,450735.0,425674.0,431730.0,470656.0,466931.0
4380,2014-01-01,11,IDF,424366.0,12694.0,0.0,655.0,41.0,48.0,7465.0,...,450683.571429,461028.2,492157.0,455574.0,450735.0,425674.0,431730.0,470656.0,466931.0,460424.0
4392,2014-01-02,11,IDF,455574.0,12185.0,0.0,598.0,155.0,99.0,7397.0,...,450867.571429,457468.0,487111.0,450735.0,425674.0,431730.0,470656.0,466931.0,460424.0,469782.0
4404,2014-01-03,11,IDF,450735.0,12003.0,0.0,818.0,124.0,123.0,7571.0,...,449652.0,454167.8,470053.0,425674.0,431730.0,470656.0,466931.0,460424.0,469782.0,494143.0
4416,2014-01-04,11,IDF,425674.0,12041.0,0.0,475.0,131.0,96.0,7528.0,...,450649.571429,448541.0,433732.0,431730.0,470656.0,466931.0,460424.0,469782.0,494143.0,454352.0


In [45]:
master_lag_ml = concator(master_ml)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  x[f'lag_{i+1}'] = x['Consommation (MW)'].shift(i+1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  x[f'lag_{i+1}'] = x['Consommation (MW)'].shift(i+1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  x[f'lag_{i+1}'] = x['Consommation (MW)'].shift(i+1)
A value is trying to be set on a copy of a slice 

In [46]:
# Reset index to use date
master_lag = master_lag.set_index('Date')
master_lag_ml = master_lag_ml.set_index('Date')

In [47]:
master_lag = master_lag.rename(columns = {'Code INSEE région' : 'code_region'})
master_lag_ml = master_lag_ml.rename(columns = {'Code INSEE région' : 'code_region'})

In [48]:
master_lag.to_csv('master_lag_inversed.csv')

In [49]:
master_lag_ml.to_csv('master_lag_ml_inversed.csv')