In [7]:
import os
import netCDF4 as nc
import numpy as np
import pandas as pd

def calculate_weighted_avg_temperature(file_path):
    # Lire le fichier NetCDF
    dataset = nc.Dataset(file_path, mode='r')
    
    # Extraire les variables d'intérêt
    temperature = dataset.variables['t2m'][:]  # Remplacez par le nom exact de votre variable
    latitudes = dataset.variables['latitude'][:]  # Remplacez par le nom exact de votre variable
    longitudes = dataset.variables['longitude'][:]  # Remplacez par le nom exact de votre variable
    time_var = dataset.variables['time']
    
    # Convertir les temps en dates lisibles
    time_units = time_var.units
    time_calendar = time_var.calendar if 'calendar' in time_var.ncattrs() else 'standard'
    time_values = time_var[:]
    dates = nc.num2date(time_values, units=time_units, calendar=time_calendar)
    
    # Calculer les températures moyennes pondérées par heure
    average_temperatures = []
    
    for t, date in enumerate(dates):
        temp_slice = temperature[t, :, :]
        temp_flat = temp_slice.flatten()
        
        # Calculer la moyenne pondérée pour cette tranche temporelle
        avg_temp = np.average(temp_flat)
        average_temperatures.append([date, avg_temp])
    
    # Créer le DataFrame final avec les moyennes pondérées par heure
    df_weighted_avg_temp = pd.DataFrame(average_temperatures, columns=['date', 'average_temperature'])
    
    
    dataset.close()
    
    return df_weighted_avg_temp

# Chemin du répertoire contenant les fichiers NetCDF
directory_path = 'data/temperature'

# Lister tous les fichiers dans le répertoire et filtrer ceux avec l'extension .nc
file_paths = [os.path.join(directory_path, f) for f in os.listdir(directory_path) if f.endswith('.nc')]

# Appliquer la fonction à chaque fichier et concaténer les résultats
all_data = []

for path in file_paths:
    df = calculate_weighted_avg_temperature(path)
    all_data.append(df)

# Concaténer tous les DataFrames en un seul
final_df = pd.concat(all_data, ignore_index=True)

# Afficher les premières lignes du DataFrame final
final_df.head()


Unnamed: 0,date,average_temperature
0,2020-01-01 00:00:00,278.019598
1,2020-01-01 01:00:00,277.955716
2,2020-01-01 02:00:00,277.774103
3,2020-01-01 03:00:00,277.714403
4,2020-01-01 04:00:00,277.680769


In [9]:
final_df.set_index(['date'], inplace=True)

In [13]:
final_df.to_csv('data/temperature/temp.csv')

In [41]:
actualload2020=pd.read_csv('data/Actual Load/Total Load - Day Ahead _ Actual_202001010000-202101010000.csv')
path='data/Actual Load'
Afile_paths=[os.path.join(path, f) for f in os.listdir(path) if f.endswith('.csv')]
Afile_paths

['data/Actual Load\\Total Load - Day Ahead _ Actual_202001010000-202101010000.csv',
 'data/Actual Load\\Total Load - Day Ahead _ Actual_202101010000-202201010000.csv',
 'data/Actual Load\\Total Load - Day Ahead _ Actual_202201010000-202301010000.csv',
 'data/Actual Load\\Total Load - Day Ahead _ Actual_202301010000-202401010000.csv']

In [36]:
actualload2020.columns

Index(['Time (CET/CEST)', 'Day-ahead Total Load Forecast [MW] - BZN|ES',
       'Actual Total Load [MW] - BZN|ES'],
      dtype='object')

In [69]:
def extract_first_part(date_range):
    return date_range.split(' - ')[0]

In [70]:
def genere_actual_load(file_pa):
    df = pd.read_csv(file_pa)
    df.drop(columns=['Day-ahead Total Load Forecast [MW] - BZN|ES'], inplace=True)
    df.rename(columns={'Actual Total Load [MW] - BZN|ES': 'Actual Total Load'}, inplace=True)
    df['date']=df['Time (CET/CEST)'].apply(extract_first_part)
    df.drop(columns=['Time (CET/CEST)'], inplace=True)
    df.set_index(['date'], inplace=True)
    df.index=pd.to_datetime(df.index,dayfirst=True)
    df=df.resample('h').sum()
    df=df[~df.index.duplicated()]
    
    return df
    
    

In [71]:
genere_actual_load('data/Actual Load/Total Load - Day Ahead _ Actual_202301010000-202401010000.csv')

Unnamed: 0_level_0,Actual Total Load
date,Unnamed: 1_level_1
2023-01-01 00:00:00,79988.0
2023-01-01 01:00:00,77004.0
2023-01-01 02:00:00,72416.0
2023-01-01 03:00:00,68360.0
2023-01-01 04:00:00,66124.0
...,...
2023-12-31 19:00:00,116440.0
2023-12-31 20:00:00,115320.0
2023-12-31 21:00:00,111396.0
2023-12-31 22:00:00,100548.0


In [72]:
path='data/Actual Load'
Afile_paths=[os.path.join(path, f) for f in os.listdir(path) if f.endswith('.csv')]
Afile_paths

['data/Actual Load\\Total Load - Day Ahead _ Actual_202001010000-202101010000.csv',
 'data/Actual Load\\Total Load - Day Ahead _ Actual_202101010000-202201010000.csv',
 'data/Actual Load\\Total Load - Day Ahead _ Actual_202201010000-202301010000.csv',
 'data/Actual Load\\Total Load - Day Ahead _ Actual_202301010000-202401010000.csv']

In [76]:
ActL=[]
for f in Afile_paths:
    df=genere_actual_load(f)
    ActL.append(df)

actualload=pd.concat(ActL)

In [78]:
actualload.to_csv('data/Actual Load/Actual_Load.csv')