# 01_Essai

In [1]:
import pandas as pd

In [7]:
def import_csv_to_pandas(file_path, delimiter=',', encoding='utf-8', verbose=False):
    """
    Read a CSV file into a pandas DataFrame.

    Failures are not raised to the caller: a message is printed and ``None``
    is returned instead, so callers must check the result before using it.

    Parameters:
        file_path (str): Path to the CSV file.
        delimiter (str): Column separator (defaults to ',').
        encoding (str): File encoding (defaults to 'utf-8').
        verbose (bool): When true, print a short import summary (row count,
            column count and the first rows). Defaults to False.

    Returns:
        pandas.DataFrame or None: The loaded data, or ``None`` when the file
        is missing or any other exception occurs during the import.
    """
    try:
        frame = pd.read_csv(file_path, delimiter=delimiter, encoding=encoding)

        if verbose:
            # The summary is printed inside the try block, so an error raised
            # while reporting is handled like an import error.
            row_count, column_count = frame.shape
            print("Importation réussie !")
            print(f"Nombre de lignes : {row_count}")
            print(f"Nombre de colonnes : {column_count}")
            print("\nAperçu des données :")
            print(frame.head())
    except FileNotFoundError:
        # Missing file gets its own message that names the offending path.
        print(f"Erreur : Le fichier {file_path} n'a pas été trouvé.")
        return None
    except Exception as err:
        # Any other failure (parsing, encoding, ...) is reported, not raised.
        print(f"Une erreur s'est produite lors de l'importation : {err!s}")
        return None

    return frame

In [12]:
# Relative path to the raw CSV export that this notebook explores.
csv_file = "../data/raw/energy/ODRE_injections_quotidiennes_consolidees_rpt.csv"

# The file is read with ';' as the column separator. The loader returns None
# instead of raising when the import fails, so a None here means the read failed.
dataframe = import_csv_to_pandas(file_path=csv_file, delimiter=';', verbose=False)

In [22]:
# Show the first 5 rows
dataframe.head()


Unnamed: 0,Date,Filière,Puissance maximale,00h00,00h30,01h00,01h30,02h00,02h30,03h00,...,20h30,21h00,21h30,22h00,22h30,23h00,23h30,Nb points d'injection,Energie journalière (MWh),Qualité
0,2023-11-01,Eolien,Supérieure à 1MW et Inférieure ou égale à 12MW,98.646,100.818,104.568,99.842,116.054,101.556,110.876,...,97.158,119.668,128.615,142.858,148.172,159.316,163.831,31,2714.957,Consolidée
1,2023-11-06,Hydraulique,Supérieure à 12MW,7493.166515,7243.615556,7016.016328,6902.829969,7092.614869,7214.281787,7143.952891,...,11444.101247,10820.362426,10595.881004,9582.279057,9301.720723,9679.070462,8619.92305,272,203127.119499,Consolidée
2,2023-11-13,Hydraulique,Supérieure à 12MW,7004.352912,7154.369217,6741.086084,7004.669116,6829.562854,6577.762935,6073.520645,...,8392.134444,7053.429417,6681.539751,6507.922665,6564.5546,7173.043417,7075.403643,272,170970.399437,Consolidée
3,2023-11-15,Thermique non renouvelable,Supérieure à 12MW,2096.0232,2109.22499,1322.935985,1515.861055,1314.091015,1646.090095,1125.41812,...,4090.64,3878.309,3718.294,3132.341,3016.058,3322.646,3474.756,52,68019.680092,Consolidée
4,2023-11-19,Eolien,Supérieure à 1MW et Inférieure ou égale à 12MW,101.366,136.0,129.07,125.706,131.31,110.814,109.811,...,128.835,127.58,121.102,121.039,106.976,107.59,106.595,31,2668.0595,Consolidée


In [23]:
# Show the last 2 rows
dataframe.tail(2)


Unnamed: 0,Date,Filière,Puissance maximale,00h00,00h30,01h00,01h30,02h00,02h30,03h00,...,20h30,21h00,21h30,22h00,22h30,23h00,23h30,Nb points d'injection,Energie journalière (MWh),Qualité
27136,2025-06-30,Solaire,Supérieure à 1MW et Inférieure ou égale à 12MW,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,42.018,19.382,3.772,0.0,0.0,0.0,0.0,87,4336.96675,Consolidée
27137,2025-06-30,Thermique non renouvelable,Supérieure à 1MW et Inférieure ou égale à 12MW,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9,0.0,Consolidée


In [24]:
# Show 3 randomly drawn rows; no random_state is passed, so the rows
# displayed will generally differ from one run to the next.
dataframe.sample(3)

Unnamed: 0,Date,Filière,Puissance maximale,00h00,00h30,01h00,01h30,02h00,02h30,03h00,...,20h30,21h00,21h30,22h00,22h30,23h00,23h30,Nb points d'injection,Energie journalière (MWh),Qualité
16395,2020-03-06,Thermique non renouvelable,Inférieure ou égale à 1MW,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12,0.0,Consolidée
5518,2022-01-31,Hydraulique,Supérieure à 12MW,2665.255,2887.859,2514.523,2364.151,2332.238,2151.086,1855.687,...,8182.53,6098.754,4723.724,4260.41,4140.007,4712.175,3658.525,732,102280.8,Consolidée
5475,2021-01-01,Nucléaire,Supérieure à 12MW,50599.1,50534.878,50466.809,49737.85,48783.143,48356.494,48137.295,...,51301.834,51423.763,51330.546,51286.161,51066.636,51066.144,51044.887,112,1203201.0,Consolidée


In [17]:
# Show the column names
print(dataframe.columns)

# Show the number of rows and columns
print(dataframe.shape)


Index(['Date', 'Filière', 'Puissance maximale', '00h00', '00h30', '01h00',
       '01h30', '02h00', '02h30', '03h00', '03h30', '04h00', '04h30', '05h00',
       '05h30', '06h00', '06h30', '07h00', '07h30', '08h00', '08h30', '09h00',
       '09h30', '10h00', '10h30', '11h00', '11h30', '12h00', '12h30', '13h00',
       '13h30', '14h00', '14h30', '15h00', '15h30', '16h00', '16h30', '17h00',
       '17h30', '18h00', '18h30', '19h00', '19h30', '20h00', '20h30', '21h00',
       '21h30', '22h00', '22h30', '23h00', '23h30', 'Nb points d'injection',
       'Energie journalière (MWh)', 'Qualité'],
      dtype='object')
(27138, 54)


In [25]:
# Show the dtype of each column
dataframe.dtypes


Date                          object
Filière                       object
Puissance maximale            object
00h00                        float64
00h30                        float64
01h00                        float64
01h30                        float64
02h00                        float64
02h30                        float64
03h00                        float64
03h30                        float64
04h00                        float64
04h30                        float64
05h00                        float64
05h30                        float64
06h00                        float64
06h30                        float64
07h00                        float64
07h30                        float64
08h00                        float64
08h30                        float64
09h00                        float64
09h30                        float64
10h00                        float64
10h30                        float64
11h00                        float64
11h30                        float64
1

In [26]:
# Show descriptive statistics (describe() summarises the numeric columns by default)
dataframe.describe()

Unnamed: 0,00h00,00h30,01h00,01h30,02h00,02h30,03h00,03h30,04h00,04h30,...,20h00,20h30,21h00,21h30,22h00,22h30,23h00,23h30,Nb points d'injection,Energie journalière (MWh)
count,27138.0,27138.0,27138.0,27138.0,27138.0,27138.0,27138.0,27138.0,27138.0,27138.0,...,27138.0,27138.0,27138.0,27138.0,27138.0,27138.0,27138.0,27138.0,27138.0,27138.0
mean,4995.613197,4951.965886,4782.262166,4782.257744,4740.469525,4732.402623,4654.307272,4653.5009,4616.025679,4626.048622,...,5354.459534,5317.0182,5222.306459,5187.671508,5090.355137,5073.202621,5155.993149,5055.037126,102.162798,119240.4
std,12151.733827,12128.764882,11978.991436,11996.480423,11944.751226,11952.929156,11845.572174,11844.989179,11797.370067,11827.216783,...,12316.766745,12316.219481,12279.663021,12287.202907,12234.964097,12225.766282,12249.367977,12197.166996,170.390679,289317.2
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,15.456,15.336,9.91075,2.6465,0.0,0.0,0.0,0.0,20.0,1004.473
50%,97.68066,97.1055,96.9115,96.828,96.863,96.7135,96.278,96.113,95.783,96.004965,...,97.78875,97.3205,97.29335,97.5175,97.90975,97.65925,97.6625,97.6015,46.0,2761.042
75%,2347.739589,2161.6555,1699.14775,1664.906414,1597.455762,1593.48225,1485.554282,1477.402,1439.24725,1459.320703,...,3027.308571,3001.512956,2878.704955,2817.7255,2568.2595,2523.382953,2659.804967,2491.939083,110.0,55226.06
max,57901.295,58286.599,57415.013,57642.571,57280.46,57881.444,57372.347,57998.816,57816.718,58199.876,...,58607.729,58616.624,58627.604,58630.962,58544.5,58269.033,58334.038,57962.026,748.0,1398443.0


In [27]:
# Count the missing values in each column
dataframe.isna().sum()

Date                         0
Filière                      0
Puissance maximale           0
00h00                        0
00h30                        0
01h00                        0
01h30                        0
02h00                        0
02h30                        0
03h00                        0
03h30                        0
04h00                        0
04h30                        0
05h00                        0
05h30                        0
06h00                        0
06h30                        0
07h00                        0
07h30                        0
08h00                        0
08h30                        0
09h00                        0
09h30                        0
10h00                        0
10h30                        0
11h00                        0
11h30                        0
12h00                        0
12h30                        0
13h00                        0
13h30                        0
14h00                        0
14h30   