In [1]:
from utils import utils_hackathon as uh
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import importlib
from scipy.stats import pearsonr
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeClassifier


In [107]:
signe = "+"
periode_start = "07-01"
periode_end = "10-30"
indicateur = "T_MOYENNE"
dict_indicateurs = {
    "T_MAX": "Temperature maximale",
    "T_MIN": "Température minimale",
    "T_MOYENNE": "Température moyenne",
    "nb_episodes": "Nombre d'épisodes",
    "Nb_jours_max": "Nombre de jours où la température est > à 25 °C ",
}

In [18]:
def temp_moyenne(df):
    df["Année"] = df["DATE"].dt.year
    result = df.groupby(["Année"])["T_Q"].min().reset_index()
    result.rename(columns={"T_Q": "T_MOYENNE"}, inplace=True)
    return result

In [25]:
def filtre_temporel_periode(df, date_debut, date_fin):
    df["DATE"] = pd.to_datetime(df["DATE"])

    date_debut = pd.to_datetime(date_debut, format="%m-%d")
    date_fin = pd.to_datetime(date_fin, format="%m-%d")
    df["Mois-Jour"] = df["DATE"].dt.strftime("%m-%d")
    df_filtre = df[
        (df["Mois-Jour"] >= date_debut.strftime("%m-%d"))
        & (df["Mois-Jour"] <= date_fin.strftime("%m-%d"))
    ]
    df_filtre = df_filtre.drop("Mois-Jour", axis=1)
    return df_filtre

In [29]:
def calcul_val_reference(df, indic):
    df_periode = df[(df["Année"] >= 1951) & (df["Année"] <= 1980)]

    moyenne = df_periode[indic].mean()

    return moyenne

In [33]:
def calcule_anomalie(df, indicateur, moyenne_ref):
    df["ANOM_" + indicateur] = df[indicateur] - moyenne_ref
    return df

In [40]:
def calcul_taux_correlation(df_temp, df_upload, indicateur):
    corr, _ = pearsonr(df_temp[indicateur], df_upload['T_Q'])
    return corr 

In [98]:
def main_indic_temperature(
    df_mf,
    df_drias,
    df_upload, 
    indicateur,
    periode_start,
    periode_end,
    dict_indicateurs,
):
    if indicateur == "T_MAX":
        temp_function = temp_max
    elif indicateur == "T_MIN":
        temp_function = temp_min
    elif indicateur == "T_MOYENNE":
        temp_function = temp_moyenne

    # filtre temporel
    df_mf_filtre = filtre_temporel_periode(df_mf, periode_start, periode_end)
    df_drias_filtre = filtre_temporel_periode(df_drias, periode_start, periode_end)

    # filtre  et calcul température minimale par année sur MF
    df_mf_temp_min = temp_function(df_mf_filtre)
    val_ref = calcul_val_reference(df_mf_temp_min, indicateur)
    df_mf_temp_min = calcule_anomalie(df_mf_temp_min, indicateur, val_ref)

    # fitlre et calcul sur DRIAS
    df_drias_temp_min = temp_function(df_drias_filtre)
    val_ref_drias = calcul_val_reference(df_drias_temp_min, indicateur)
    df_drias_temp_min = calcule_anomalie(df_drias_temp_min, indicateur, val_ref_drias)

    # Anomalie et rolling average sur DRIAS
    df_drias_temp_min["rolling_avg"] = (
        df_drias_temp_min[indicateur].rolling(window=30).mean() - val_ref_drias
    )
    df_drias_temp_min["rolling_std"] = (
        df_drias_temp_min[indicateur].rolling(window=30).std()
    )
    df_drias_temp_min["avg + std"] = (
        df_drias_temp_min["rolling_avg"] + df_drias_temp_min["rolling_std"]
    )
    df_drias_temp_min["avg - std"] = (
        df_drias_temp_min["rolling_avg"] - df_drias_temp_min["rolling_std"]
    )

    df_drias_temp_min[['Année', indicateur]].to_csv("test.csv", index=False)
    df_mf_temp_min[['Année', indicateur]].to_csv("df_mf_temp.csv", index=False)

    return df_drias_temp_min, df_mf_temp_min
    
    #print(len(filtre_temporel_periode(df_upload, periode_start, periode_end)))
    #cr = calcul_taux_correlation(df_drias_temp_min, df_upload,indicateur)
    #print(cr)
    # Trace
    #fig = plot_climate_strip(
    #    df_mf_temp_min,
    #    df_drias_temp_min,
    #    indicateur,
    #    periode_start,
    #    periode_end,
    #    dict_indicateurs,
    #    val_ref,
    #    1951,
    #    1980,
    #)

    #return fig

In [99]:
df_drias1 = pd.read_csv("data/drias_montpellier_RCP8.5_df.csv")
df_mf = pd.read_csv("data/mf_montpellier.csv")
df_drias1.head(5)

Unnamed: 0,DATE,T_Q
0,1951-01-01,4.1053
1,1951-01-02,1.32964
2,1951-01-03,2.46975
3,1951-01-04,1.80877
4,1951-01-05,1.30822


In [100]:
importlib.reload(uh)
main_indic_temperature(
    df_mf=df_mf,
    df_drias=df_drias,
    df_upload = df_drias1, 
    indicateur=indicateur,
    periode_start="07-01",
    periode_end="10-30",
    dict_indicateurs=dict_indicateurs,
)

     Année  T_MOYENNE  ANOM_T_MOYENNE  rolling_avg  rolling_std  avg + std  \
0     1951   12.25485        1.864069          NaN          NaN        NaN   
1     1952   11.01450        0.623719          NaN          NaN        NaN   
2     1953   10.64294        0.252159          NaN          NaN        NaN   
3     1954   12.45843        2.067649          NaN          NaN        NaN   
4     1955   13.15600        2.765219          NaN          NaN        NaN   
..     ...        ...             ...          ...          ...        ...   
145   2096   12.78478        2.393999     4.289905     1.748472   6.038378   
146   2097   12.21514        1.824359     4.285773     1.754333   6.040107   
147   2098   14.62982        4.239039     4.270216     1.752549   6.022765   
148   2099   13.41003        3.019249     4.216013     1.765644   5.981657   
149   2100   19.67285        9.282069     4.339501     1.980608   6.320109   

     avg - std  
0          NaN  
1          NaN  
2          N

#### TEST CORRELATION

In [3]:
df_drias = pd.read_csv("drias_temp.csv")
df_mf = pd.read_csv("df_mf_temp.csv")
df_index = pd.read_csv('data/qualite_vin (1).csv')

In [4]:
df = uh.create_df_index_var_metier(df_mf, df_index)

In [5]:
df95, df30, df50 = uh.main_inspect_csv(df_index, df_mf, df_drias)

    DATE  index
0   1958    9.6
1   1959    9.2
2   1960    8.0
3   1961    9.0
4   1962   10.1
..   ...    ...
61  2019   14.7
62  2020   10.4
63  2021   11.5
64  2022   15.7
65  2023   13.7

[66 rows x 2 columns]       0     1
0   3.0  1986
1   2.0  1987
2   2.0  1988
3   4.0  1989
4   4.0  1990
5   4.0  1991
6   3.0  1992
7   0.0  1993
8   4.0  1994
9   2.0  1995
10  2.0  1996
11  1.0  1997
12  4.0  1998
13  3.0  1999
14  3.0  2000
15  2.0  2001
16  2.0  2002
17  3.0  2003
18  2.0  2004
19  4.0  2005
20  2.0  2006
21  0.0  2007
22  4.0  2008
23  3.0  2009
24  4.0  2010
25  2.0  2011
26  3.0  2012
27  3.0  2013
28  4.0  2014
29  2.0  2015
30  5.0  2016
31  3.0  2017
32  4.0  2018
33  4.0  2019
34  5.0  2020
35  1.0  2021




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [8]:
image = uh.show_box_plot(df95, df30, df50)

In [9]:
image.show()

In [10]:
fig = go.Figure()

fig.add_trace(go.Box(y=df95["qualite"], name="Qualité"))

fig.update_layout(title="Box Plot de la Qualité par Année",
                  yaxis_title="Qualité",
                  xaxis_title="Année")

fig.show()

In [256]:
df50

Unnamed: 0,qualite,Année
84,2.0,2035
85,4.0,2036
86,2.0,2037
87,2.0,2038
88,3.0,2039
89,2.0,2040
90,4.0,2041
91,2.0,2042
92,4.0,2043
93,2.0,2044
