In [34]:
import pandas as pd
import numpy as np
from scipy.interpolate import interp1d

In [35]:
# Load the dataset from 'mortalité_filtered.csv' (path is relative to the
# notebook's working directory).
mortalité_filtered = pd.read_csv('mortalité_filtered.csv')

In [36]:
def s(n, variable, interet, data):
    """Bin `variable` on an evenly spaced grid and summarise `interet` per bin.

    Parameters
    ----------
    n : int
        Number of evenly spaced edges placed between the minimum and the
        maximum of ``data[variable]``. For ``n >= 2`` this yields ``n - 1``
        bins (not ``n``).
    variable : str
        Name of the explanatory column in ``data``.
    interet : str
        Name of the target column in ``data``.
    data : pandas.DataFrame
        Table containing both columns.

    Returns
    -------
    tuple of three lists, one entry per bin, in ascending bin order:
        * mean of ``interet`` inside the bin (NaN when the bin is empty),
        * number of non-null ``interet`` values inside the bin,
        * float array of the ``variable`` values falling inside the bin.

    Every bin is half-open ``[left, right)`` except the last one, which is
    closed on the right so that the maximum value is included.
    """
    colonne = data[variable]
    cible = data[interet]
    bornes = np.linspace(colonne.min(), colonne.max(), n)

    esperance = []
    bins = []      # number of target values in each bin
    var_bins = []  # explanatory values contained in each bin

    def _accumuler(masque):
        # Append the summary of the rows selected by `masque` to the outputs.
        selection = cible[masque]
        esperance.append(selection.mean())
        bins.append(selection.count())
        var_bins.append(np.array(colonne[masque]).astype('float'))

    # All bins but the last: left edge included, right edge excluded.
    for gauche, droite in zip(bornes[:n - 2], bornes[1:n - 1]):
        _accumuler((colonne >= gauche) & (colonne < droite))

    # Last bin: both edges included.
    _accumuler((colonne >= bornes[n - 2]) & (colonne <= bornes[n - 1]))

    return (esperance, bins, var_bins)

def interpol(esperance, var_bins):
    """Linearly interpolate the per-bin means over every observed value.

    Parameters
    ----------
    esperance : list of float
        Per-bin mean of the target (NaN for bins without a defined mean),
        as returned by ``s``.
    var_bins : list of numpy.ndarray
        Per-bin arrays of explanatory values, aligned with ``esperance``.

    Returns
    -------
    tuple ``(esperance_interpolation, grille)`` where ``grille`` is the sorted
    array of unique explanatory values found in ``var_bins`` and
    ``esperance_interpolation`` is the interpolated mean at each of them.

    Raises
    ------
    ValueError
        Propagated from ``interp1d`` when no bin provides a usable point.
    """
    var_centre = []
    esperance_valide = []
    for moyenne, valeurs in zip(esperance, var_bins):
        # A bin contributes a knot only if it has observations AND a defined
        # mean. Filtering the pair together keeps x and y aligned even when a
        # non-empty bin has a NaN mean (all target values missing).
        if len(valeurs) == 0 or pd.isna(moyenne):
            continue
        # Representative abscissa: the element stored at the middle position
        # of the bin (in data order, not necessarily the median).
        var_centre.append(valeurs[len(valeurs) // 2])
        esperance_valide.append(moyenne)

    interp_func = interp1d(var_centre, esperance_valide, kind='linear', fill_value='extrapolate')

    # Evaluate on every distinct explanatory value; computed once and reused.
    grille = np.unique(np.concatenate(var_bins))
    esperance_interpolation = interp_func(grille)

    return (esperance_interpolation, grille)

def entrainement(n, data_train, variables, interet):
    """Fit one interpolated mean curve per explanatory variable.

    For each column name in ``variables`` the training data is binned with
    ``s`` and the bin means are interpolated with ``interpol``.

    Parameters
    ----------
    n : int
        Grid size forwarded to ``s``.
    data_train : pandas.DataFrame
        Training table holding the explanatory and target columns.
    variables : list of str
        Names of the explanatory columns.
    interet : str
        Name of the target column.

    Returns
    -------
    tuple ``(esperances, val_interpol)``: two lists aligned with
    ``variables`` holding, respectively, the interpolated values and the
    grid of explanatory values they were evaluated on.
    """
    esperances, val_interpol = [], []
    for nom in variables:
        moyennes, _, groupes = s(n, nom, interet, data_train)
        valeurs, grille = interpol(moyennes, groupes)
        esperances.append(valeurs)
        val_interpol.append(grille)

    return (esperances, val_interpol)

def F(x, esperance_interpol, val_interpol):
    """Look up the interpolated mean associated with ``x`` (step function).

    Returns the entry of ``esperance_interpol`` paired with the largest grid
    point of ``val_interpol`` that is ``<= x``. Values outside the grid are
    clamped: below the first grid point the first entry is returned, at or
    above the last grid point the last entry is returned.

    Parameters
    ----------
    x : float
        Value of the explanatory variable.
    esperance_interpol : sequence of float
        Interpolated means, aligned with ``val_interpol``.
    val_interpol : sequence of float
        Grid of explanatory values; assumed sorted in ascending order (as
        produced by ``np.unique`` in ``interpol``).

    Returns
    -------
    The selected entry of ``esperance_interpol``, or the string ``'erreur'``
    when no comparison succeeds (empty grid, or ``x`` is NaN).
    """
    if len(val_interpol) == 0:
        return ('erreur')

    # Loop-invariant upper clamp, checked once instead of on every iteration.
    if x >= val_interpol[-1]:
        return (esperance_interpol[-1])

    for i in range(len(val_interpol)):
        if val_interpol[i] > x:
            # max(..., 0): when x is below the first grid point, i == 0 and
            # i - 1 would wrap around to the LAST element; clamp to the first.
            return (esperance_interpol[max(i - 1, 0)])
    return ('erreur')

def prediction(n, variables, interet, data_train, data):
    """Predict the target for every row of ``data`` as a sum of per-variable terms.

    The model is trained with ``entrainement`` on ``data_train``; the
    prediction for a row is the sum, over ``variables``, of ``F`` evaluated at
    that row's value of the variable.

    Parameters
    ----------
    n : int
        Grid size forwarded to ``entrainement`` (10 in this notebook).
    variables : list of str
        Names of the explanatory columns.
    interet : str
        Name of the target column to predict.
    data_train : pandas.DataFrame
        Table with the explanatory and target columns used for training.
    data : pandas.DataFrame
        Table with the explanatory columns at the rows to predict.

    Returns
    -------
    list
        One predicted value per row of ``data``, in row order.
    """
    esperances, val_interpol = entrainement(n, data_train, variables, interet)

    mort = []
    for ligne in range(len(data)):
        total = 0
        # One additive contribution per explanatory variable.
        for nom, courbe, grille in zip(variables, esperances, val_interpol):
            total += F(data[nom].iloc[ligne], courbe, grille)
        mort.append(total)
    return (mort)
    

In [37]:
# Parse the 'DateTime' column into pandas datetime values so that the `.dt`
# accessor (e.g. `.dt.year`) can be used in the cells below.
mortalité_filtered['DateTime'] = pd.to_datetime(mortalité_filtered['DateTime'])

In [38]:
# Cut-off year for the train/predict split: rows with year <= année are used
# for training, rows with year > année are the ones predicted below.
année = 2009

In [39]:
# Bin 'MMM' on the training years and keep the per-bin mean of 'der_DC_mean'
# and the per-bin 'MMM' values; the per-bin counts are discarded.
esp, _, var_bins = s(10, 'MMM', 'der_DC_mean', mortalité_filtered[mortalité_filtered['DateTime'].dt.year <= année])

In [41]:
# Inspection cell: reproduces the bin-centre computation done inside
# `interpol` (middle-position element of every non-empty bin).
var_centre = []
for i in range(len(var_bins)):
    if len(var_bins[i]) !=0:
        var_centre.append(var_bins[i][len(var_bins[i])//2])

In [42]:
# Display the bin centres (one per non-empty bin).
var_centre

[28.25822874704997,
 28.75738826874764,
 28.871754492482832,
 29.05483870967742,
 29.17971629481162,
 29.461290322580645]

In [43]:
# Display the per-bin means with NaN entries removed.
[x for x in esp if not pd.isna(x)]

[0.0126999999999998,
 -0.0274832389797086,
 0.006770056999037682,
 0.007220224331866519,
 0.00841013031315626,
 0.008465530260095201]

In [44]:
# Interpolate the 'MMM' bin means; `interpolation` is the tuple
# (interpolated values, grid of unique 'MMM' values).
interpolation = interpol(esp, var_bins)

In [45]:
# Train on years <= année and predict 'der_DC_mean' for the later years from
# the three explanatory variables 'MMM', 'dhw' and 'pdo'.
prediction(10, ['MMM', 'dhw', 'pdo'], 'der_DC_mean', mortalité_filtered[mortalité_filtered['DateTime'].dt.year <= année], mortalité_filtered[mortalité_filtered['DateTime'].dt.year > année])

[0.00997866114252574,
 0.004003081595847526,
 -0.008225485557141265,
 0.0438286805299549,
 0.03552776145345855,
 0.01912831143857859,
 0.023227037039196374,
 -0.0003892926316375542,
 0.019855241590303982,
 0.019855241590303982,
 0.0431257040192506,
 0.03582854157656652,
 0.03710917888503375,
 0.010693773713937859,
 0.04540742339992268,
 0.02630755424317636,
 0.04067079061267122,
 0.033817912707909306,
 0.0010887706646245266,
 -0.009828670753871883,
 0.02529223360974589,
 0.02808099890411913,
 0.022800182608902,
 0.01938173118114922,
 0.03855411462502869,
 0.00914664286392201,
 0.026895418806476515,
 -0.008225485557141265,
 -0.0016636848644983396,
 0.00914664286392201,
 0.0023881201603534524,
 0.012590007353316522,
 0.012590007353316522,
 0.007431494511600611,
 0.007106989835133408,
 0.04828585459712912,
 0.049752205422854745,
 0.03625895218570368]