In [4]:
# ***********************************************************************************
# Changelog
# *******

# Code written by Ousmane Diao 


#*************************************************************************************************
# ***********************************************************************************
# Imports
# *******
import pandas as pd
import numpy as np
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.genmod.families import Poisson, NegativeBinomial, Gaussian
from statsmodels.genmod.families.links import identity, log, sqrt
import matplotlib.pyplot as plt
import math 
from scipy import optimize
import warnings

In [5]:
import os
# Work from the project data directory so the relative CSV path below resolves.
# NOTE(review): machine-specific absolute path -- adjust when running elsewhere.
#os.chdir ('C:\\Users\\odiao\\Desktop\\Model paludisme')
os.chdir('C:\\Users\\odiao\\Dropbox\\Model paludisme')
# The first CSV column is parsed as dates. `infer_datetime_format` is no longer
# passed: it is deprecated since pandas 2.0 and False was already its default.
df = pd.read_csv('Dakar.csv', header=0, parse_dates=[0])
#***************************************************************************************************
# Define the average temperature per region as the midpoint of the min and max columns
AVT_Dakar=(df.Tempmin_Dakar + df.Tempmax_Dakar)/2
AVT_Fatick=(df.Tempmin_Fatick + df.Tempmax_Fatick)/2
AVT_Kedougou=(df.Tempmin_Kedougou + df.Tempmax_Kedougou)/2
# NOTE(review): this silences *every* warning for the rest of the session,
# including numerical ones (e.g. log of a non-positive value).
warnings.filterwarnings("ignore") 

In [6]:
#*******************************************************************************************************************
#*******************Test solver scipy.optimize ***********************************************************
  #Define the mean function: for example link=identity
def mu(X, beta):
    """Mean of the response under the identity link.

    X    : design matrix
    beta : coefficient vector
    Returns the matrix product ``X @ beta``; with the identity link the mean
    is the linear predictor itself.
    """
    linear_predictor = X @ beta
    return linear_predictor
    #Define the log-likelihood function of Poisson distribution
def ll_P(beta, X, y):
    """Negative Poisson log-likelihood, suitable as a minimisation objective.

    Despite the name, the value returned is MINUS the log-likelihood
    ``sum(y * log(mu) - mu)``; the term ``log(y!)`` is omitted because it
    does not depend on ``beta``.

    beta : coefficient vector being optimised
    X    : design matrix
    y    : observed counts

    Returns ``+inf`` when any fitted mean is not strictly positive. With the
    identity link the mean ``mu(X, beta)`` is not constrained to be positive,
    and ``log`` of such a value would yield NaN/-inf and make the objective
    NaN, which a derivative-free simplex search cannot order.
    """
    mean = mu(X, beta)  # evaluate the mean once instead of twice
    if not np.all(mean > 0):  # also catches NaN entries
        return np.inf
    return -np.sum(y * np.log(mean) - mean)
    #Define the log-likelihood function of negative binomial distribution   

# Sample split, expressed as row positions in df:
#   rows [t_i, t_c) are the training targets, rows [t_c, t_e) the test targets.
# t_i must be at least as large as the largest lag used below, because the
# lagged regressors are read from rows t_i - lag onwards.
t_i = 5
t_c = 84
t_e = 108
h = 1  # not used in this cell; kept in case other cells read it
intercept = np.ones(t_e)


def _lagged_design(X, lag, start, stop):
    """Regression matrix for target rows ``start`` .. ``stop - 1``.

    Column j of X (j = 0 .. len(lag) - 1) is read ``lag[j]`` rows before the
    target rows; the column right after those (the intercept) is read unshifted.
    """
    if start < max(lag):
        # A negative slice start would silently wrap around to the end of X.
        raise ValueError("start must be >= the largest lag")
    shifted = [X[start - k:stop - k, j] for j, k in enumerate(lag)]
    return np.column_stack(shifted + [X[start:stop, len(lag)]])


def _rmse(X, beta, y):
    """Root-mean-square error of the identity-link prediction ``X @ beta`` against ``y``."""
    return np.linalg.norm(X @ beta - y) / math.sqrt(len(y))


def _print_fit(region, labels, beta, rmse_train, rmse_test):
    """Print one LaTeX table row with the rounded coefficients, then the RMSE line."""
    cells = [region] + list(labels) + [b.round(3) for b in beta]
    # Row terminator is `\\ \hline` (end of row, then a rule), as in the
    # commented header template below.
    print(" & ".join(str(c) for c in cells), "\\\\ \\hline")
    print(region, " RMSE\\_train: ", rmse_train.round(2), "RMSE\\_test: ", rmse_test.round(2))


# Per region: lags, malaria-case series, and the weather regressors.
# Lag order is [rainfall, temperature, humidity, past malaria cases]; the
# restriction lag >= 2 is applied to the weather variables while the lag of
# past malaria cases stays at 1.
region_setup = {
    'Dakar':    ([2, 2, 5, 1], df.MC_Dakar,    [df.Rainfall,   AVT_Dakar,    df.Humidity_D]),
    'Fatick':   ([3, 4, 3, 1], df.MC_Fatick,   [df.Rainfall_F, AVT_Fatick,   df.Humidity_F]),
    'Kedougou': ([2, 5, 2, 1], df.MC_Kedougou, [df.Rainfall_K, AVT_Kedougou, df.Humidity_K]),
}

for sw_regions, (lag, cases, weather) in region_setup.items():
    print("*****************************", sw_regions, "****************")
    #print("******************** FORECAST ACCURACY MEASURES - latex table ********************")
    #print('\\begin{tabular}{|c|c|c|c|c|c|c|}')
    #print('\\hline')
    #print('Regions & Method  &  &  & & & \\\\ \\hline')
    y_o = cases.values
    # Columns: rainfall, average temperature, humidity, malaria cases, intercept.
    X = np.column_stack([*weather, cases, intercept])

    y_train = y_o[t_i:t_c]
    X_train = _lagged_design(X, lag, t_i, t_c)
    y_test = y_o[t_c:t_e]
    X_test = _lagged_design(X, lag, t_c, t_e)

    # Reference fit by IRLS. The link is passed as an instance: passing the
    # link class itself is deprecated in statsmodels.
    model = sm.GLM(y_train, X_train, family=Poisson(link=identity())).fit()

    sw_method = 'IRLS'
    print("******************", sw_method, "**************************")
    x_opt = model.params
    RMSE_train = _rmse(X_train, x_opt, y_train)
    RMSE_test = _rmse(X_test, x_opt, y_test)
    _print_fit(sw_regions, [sw_method], x_opt, RMSE_train, RMSE_test)

    sw_method = 'optimize.fmin'
    print("******************", sw_method, "**************************")
    fun = lambda x: ll_P(x, X_train, y_train)  # objective: ll_P, defined above
    # Starting points: the IRLS solution itself, then twice it, its
    # element-wise square, ten times it, and the all-zero vector.
    x_guess_0 = np.array(model.params, dtype=float)
    sw_x_guess = [x_guess_0, x_guess_0*2, x_guess_0**2, x_guess_0*10, x_guess_0*0]
    for x_guess_indice, x_guess in enumerate(sw_x_guess):
        print("****************", x_guess_indice, "****************")
        x_opt = optimize.fmin(fun, x_guess)  # call the optimization solver
        print("initian guess: ", x_guess)
        RMSE_train = _rmse(X_train, x_opt, y_train)
        RMSE_test = _rmse(X_test, x_opt, y_test)
        _print_fit(sw_regions, [sw_method, x_guess_indice], x_opt, RMSE_train, RMSE_test)
        
#print('\\end{tabular}') 

***************************** Dakar ****************
****************** IRLS **************************
Dakar & IRLS & 44.611 & -134.894 & 52.75 & 0.412 & -598.593 \\hline
Dakar  RMSE\_train:  2245.27 RMSE\_test:  2689.74
****************** optimize.fmin **************************
**************** 0 ****************
Optimization terminated successfully.
         Current function value: -2735542.949784
         Iterations: 162
         Function evaluations: 287
initian guess:  [ 4.46109465e+01 -1.34894114e+02  5.27498566e+01  4.12472532e-01
 -5.98592753e+02]
Dakar & optimize.fmin & 0 & 44.611 & -134.894 & 52.75 & 0.412 & -598.592 \\hline
Dakar  RMSE\_train:  2245.27 RMSE\_test:  2689.74
**************** 1 ****************
Optimization terminated successfully.
         Current function value: -2735542.949784
         Iterations: 586
         Function evaluations: 968
initian guess:  [ 8.92218930e+01 -2.69788228e+02  1.05499713e+02  8.24945064e-01
 -1.19718551e+03]
Dakar & optimize.fmin &