# Maximum likelihood estimation of Gumbel and GEV

In [1]:
import numpy as np
from scipy import stats
import statsmodels.api as sm
from statsmodels.base.model import GenericLikelihoodModel
from statsmodels.base.model import GenericLikelihoodModelResults
from statsmodels.base.model import LikelihoodModelResults

Start by generating some random data from the Gumbel distribution so we have something to work with

In [2]:
rv = stats.gumbel_r.rvs(loc=1,scale=0.5,size=1000)

In [3]:
rv

array([ 1.47548551e+00,  1.90800359e+00,  1.38340699e+00,  8.50704577e-01,
        9.69485651e-01,  7.52985641e-01,  6.01936746e-01,  9.37354242e-01,
        8.38036981e-01,  1.01071531e+00,  8.97007103e-01,  5.92084261e-01,
        1.02228788e+00,  5.73252361e-01,  1.17816485e+00,  5.77718537e-01,
        1.19083230e+00,  9.77272262e-01,  1.28675409e+00,  9.37393978e-01,
        1.57780838e+00,  8.54692399e-01,  9.78070269e-01,  1.07929313e+00,
        2.47067067e+00,  9.44417752e-01,  6.97385265e-01,  1.32364966e+00,
        8.06561684e-01,  1.45277931e+00,  9.00928485e-01,  1.13584718e+00,
        1.40669319e+00,  1.50045023e+00,  6.20981431e-01,  1.52521042e+00,
        1.03937898e+00,  2.16617816e+00,  8.29381578e-01,  2.70988697e-01,
        1.08525598e+00,  7.18859579e-01,  1.67442703e+00,  2.20683895e+00,
        9.23248015e-01,  6.50551757e-01,  2.76235470e+00,  3.69173818e-01,
        9.52104218e-01,  1.72613475e+00,  7.26051485e-01,  1.20958225e+00,
        1.19333843e+00,  

```Statsmodels``` requires us to have a DataFrame with the data, so let us create one

In [4]:
import pandas as pd

In [5]:
df = pd.DataFrame()

The Endogeneous variable is the variable we wish to model

In [6]:
df['endog'] = rv

Let us also create a column for each of the parameters in the Gumbel model, i.e. $a$ and $b$

In [7]:
df['a'] = 1
df['b'] = 1

$a$ and $b$ will be the exogeneous variable

In [8]:
exog = df[['a','b']]
endog = df.endog

To find the maximum likelihood estimator using Statsmodels we can employ the ```GenericLikelihoodModel``` which has some limitations, but it at least works for Gumbel. The only thing we need to do is to generate the loglikelihood and Statsmodels does the rest

In [9]:
class MyGumbel(GenericLikelihoodModel):
    def loglike(self, params):
        #exog = self.exog
        endog = self.endog
        return stats.gumbel_r.logpdf(endog,loc=params[1],scale=params[0]).sum()

Create the model

In [10]:
gumbel_model = MyGumbel(endog, exog)

Fit the model

In [11]:
gumbel_model_fit = gumbel_model.fit()

Optimization terminated successfully.
         Current function value: 0.880342
         Iterations: 54
         Function evaluations: 101


In [12]:
gumbel_model_fit.summary()

0,1,2,3
Dep. Variable:,endog,Log-Likelihood:,-880.34
Model:,MyGumbel,AIC:,1763.0
Method:,Maximum Likelihood,BIC:,1768.0
Date:,"Fri, 01 May 2020",,
Time:,11:30:28,,
No. Observations:,1000,,
Df Residuals:,999,,
Df Model:,0,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
a,0.4999,0.012,40.658,0.000,0.476,0.524
b,0.9861,0.017,59.202,0.000,0.953,1.019


In [19]:
stats.gumbel_r.fit(endog)[::-1]

(0.49992153848746346, 0.9860503956503851)

How did Statsmodels do this, well first of all the ML estimate is found by numerically solving the equations that we saw before in the lecture. Secondly they calculate the standard error using asymptotic normality of ML estimators, as I mentioned before. In the case of several parameters the variance of the estimator is the inverse of the hessian matrix (instead of $-1/l''$) 

In [17]:
variance = -np.linalg.inv(gumbel_model.hessian(stats.gumbel_r.fit(endog)[::-1]))
variance

array([[1.51175616e-04, 6.44567354e-05],
       [6.44567354e-05, 2.77412778e-04]])

To get standard errors we take square root of each entry in the variance-covariance matrix

In [20]:
np.sqrt(variance)

array([[0.01229535, 0.0080285 ],
       [0.0080285 , 0.01665571]])

We see that it perfectly fits what Statsmodels is doing. Let us now try to fit the GEV model

# The difficult GEV

In [32]:
rv = stats.genextreme.rvs(c=2,loc=1,scale=1,size=1000)

In [33]:
df = pd.DataFrame()
df['endog'] = rv

In [34]:
df['a'] = 1
df['b'] = 1
df['c'] = 1

In [35]:
exog = df[['a','b','c']]
endog = df.endog

In [36]:
class MyGEV(GenericLikelihoodModel):
    def loglike(self, params):
        exog = self.exog
        endog = self.endog
        return stats.genextreme.logpdf(endog,c=params[2],loc=params[1],scale=params[0]).sum()

In [37]:
GEV_model = MyGEV(endog, exog)

In [38]:
GEV_model_fit = GEV_model.fit(maxiter=1000)





Oj oj, thats not very good. In fact I dont know what the issue is here. We can work around it by using scipy to fit the parameters and statsmodels to get the hessian

In [39]:
params = stats.genextreme.fit(rv)
params

(1.9592515049850232, 0.0013671795559964401, 2.9361986085549807)

In [40]:
variance = np.abs(-np.linalg.inv(GEV_model.hessian(params)))
variance

array([[nan, nan, nan],
       [nan, nan, nan],
       [nan, nan, nan]])

In [41]:
result0 = LikelihoodModelResults(GEV_model,np.array(params),variance)
result = GenericLikelihoodModelResults(GEV_model, result0)

In [42]:
result.summary()

0,1,2,3
Dep. Variable:,endog,Log-Likelihood:,-inf
Model:,MyGEV,AIC:,inf
Method:,Maximum Likelihood,BIC:,inf
Date:,"Fri, 01 May 2020",,
Time:,11:39:59,,
No. Observations:,1000,,
Df Residuals:,999,,
Df Model:,0,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
a,1.9593,,,,,
b,0.0014,,,,,
c,2.9362,,,,,
