<a href="https://colab.research.google.com/github/Dr-Carlos-Villasenor/PatternRecognition/blob/main/PR03_02_GLM_statsmodels.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Reconocimiento de patrones
## Dr. Carlos Villaseñor
### Modelos Lineales Generalizados con statsmodels



Primeramente instalamos la paquetería

In [None]:
!pip install statsmodels

# Modelo Binomial
Ejemplo tomado de  https://www.statsmodels.org/stable/examples/notebooks/generated/glm.html

Importamos paqueterías

In [2]:
import numpy as np
import statsmodels.api as sm
from scipy import stats
from matplotlib import pyplot as plt

Cargamos datos desde la misma API

In [None]:
# Show the descriptive note that ships with the star98 dataset.
star98_note = sm.datasets.star98.NOTE
print(star98_note)

In [4]:
# Load the star98 dataset bundled with statsmodels.
data = sm.datasets.star98.load()

# Add a constant (intercept) column to the explanatory variables;
# prepend=False is passed so the constant is not placed first.
exog_with_const = sm.add_constant(data.exog, prepend=False)
data.exog = exog_with_const

Variables de entrada

In [None]:
# Display the explanatory (input) variables held by the loaded dataset.
data.exog

Variable de salida

In [None]:
# Print the response (output) variable of the loaded dataset.
endog = data.endog
print(endog)

Crear modelo y entrenar

In [None]:
# Build a GLM with a Binomial family on the star98 data, fit it, and
# print the fitted-model summary table.
binomial_family = sm.families.Binomial()
glm_binom = sm.GLM(data.endog, data.exog, family=binomial_family)
res = glm_binom.fit()
print(res.summary())

# Regresión Poisson
Ejemplo tomado de https://timeseriesreasoning.com/contents/poisson-regression-model/

In [9]:
import pandas as pd
from patsy import dmatrices
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt

In [None]:
!wget 'https://raw.githubusercontent.com/Dr-Carlos-Villasenor/PatternRecognition/main/Dataset/nyc_bb_bicyclist_counts.csv'

In [11]:
# Read the downloaded CSV, parsing the first column as dates and using it
# as the index. `infer_datetime_format` is intentionally not passed: it is
# deprecated since pandas 2.0 (it only triggers a warning there) because
# strict format inference is now the default behaviour of the parser.
df = pd.read_csv(
    'nyc_bb_bicyclist_counts.csv',
    header=0,
    parse_dates=[0],
    index_col=[0],
)

In [None]:
# Preview the first five rows of the loaded frame.
df.head(n=5)

In [12]:
# Derive calendar regressors (month, day of week, day of month) from the
# date index and attach them to the frame as new columns.
ds = df.index.to_series()
calendar_parts = {
    'MONTH': ds.dt.month,
    'DAY_OF_WEEK': ds.dt.dayofweek,
    'DAY': ds.dt.day,
}
for column_name, column_values in calendar_parts.items():
    df[column_name] = column_values

In [None]:
# Random train/test split: each row goes to the training set with
# probability 0.8, otherwise to the test set.
# A seeded generator is used so the split -- and therefore every fitted
# coefficient and prediction derived from it -- is identical on each
# Restart & Run All instead of changing from run to run.
rng = np.random.default_rng(42)
mask = rng.random(len(df)) < 0.8
df_train = df[mask]
df_test = df[~mask]
print('Training data set length='+str(len(df_train)))
print('Testing data set length='+str(len(df_test)))

In [15]:
# Patsy formula: BB_COUNT is the response; the remaining terms are the
# calendar and weather regressors.
expr = "BB_COUNT ~ DAY  + DAY_OF_WEEK + MONTH + HIGH_T + LOW_T + PRECIP"

In [17]:
# Build the response vector and design matrix (as DataFrames) from the
# formula, once for the training rows and once for the test rows.
ytrain, xtrain = dmatrices(expr, data=df_train, return_type='dataframe')
ytest, xtest = dmatrices(expr, data=df_test, return_type='dataframe')

In [18]:
# Specify a GLM with a Poisson family on the training matrices, then fit it.
poisson_model = sm.GLM(ytrain, xtrain, family=sm.families.Poisson())
poisson_training_results = poisson_model.fit()

In [None]:
# Print the summary table of the fitted Poisson model.
training_summary = poisson_training_results.summary()
print(training_summary)

In [None]:
# Predict on the held-out design matrix with the fitted Poisson model.
poisson_predictions = poisson_training_results.get_prediction(xtest)
# Tabulate the prediction results as a summary frame and print it.
# NOTE(review): presumably one row per test observation -- confirm against
# the statsmodels documentation for the columns it contains.
predictions_summary_frame = poisson_predictions.summary_frame()
print(predictions_summary_frame)