<a href="https://colab.research.google.com/github/Dr-Carlos-Villasenor/PatternRecognition/blob/main/PR03_regresion_logistica_softmax.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Reconocimiento de patrones
## Dr. Carlos Villaseñor
### Regresión logística y SoftMax

# La función logística

1. Importamos paquetería

In [None]:
import numpy as np
import matplotlib.pyplot as plt

2. Esta es la función logística

In [None]:
# Sample the logistic function sigma(x) = 1 / (1 + exp(-x)) at 100 evenly
# spaced points between -10 and 10.
x = np.linspace(-10, 10, 100)
sig = 1.0 / (1.0 + np.exp(-x))

# Horizontal guides: the x-axis (solid), then y = 0.5 and y = 1 (dotted).
for level, fmt in ((0, "k-"), (0.5, "k:"), (1, "k:")):
    plt.plot([-10, 10], [level, level], fmt)

# Solid vertical guide through x = 0.
plt.plot([0, 0], [-1.1, 1.1], "k-")

# The curve itself, labelled with its formula for the legend.
plt.plot(x, sig, "b-", linewidth=2, label=r"$\sigma(x) = \frac{1}{1 + e^{-x}}$")
plt.xlabel("x")
plt.legend(loc="upper left", fontsize=10)
# Fix the viewport slightly beyond [0, 1] vertically so both dotted guides are visible.
plt.axis([-10, 10, -0.1, 1.1])
plt.show()

# Importar datos

In [None]:
# Importar paquetes
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Lectura de datos

In [None]:
# Shell escape: fetch diabetes.csv from the course repository; wget saves it
# in the current working directory, where the next cell reads it by name.
!wget 'https://raw.githubusercontent.com/Dr-Carlos-Villasenor/PatternRecognition/main/Dataset/diabetes.csv'

In [None]:
# Read the dataset from the CSV file in the working directory.
data = pd.read_csv('diabetes.csv')

# DataFrame.info() writes its summary straight to stdout and returns None,
# so it is called on its own; wrapping it in print() would append a stray
# "None" line to the output.
data.info()

# Exploración de datos

In [None]:
# Grid of pairwise scatter plots for the columns of `data`. The return value
# is bound to a name so that the cell does not echo it as its output.
no_show = pd.plotting.scatter_matrix(frame=data)

In [None]:
import seaborn as sns

# Pairwise correlation between the columns of `data`, shown first as text.
corr = data.corr()
print(corr)

# Same matrix as a heat map, with the column names as tick labels on both axes.
column_names = corr.columns
sns.heatmap(corr, xticklabels=column_names, yticklabels=column_names)

# Regresión logística

In [None]:
## First method: logistic regression on a single feature (Glucose)

# Select variables: a one-column feature matrix and a flat label vector.
x = np.asanyarray(data[['Glucose']])
y = np.asanyarray(data['Outcome'])

# Train/test split. A fixed random_state makes the split, and therefore the
# scores printed below, repeatable across runs of the notebook.
xtrain, xtest, ytrain, ytest = train_test_split(x, y, random_state=42)

# Create the model and train it.
logit = LogisticRegression(solver='lbfgs')
logit.fit(xtrain, ytrain)

# Mean accuracy on each partition.
print('Train: ', logit.score(xtrain, ytrain))
print('Test: ', logit.score(xtest, ytest))

# Evaluate the fitted model on a grid of glucose values; predict_proba returns
# one column per class, ordered as in logit.classes_.
g = np.linspace(0, 200, 50).reshape(-1, 1)
prediction = logit.predict_proba(g)

# Raw samples (train in blue, test in red) with one probability curve per class.
plt.plot(xtrain, ytrain, '.b', label='Train')
plt.plot(xtest, ytest, '.r', label='Test')
for k, cls in enumerate(logit.classes_):
    plt.plot(g, prediction[:, k], label=f'P(Outcome={cls})')
plt.xlabel('Glucose')
plt.ylabel('Outcome')
plt.legend()
plt.show()

In [None]:
## Second method: logistic regression on every feature, with standardisation

# Extra tools needed by this cell.
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Select variables: every column except the label as features.
x = np.asanyarray(data.drop(columns=['Outcome']))
y = np.asanyarray(data['Outcome'])

# Train/test split. A fixed random_state makes the split, and therefore the
# scores printed below, repeatable across runs of the notebook.
xtrain, xtest, ytrain, ytest = train_test_split(x, y, random_state=42)

# Build the model: standardise the features, then fit a logistic regression.
model = Pipeline([
    ('scaler', StandardScaler()),
    ('logit', LogisticRegression(solver='lbfgs')),
])

# Train the whole pipeline on the training partition.
model.fit(xtrain, ytrain)

# Mean accuracy on each partition.
print('Train: ', model.score(xtrain, ytrain))
print('Test: ', model.score(xtest, ytest))

# Explicación de variables

In [None]:
# Absolute value of the coefficients held by the pipeline's 'logit' step,
# rescaled so that they add up to 1.
weights = np.abs(model.named_steps['logit'].coef_[0])
coeff = weights / weights.sum()

# Feature names: every column of `data` except the label.
labels = list(data.drop(columns=['Outcome']).columns)

# One row per feature, ordered from least to most important and indexed by name.
features = (
    pd.DataFrame({'Features': labels, 'importance': coeff})
    .sort_values(by=['importance'], ascending=True)
    .set_index('Features')
)

# Horizontal bar chart of the shares, followed by the same table as text.
features['importance'].plot(kind='barh')
plt.xlabel('Importance')
print(features)

# Regresión polinomial logística

In [None]:
## Third method: logistic regression on degree-2 polynomial features

# Extra tools needed by this cell.
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures, StandardScaler

# Select variables: every column except the label as features.
x = np.asanyarray(data.drop(columns=['Outcome']))
y = np.asanyarray(data['Outcome'])

# Train/test split. A fixed random_state makes the split, and therefore the
# scores printed below, repeatable across runs of the notebook.
xtrain, xtest, ytrain, ytest = train_test_split(x, y, random_state=42)

# Build the model: expand the features to degree-2 polynomial terms (without
# the constant column), standardise them, then fit a logistic regression.
model = Pipeline([
    ('poly', PolynomialFeatures(degree=2, include_bias=False)),
    ('scaler', StandardScaler()),
    ('logit', LogisticRegression(solver='lbfgs')),
])

# Train the whole pipeline on the training partition.
model.fit(xtrain, ytrain)

# Mean accuracy on each partition.
print('Train: ', model.score(xtrain, ytrain))
print('Test: ', model.score(xtest, ytest))

# Regresión Softmax

In [None]:
from sklearn import datasets

# Load the iris dataset bundled with scikit-learn.
iris = datasets.load_iris()

# Show which entries the returned container provides.
list(iris)

In [None]:
# Print the textual description that ships with the dataset.
print(iris["DESCR"])

In [None]:
# Keep only feature columns 2 and 3 (petal length and petal width).
x = iris["data"][:, [2, 3]]
y = iris["target"]

# Scatter plot of the two features, one marker style per class.
plt.plot(x[y == 2, 0], x[y == 2, 1], "g^", label="Iris-Virginica")
plt.plot(x[y == 1, 0], x[y == 1, 1], "bs", label="Iris-Versicolor")
plt.plot(x[y == 0, 0], x[y == 0, 1], "yo", label="Iris-Setosa")
plt.xlabel(iris["feature_names"][2])
plt.ylabel(iris["feature_names"][3])
# The labels given above only appear once a legend is drawn.
plt.legend()

# Train/test split. A fixed random_state makes the split, and therefore the
# scores printed below, repeatable across runs of the notebook.
xtrain, xtest, ytrain, ytest = train_test_split(x, y, random_state=42)

# Softmax (multinomial logistic) regression. The `multi_class` argument is
# deprecated in recent scikit-learn releases; with the lbfgs solver the
# default already fits a multinomial model when there are more than two
# classes, so the argument is left out.
softmax_reg = LogisticRegression(solver="lbfgs")
softmax_reg.fit(xtrain, ytrain)

# Mean accuracy on each partition.
print('Train: ', softmax_reg.score(xtrain, ytrain))
print('Test: ', softmax_reg.score(xtest, ytest))

In [None]:
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix

# Predicted classes for the held-out samples and the resulting confusion matrix.
ypred = softmax_reg.predict(xtest)
cm = confusion_matrix(y_true=ytest, y_pred=ypred)

# Draw the matrix with readable class names on the axes.
class_names = ['Setosa', 'Versicolor', 'Virginica']
cm_display = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names).plot()

In [None]:
from sklearn.metrics import classification_report

# Per-class precision, recall and F1 on the test partition. The header ends
# with a real newline escape ('\n'); the previous '/n' was printed literally.
print('Classification Report: \n', classification_report(ytest, ypred))