# 1.2 - Clasificación

![blr](images/blr.jpeg)

### Ejemplo Churn

![churn](images/churn.png)


![churn2](images/churn2.png)



In [None]:
import pandas as pd                                # dataframes and tabular data handling
pd.set_option('display.max_columns', None)         # show all columns of the dataframe

import numpy as np                                 # numerical python, linear algebra library

import pylab as plt                                # plotting; NOTE(review): matplotlib discourages pylab, prefer `import matplotlib.pyplot as plt`
import seaborn as sns                              # statistical plotting library
sns.set(style='white')                             # seaborn style


from sklearn.linear_model import LogisticRegression            # logistic regression classifier

from sklearn.preprocessing import StandardScaler               # standardization of numeric features
from sklearn.preprocessing import LabelEncoder                 # maps string labels to integer codes

from sklearn.model_selection import train_test_split as tts    # split data into train and test sets


### Datos

In [None]:
# Load the churn dataset; the path is relative to the notebook's working directory.
df=pd.read_csv('../data/churn.csv')

df.head()   # preview the first rows

In [None]:
df.shape   # (number of rows, number of columns)

In [None]:
df.info()   # per-column dtype and non-null count

In [None]:
df.Churn.value_counts()   # absolute number of rows per Churn class

In [None]:
# Fraction of rows that falls in each Churn class (class count / total rows).
churn_rate = df.Churn.value_counts() / len(df)

# Bar chart of the churn rate; the trailing ';' hides the Axes repr.
churn_rate.plot.bar(
    color=['b', 'r'],
    figsize=(10, 6),
    title='Churn Rate',
    rot=0,
    fontsize=12,
);

# Show the same fractions as a table under the chart.
churn_rate

### Transformación

In [None]:
# Pairwise correlation matrix of the numeric columns.
# `numeric_only=True` is required on pandas >= 2.0: the default became False
# there, and `df` still holds string columns at this point, which would make
# `df.corr()` raise a ValueError.
corr=df.corr(numeric_only=True)

corr

In [None]:
fig, ax=plt.subplots(figsize=(10,10))


# Boolean mask that is True on the upper triangle (diagonal included); the
# matrix is symmetric, so only the lower triangle needs to be drawn.
mask=np.triu(np.ones_like(corr, dtype=bool))


# Diverging palette: blue (hue 220) for negative values, red (hue 10) for
# positive ones. The two hues must differ clearly; with both ends in red
# (0 and 10) the sign of a correlation could not be read from the color.
color_map=sns.diverging_palette(220, 10, as_cmap=True)


# correlation heatmap
sns.heatmap(corr,                       # data
            mask=mask,                  # cells where the mask is True are not drawn
            cmap=color_map,             # colormap
            vmax=1,                     # upper limit of the color scale
            center=0,                   # value mapped to the middle of the colormap
            square=True,                # draw square cells
            linewidths=.5,              # width of the lines separating cells
            cbar_kws={'shrink': .5},    # shrink the colorbar to half size
            ax=ax                       # draw on the axes created above
           );

### Normalización

In [None]:
# Numeric part of the raw frame, selected once and reused below.
df_numeric=df._get_numeric_data()

# Standardize every numeric column. The result keeps the original column names
# AND the original index: `pd.concat(axis=1)` aligns on the index, so a fresh
# 0..n-1 index would misalign rows (and inject NaNs) whenever `df` does not
# carry a default RangeIndex.
# NOTE(review): the scaler is fitted on the full dataset before the train/test
# split, so test rows influence the scaling statistics — confirm this is acceptable.
data_num=pd.DataFrame(StandardScaler().fit_transform(df_numeric),
                      columns=df_numeric.columns,
                      index=df.index)

# String (object) columns are carried over unchanged.
data_obj=df.select_dtypes(include='object')

# Scaled numeric columns first, then the object columns, side by side.
data=pd.concat([data_num, data_obj], axis=1)

data.head()

### Label Encoding

In [None]:
le=LabelEncoder()

# Columns still stored as Python objects (strings) at this point.
object_columns=[name for name in data.columns if data[name].dtype==object]

# Replace each of them, in place, with the integer codes produced by the
# encoder. Values are cast to str first so the encoder always sees strings.
# The single `le` instance is refitted per column, so after the loop it only
# remembers the classes of the last encoded column.
for name in object_columns:
    data[name]=le.fit_transform(data[name].astype(str))

In [None]:
data.shape   # (rows, columns) after scaling and label encoding

In [None]:
data.head()   # preview: object columns now hold integer codes

In [None]:
data.info()   # per-column dtype and non-null count of the transformed frame

### Modelado

In [None]:
# Feature matrix: every column except the three listed ones.
# NOTE(review): ChurnBinary presumably duplicates the target and customerID is
# presumably a row identifier — confirm against the dataset description.
X=data.drop(columns=['Churn', 'ChurnBinary', 'customerID'])

# Target vector: the Churn column.
y=data['Churn']

In [None]:
X.shape, y.shape   # both must have the same number of rows

In [None]:
# Train/test partition.
# stratify=y keeps the class distribution of the target in both partitions;
# random_state=42 seeds the shuffling so the split is reproducible.
split=tts(X, y, stratify=y, random_state=42)

X_train, X_test, y_train, y_test=split

In [None]:
#help(tts)

In [None]:
# Class counts of the target in each partition (split was made with stratify=y).
y_train.value_counts(), y_test.value_counts()

In [None]:
# Shapes of the four partitions: features and target, train and test.
X_train.shape, X_test.shape, y_train.shape, y_test.shape

In [None]:
# model: logistic regression with the solver's iteration cap set to 2000

logreg=LogisticRegression(max_iter=2000)

In [None]:
#help(LogisticRegression)

In [None]:
logreg.fit(X_train, y_train)   # train on the training partition only

In [None]:
# Predicted class label for every row of the test partition.
y_pred=logreg.predict(X_test)

y_pred[:10]   # first ten predictions

In [None]:
# Predicted class probabilities for the test partition: one row per sample,
# one column per class (columns follow the order of logreg.classes_).
y_prob=logreg.predict_proba(X_test)

y_prob[:10]   # first ten probability rows

In [None]:
# Threshold 0.7: label a sample 1 only when the probability in column 1 is
# strictly above 0.7. Only the first ten samples are shown.

[int(p>0.7) for p in y_prob[:10, 1]]

In [None]:
# Accuracy (%) of the default predictions: matching labels over test-set size.
n_hits=(y_pred==y_test).sum()

n_hits/len(y_pred) * 100

In [None]:
logreg.intercept_   # fitted bias term of the logistic regression

In [None]:
logreg.coef_   # fitted coefficients, one per feature column of X

In [None]:
# Map each feature name to its fitted coefficient (first row of coef_).
{feature: weight for feature, weight in zip(X.columns, logreg.coef_[0])}

In [None]:
# Custom decision threshold: label 0 when the probability in column 1 is below
# 0.25, otherwise 1 (i.e. 1 when prob >= 0.25).
y_pred_prime=[0 if p<0.25 else 1 for p in y_prob[:, 1]]

y_pred_prime[:10]

In [None]:
# Accuracy (%) of the 0.25-threshold predictions against the true test labels.
n_hits_prime=(y_test==y_pred_prime).sum()

n_hits_prime/len(y_test) * 100