# Prever Dados de Churn a partir da base

In [47]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, f1_score
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [164]:
df = pd.read_excel('/content/drive/MyDrive/Colab Notebooks/Classificação/Redes Neurais/churn.xlsx')

In [165]:
df.head()

Unnamed: 0,customerID,tenure,PhoneService,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,1,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,34,Yes,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,2,Yes,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,45,No,One year,No,Bank transfer,42.3,1840.75,No
4,9237-HQITU,2,Yes,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


## Ajustando a base

In [166]:
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')

In [167]:
df.head()

Unnamed: 0,customerID,tenure,PhoneService,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,1,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,34,Yes,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,2,Yes,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,45,No,One year,No,Bank transfer,42.3,1840.75,No
4,9237-HQITU,2,Yes,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


### Verificando Nulos

In [168]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7042 entries, 0 to 7041
Data columns (total 9 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   customerID        7042 non-null   object 
 1   tenure            7042 non-null   int64  
 2   PhoneService      7042 non-null   object 
 3   Contract          7042 non-null   object 
 4   PaperlessBilling  7042 non-null   object 
 5   PaymentMethod     7042 non-null   object 
 6   MonthlyCharges    7042 non-null   float64
 7   TotalCharges      7031 non-null   float64
 8   Churn             7042 non-null   object 
dtypes: float64(2), int64(1), object(6)
memory usage: 495.3+ KB


In [169]:
df.isnull().sum()

customerID           0
tenure               0
PhoneService         0
Contract             0
PaperlessBilling     0
PaymentMethod        0
MonthlyCharges       0
TotalCharges        11
Churn                0
dtype: int64

In [170]:
df.dropna(inplace=True)

In [171]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 7031 entries, 0 to 7041
Data columns (total 9 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   customerID        7031 non-null   object 
 1   tenure            7031 non-null   int64  
 2   PhoneService      7031 non-null   object 
 3   Contract          7031 non-null   object 
 4   PaperlessBilling  7031 non-null   object 
 5   PaymentMethod     7031 non-null   object 
 6   MonthlyCharges    7031 non-null   float64
 7   TotalCharges      7031 non-null   float64
 8   Churn             7031 non-null   object 
dtypes: float64(2), int64(1), object(6)
memory usage: 549.3+ KB


### Separando as bases

In [172]:
# Retirando o CustomerID
df = df.drop(columns=['customerID'], axis=1)

In [173]:
# Separando a base
x =df.drop(columns=['Churn'], axis=1)

y= df[['Churn']].copy()

### Retirando as variáveis categoricas

In [174]:
x.head(4)

Unnamed: 0,tenure,PhoneService,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges
0,1,No,Month-to-month,Yes,Electronic check,29.85,29.85
1,34,Yes,One year,No,Mailed check,56.95,1889.5
2,2,Yes,Month-to-month,Yes,Mailed check,53.85,108.15
3,45,No,One year,No,Bank transfer,42.3,1840.75


In [137]:
# le = LabelEncoder()

# #Transformando as variaveis categoricas
# le.fit(x.PhoneService)
# x.PhoneService = le.transform(x.PhoneService)

# le.fit(x.Contract)
# x.Contract = le.transform(x.Contract)

# le.fit(x.PaperlessBilling)
# x.PaperlessBilling = le.transform(x.PaperlessBilling)

# le.fit(x.PaymentMethod)
# x.PaymentMethod = le.transform(x.PaymentMethod)

In [175]:
x = pd.get_dummies(x)

In [176]:
le.fit(y.Churn)
y.Churn = le.transform(y.Churn)

In [178]:
x.head()

Unnamed: 0,tenure,MonthlyCharges,TotalCharges,PhoneService_No,PhoneService_Yes,Contract_Month-to-month,Contract_One year,Contract_Two year,PaperlessBilling_No,PaperlessBilling_Yes,PaymentMethod_Bank transfer,PaymentMethod_Credit card,PaymentMethod_Electronic check,PaymentMethod_Mailed check
0,1,29.85,29.85,1,0,1,0,0,0,1,0,0,1,0
1,34,56.95,1889.5,0,1,0,1,0,1,0,0,0,0,1
2,2,53.85,108.15,0,1,1,0,0,0,1,0,0,0,1
3,45,42.3,1840.75,1,0,0,1,0,1,0,1,0,0,0
4,2,70.7,151.65,0,1,1,0,0,0,1,0,0,1,0


In [177]:
x_train,x_test, y_train, y_test = train_test_split(x,y, test_size=0.2)

### Normalizando

In [179]:
# normalizando a base
scaler = StandardScaler()

scaler.fit(x_train, y_train)

In [180]:
x_train_scaled= scaler.transform(x_train)
x_test_scaled= scaler.transform(x_test)

x_train = pd.DataFrame(x_train_scaled)
x_test = pd.DataFrame(x_test_scaled)

In [181]:
x_train.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,-1.116177,0.360522,-0.827807,-0.33056,0.33056,0.902327,-0.513081,-0.562565,1.213296,-1.213296,-0.527154,-0.525777,1.406703,-0.546942
1,-0.912497,0.445196,-0.66795,-0.33056,0.33056,0.902327,-0.513081,-0.562565,-0.824201,0.824201,-0.527154,1.901948,-0.710882,-0.546942
2,1.613137,-1.296428,-0.151221,-0.33056,0.33056,-1.108246,-0.513081,1.777572,-0.824201,0.824201,-0.527154,1.901948,-0.710882,-0.546942
3,-1.197649,0.179552,-0.919626,-0.33056,0.33056,0.902327,-0.513081,-0.562565,-0.824201,0.824201,-0.527154,-0.525777,1.406703,-0.546942
4,-0.708817,0.337278,-0.517152,-0.33056,0.33056,0.902327,-0.513081,-0.562565,-0.824201,0.824201,1.896979,-0.525777,-0.710882,-0.546942


## Iniciando o modelo de Redes Neurais

In [186]:
clf = MLPClassifier(hidden_layer_sizes=(100,50,20), activation='logistic')

clf.fit(x_train,y_train)

  y = column_or_1d(y, warn=True)


In [190]:
y_pred = clf.predict(x_test)

In [191]:
print(f'Acurácia: {accuracy_score(y_test,y_pred)*100}%')
print(f'F1_score: {f1_score(y_test,y_pred)*100}%')

Acurácia: 78.89125799573561%
F1_score: 53.22834645669292%
