<a href="https://colab.research.google.com/github/SergioCarmo-ro/TelecomX_BR_Parte_II/blob/main/TelecomX_BR_Parte_II.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

🎯 Missão

A nova missão é desenvolver modelos preditivos capazes de prever quais clientes têm maior chance de cancelar seus serviços.

A empresa quer antecipar o problema da evasão, e cabe a você construir um pipeline robusto para essa etapa inicial de modelagem.

📝 Extração do Arquivo Tratado

  Fazendo Leitura dos Dados

In [1]:
import pandas as pd
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import OneHotEncoder

In [2]:
dados = pd.read_csv('/content/dados_tratados.csv')

In [3]:
display(dados.head())

Unnamed: 0,customerID,Churn,customer,phone,internet,account
0,0002-ORFBO,No,"{'gender': 'Female', 'SeniorCitizen': 0, 'Part...","{'PhoneService': 'Yes', 'MultipleLines': 'No'}","{'InternetService': 'DSL', 'OnlineSecurity': '...","{'Contract': 'One year', 'PaperlessBilling': '..."
1,0003-MKNFE,No,"{'gender': 'Male', 'SeniorCitizen': 0, 'Partne...","{'PhoneService': 'Yes', 'MultipleLines': 'Yes'}","{'InternetService': 'DSL', 'OnlineSecurity': '...","{'Contract': 'Month-to-month', 'PaperlessBilli..."
2,0004-TLHLJ,Yes,"{'gender': 'Male', 'SeniorCitizen': 0, 'Partne...","{'PhoneService': 'Yes', 'MultipleLines': 'No'}","{'InternetService': 'Fiber optic', 'OnlineSecu...","{'Contract': 'Month-to-month', 'PaperlessBilli..."
3,0011-IGKFF,Yes,"{'gender': 'Male', 'SeniorCitizen': 1, 'Partne...","{'PhoneService': 'Yes', 'MultipleLines': 'No'}","{'InternetService': 'Fiber optic', 'OnlineSecu...","{'Contract': 'Month-to-month', 'PaperlessBilli..."
4,0013-EXCHZ,Yes,"{'gender': 'Female', 'SeniorCitizen': 1, 'Part...","{'PhoneService': 'Yes', 'MultipleLines': 'No'}","{'InternetService': 'Fiber optic', 'OnlineSecu...","{'Contract': 'Month-to-month', 'PaperlessBilli..."


In [4]:
dados.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7267 entries, 0 to 7266
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   customerID  7267 non-null   object
 1   Churn       7043 non-null   object
 2   customer    7267 non-null   object
 3   phone       7267 non-null   object
 4   internet    7267 non-null   object
 5   account     7267 non-null   object
dtypes: object(6)
memory usage: 340.8+ KB


Eliminando Colunas irrelevantes

In [5]:
# Colunas que são identificadores únicos ou não contribuem para a previsão
colunas_irrelevantes = ['ID', 'ClienteID', 'Nome', 'CPF', 'Telefone', 'Email']  # Exemplo de colunas a remover
colunas_a_remover = [col for col in colunas_irrelevantes if col in dados.columns]  # Verifica se as colunas existem
dados = dados.drop(columns=colunas_a_remover)

In [6]:
# Colunas removidas e  que permaneceram
print("Colunas removidas:", colunas_a_remover)
print("Colunas restantes:", dados.columns.tolist())

Colunas removidas: []
Colunas restantes: ['customerID', 'Churn', 'customer', 'phone', 'internet', 'account']


 Identificando variáveis categóricas

In [7]:
# Vamos assumir que variáveis categóricas são do tipo 'object' ou 'category'
colunas_categoricas = dados.select_dtypes(include=['object', 'category']).columns.tolist()
print("Variáveis categóricas detectadas:", colunas_categoricas)

Variáveis categóricas detectadas: ['customerID', 'Churn', 'customer', 'phone', 'internet', 'account']


Aplicando one-hot encoding nas variáveis categóricas

In [8]:
# Identify numerical and categorical features *before* one-hot encoding
colunas_numericas = dados.select_dtypes(include=['int64', 'float64']).columns.tolist()
colunas_categoricas = dados.select_dtypes(include=['object', 'category']).columns.tolist()

# Remove 'Churn' from categorical columns if it exists, as it's the target variable
if 'Churn' in colunas_categoricas:
    colunas_categoricas.remove('Churn')

# Create the column transformer
column_transformer = make_column_transformer(
    (OneHotEncoder(drop='first', sparse_output=False, handle_unknown='ignore'), colunas_categoricas),
    remainder='passthrough'  # Keep numerical columns for now
)

Aplicando a transformação

In [9]:
# Aplicar a transformação
dados_transformados = column_transformer.fit_transform(dados)

In [10]:
# Obter nomes das novas colunas após one-hot encoding
novas_colunas = column_transformer.get_feature_names_out()

In [11]:
# Criar DataFrame com os dados transformados
dados_transformados = pd.DataFrame(dados_transformados, columns=novas_colunas)

In [12]:
# Exibindo as primeiras linhas do DataFrame codificado
print("\nPrimeiras 5 linhas do dataset codificado:")
display(dados_transformados.head())


Primeiras 5 linhas do dataset codificado:


Unnamed: 0,onehotencoder__customerID_0003-MKNFE,onehotencoder__customerID_0004-TLHLJ,onehotencoder__customerID_0011-IGKFF,onehotencoder__customerID_0013-EXCHZ,onehotencoder__customerID_0013-MHZWF,onehotencoder__customerID_0013-SMEOE,onehotencoder__customerID_0014-BMAQU,onehotencoder__customerID_0015-UOCOJ,onehotencoder__customerID_0016-QLJIS,onehotencoder__customerID_0017-DINOC,...,"onehotencoder__account_{'Contract': 'Two year', 'PaperlessBilling': 'Yes', 'PaymentMethod': 'Mailed check', 'Charges': {'Monthly': 89.25, 'Total': '4652.4'}}","onehotencoder__account_{'Contract': 'Two year', 'PaperlessBilling': 'Yes', 'PaymentMethod': 'Mailed check', 'Charges': {'Monthly': 89.4, 'Total': '6376.55'}}","onehotencoder__account_{'Contract': 'Two year', 'PaperlessBilling': 'Yes', 'PaymentMethod': 'Mailed check', 'Charges': {'Monthly': 90.45, 'Total': '5957.9'}}","onehotencoder__account_{'Contract': 'Two year', 'PaperlessBilling': 'Yes', 'PaymentMethod': 'Mailed check', 'Charges': {'Monthly': 90.5, 'Total': '4318.35'}}","onehotencoder__account_{'Contract': 'Two year', 'PaperlessBilling': 'Yes', 'PaymentMethod': 'Mailed check', 'Charges': {'Monthly': 90.6, 'Total': '5817.45'}}","onehotencoder__account_{'Contract': 'Two year', 'PaperlessBilling': 'Yes', 'PaymentMethod': 'Mailed check', 'Charges': {'Monthly': 91.25, 'Total': '6589.6'}}","onehotencoder__account_{'Contract': 'Two year', 'PaperlessBilling': 'Yes', 'PaymentMethod': 'Mailed check', 'Charges': {'Monthly': 92.15, 'Total': '6480.9'}}","onehotencoder__account_{'Contract': 'Two year', 'PaperlessBilling': 'Yes', 'PaymentMethod': 'Mailed check', 'Charges': {'Monthly': 92.45, 'Total': '6460.55'}}","onehotencoder__account_{'Contract': 'Two year', 'PaperlessBilling': 'Yes', 'PaymentMethod': 'Mailed check', 'Charges': {'Monthly': 95.5, 'Total': '4627.85'}}",remainder__Churn
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,No
1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,No
2,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Yes
3,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Yes
4,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Yes


Separando a Variável Alvo das Variáveis Explicativas

In [13]:
# Separando recursos e alvo

x = dados_transformados.copy()

y = dados['Churn']

# Verificação
if 'Churn' in x.columns:
    x = x.drop('Churn', axis=1)

display(x.head())
display(y.head())

Unnamed: 0,onehotencoder__customerID_0003-MKNFE,onehotencoder__customerID_0004-TLHLJ,onehotencoder__customerID_0011-IGKFF,onehotencoder__customerID_0013-EXCHZ,onehotencoder__customerID_0013-MHZWF,onehotencoder__customerID_0013-SMEOE,onehotencoder__customerID_0014-BMAQU,onehotencoder__customerID_0015-UOCOJ,onehotencoder__customerID_0016-QLJIS,onehotencoder__customerID_0017-DINOC,...,"onehotencoder__account_{'Contract': 'Two year', 'PaperlessBilling': 'Yes', 'PaymentMethod': 'Mailed check', 'Charges': {'Monthly': 89.25, 'Total': '4652.4'}}","onehotencoder__account_{'Contract': 'Two year', 'PaperlessBilling': 'Yes', 'PaymentMethod': 'Mailed check', 'Charges': {'Monthly': 89.4, 'Total': '6376.55'}}","onehotencoder__account_{'Contract': 'Two year', 'PaperlessBilling': 'Yes', 'PaymentMethod': 'Mailed check', 'Charges': {'Monthly': 90.45, 'Total': '5957.9'}}","onehotencoder__account_{'Contract': 'Two year', 'PaperlessBilling': 'Yes', 'PaymentMethod': 'Mailed check', 'Charges': {'Monthly': 90.5, 'Total': '4318.35'}}","onehotencoder__account_{'Contract': 'Two year', 'PaperlessBilling': 'Yes', 'PaymentMethod': 'Mailed check', 'Charges': {'Monthly': 90.6, 'Total': '5817.45'}}","onehotencoder__account_{'Contract': 'Two year', 'PaperlessBilling': 'Yes', 'PaymentMethod': 'Mailed check', 'Charges': {'Monthly': 91.25, 'Total': '6589.6'}}","onehotencoder__account_{'Contract': 'Two year', 'PaperlessBilling': 'Yes', 'PaymentMethod': 'Mailed check', 'Charges': {'Monthly': 92.15, 'Total': '6480.9'}}","onehotencoder__account_{'Contract': 'Two year', 'PaperlessBilling': 'Yes', 'PaymentMethod': 'Mailed check', 'Charges': {'Monthly': 92.45, 'Total': '6460.55'}}","onehotencoder__account_{'Contract': 'Two year', 'PaperlessBilling': 'Yes', 'PaymentMethod': 'Mailed check', 'Charges': {'Monthly': 95.5, 'Total': '4627.85'}}",remainder__Churn
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,No
1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,No
2,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Yes
3,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Yes
4,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Yes


Unnamed: 0,Churn
0,No
1,No
2,Yes
3,Yes
4,Yes


Verificação da Proporção de Evasão:

In [14]:
# Verificar os valores únicos na coluna 'Churn'
print("Valores únicos na coluna 'Churn':", dados['Churn'].unique())

Valores únicos na coluna 'Churn': ['No' 'Yes' nan]


In [15]:
# Calculando número de clientes que evadiram e que permaneceram
total_clientes = len(dados)
clientes_churn = len(dados[dados['Churn'] == 'Yes'])  # Assumindo que 'Yes' indica clientes que evadiram
clientes_ativos = len(dados[dados['Churn'] == 'No'])  # Assumindo que 'No' indica permaneceram ativos

In [16]:
# Calculo das proporções
proporcao_churn = clientes_churn / total_clientes # indica clientes que evadiram
proporcao_ativos = clientes_ativos / total_clientes # indica clientes que permaneceram ativos

In [17]:
# Calculo da proporção de evasão de clientes em relação aos ativos
proporcao_churn_vs_ativos = clientes_churn / clientes_ativos if clientes_ativos > 0 else 0

In [18]:
# Exibindo resultados
print(f"Total de clientes: {total_clientes}")
print(f"Clientes que evadiram: {clientes_churn} ({proporcao_churn:.2%})")
print(f"Clientes ativos: {clientes_ativos} ({proporcao_ativos:.2%})")
print(f"Proporção de clientes que evadiram em relação aos ativos: {proporcao_churn_vs_ativos:.2f}")

Total de clientes: 7267
Clientes que evadiram: 1869 (25.72%)
Clientes ativos: 5174 (71.20%)
Proporção de clientes que evadiram em relação aos ativos: 0.36


Avaliando desequilíbrio entre as classe

In [19]:
# Contar o número de clientes por classe
contagem_classes = dados['Churn'].value_counts()
total_clientes = len(dados)

In [20]:
# Calcular proporções
proporcoes = contagem_classes / total_clientes

In [21]:
# Exibir resultados
print("\nDistribuição das classes:")
for classe, contagem in contagem_classes.items():
    print(f"Classe {classe}: {contagem} ({proporcoes[classe]:.2%})")


Distribuição das classes:
Classe No: 5174 (71.20%)
Classe Yes: 1869 (25.72%)


In [22]:
# Avaliar desequilíbrio
razao = contagem_classes.min() / contagem_classes.max()
print(f"\nRazão entre a classe minoritária e majoritária: {razao:.2f}")
if razao < 0.3:
    print("Aviso: Há um desequilíbrio significativo entre as classes (razão < 0.3).")
elif razao < 0.5:
    print("Nota: Há um desequilíbrio moderado entre as classes (razão < 0.5).")
else:
    print("As classes estão relativamente equilibradas.")


Razão entre a classe minoritária e majoritária: 0.36
Nota: Há um desequilíbrio moderado entre as classes (razão < 0.5).


Verificar se há a necessidade de normalizar ou padronizar os dados

In [23]:
from sklearn.preprocessing import MinMaxScaler, StandardScaler

In [28]:
# Function to safely parse JSON-like strings and extract a value
def safe_get(data_string, key, default=None):
    try:
        data = json.loads(data_string.replace("'", '"'))
        return data.get(key, default)
    except (json.JSONDecodeError, AttributeError):
        return default

# Extracting nested data and creating new columns
dados['gender'] = dados['customer'].apply(lambda x: safe_get(x, 'gender'))
dados['SeniorCitizen'] = dados['customer'].apply(lambda x: safe_get(x, 'SeniorCitizen'))
dados['Partner'] = dados['customer'].apply(lambda x: safe_get(x, 'Partner'))
dados['Dependents'] = dados['customer'].apply(lambda x: safe_get(x, 'Dependents'))
dados['tenure'] = dados['customer'].apply(lambda x: safe_get(x, 'tenure'))

dados['PhoneService'] = dados['phone'].apply(lambda x: safe_get(x, 'PhoneService'))
dados['MultipleLines'] = dados['phone'].apply(lambda x: safe_get(x, 'MultipleLines'))

dados['InternetService'] = dados['internet'].apply(lambda x: safe_get(x, 'InternetService'))
dados['OnlineSecurity'] = dados['internet'].apply(lambda x: safe_get(x, 'OnlineSecurity'))
dados['OnlineBackup'] = dados['internet'].apply(lambda x: safe_get(x, 'OnlineBackup'))
dados['DeviceProtection'] = dados['internet'].apply(lambda x: safe_get(x, 'DeviceProtection'))
dados['TechSupport'] = dados['internet'].apply(lambda x: safe_get(x, 'TechSupport'))
dados['StreamingTV'] = dados['internet'].apply(lambda x: safe_get(x, 'StreamingTV'))
dados['StreamingMovies'] = dados['internet'].apply(lambda x: safe_get(x, 'StreamingMovies'))

dados['Contract'] = dados['account'].apply(lambda x: safe_get(x, 'Contract'))
dados['PaperlessBilling'] = dados['account'].apply(lambda x: safe_get(x, 'PaperlessBilling'))
dados['PaymentMethod'] = dados['account'].apply(lambda x: safe_get(x, 'PaymentMethod'))
dados['MonthlyCharges'] = dados['account'].apply(lambda x: safe_get(x, 'Charges', {}).get('Monthly'))
dados['TotalCharges'] = dados['account'].apply(lambda x: safe_get(x, 'Charges', {}).get('Total'))


# Convert new columns to appropriate types
dados['SeniorCitizen'] = pd.to_numeric(dados['SeniorCitizen'], errors='coerce')
dados['tenure'] = pd.to_numeric(dados['tenure'], errors='coerce')
dados['MonthlyCharges'] = pd.to_numeric(dados['MonthlyCharges'], errors='coerce')
dados['TotalCharges'] = pd.to_numeric(dados['TotalCharges'], errors='coerce')

# Fill NaN values with 0 after coercion
dados.fillna(0, inplace=True)


# Identify numerical variables after parsing
variaveis_numericas = dados.select_dtypes(include=['int64', 'float64']).columns
print("Variáveis numéricas detectadas:", variaveis_numericas)

Variáveis numéricas detectadas: Index(['SeniorCitizen', 'tenure', 'MonthlyCharges', 'TotalCharges'], dtype='object')


In [29]:
# ===== Normalização (0 to 1) =====
scaler_minmax = MinMaxScaler()
dados_normalizados = dados.copy()
dados_normalizados[variaveis_numericas] = scaler_minmax.fit_transform(dados[variaveis_numericas])

# Exibir DataFrame normalizado para verificar
print("\nPrimeiras 5 linhas do dataset normalizado:")
display(dados_normalizados.head())


Primeiras 5 linhas do dataset normalizado:


Unnamed: 0,customerID,Churn,customer,phone,internet,account,gender,SeniorCitizen,Partner,Dependents,...,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges
0,0002-ORFBO,No,"{'gender': 'Female', 'SeniorCitizen': 0, 'Part...","{'PhoneService': 'Yes', 'MultipleLines': 'No'}","{'InternetService': 'DSL', 'OnlineSecurity': '...","{'Contract': 'One year', 'PaperlessBilling': '...",Female,0.0,Yes,Yes,...,Yes,No,Yes,Yes,No,One year,Yes,Mailed check,0.471144,0.068315
1,0003-MKNFE,No,"{'gender': 'Male', 'SeniorCitizen': 0, 'Partne...","{'PhoneService': 'Yes', 'MultipleLines': 'Yes'}","{'InternetService': 'DSL', 'OnlineSecurity': '...","{'Contract': 'Month-to-month', 'PaperlessBilli...",Male,0.0,No,No,...,No,No,No,No,Yes,Month-to-month,No,Mailed check,0.414428,0.062454
2,0004-TLHLJ,Yes,"{'gender': 'Male', 'SeniorCitizen': 0, 'Partne...","{'PhoneService': 'Yes', 'MultipleLines': 'No'}","{'InternetService': 'Fiber optic', 'OnlineSecu...","{'Contract': 'Month-to-month', 'PaperlessBilli...",Male,0.0,No,No,...,No,Yes,No,No,No,Month-to-month,Yes,Electronic check,0.553731,0.032338
3,0011-IGKFF,Yes,"{'gender': 'Male', 'SeniorCitizen': 1, 'Partne...","{'PhoneService': 'Yes', 'MultipleLines': 'No'}","{'InternetService': 'Fiber optic', 'OnlineSecu...","{'Contract': 'Month-to-month', 'PaperlessBilli...",Male,1.0,Yes,No,...,Yes,Yes,No,Yes,Yes,Month-to-month,Yes,Electronic check,0.793532,0.142531
4,0013-EXCHZ,Yes,"{'gender': 'Female', 'SeniorCitizen': 1, 'Part...","{'PhoneService': 'Yes', 'MultipleLines': 'No'}","{'InternetService': 'Fiber optic', 'OnlineSecu...","{'Contract': 'Month-to-month', 'PaperlessBilli...",Female,1.0,Yes,No,...,No,No,Yes,Yes,No,Month-to-month,Yes,Mailed check,0.653234,0.030789


In [None]:
print(dados.dtypes)   # mostra o tipo de cada coluna
print(dados.head())   # primeiras linhas para inspecionar


In [None]:
variaveis_numericas = dados.select_dtypes(include=['int64', 'float64']).columns
print("Variáveis numéricas detectadas:", variaveis_numericas)


In [None]:
from sklearn.preprocessing import MinMaxScaler

scaler_minmax = MinMaxScaler()
dados_normalizados = dados.copy()
dados_normalizados[variaveis_numericas] = scaler_minmax.fit_transform(dados[variaveis_numericas])

print(dados_normalizados.head())

# Balanceamento de Classes

In [None]:
X = dados_normalizados.drop('Churn', axis=1)  # variáveis explicativas
y = dados_normalizados['Churn']              # variável alvo


In [None]:
# Remova a coluna original 'Churn' de X se ela existir
if 'Churn' in X.columns:
    X = X.drop('Churn', axis=1)

# Remova as linhas com valores NaN de y e alinhe X pelo índice
y = y.dropna()
X = X.loc[y.index]

#Dividir em treino e teste
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

print("Divisão de dados com sucesso!")
print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)

🎯 Análise de Correlação

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# ==============================
# 1. Calcular matriz de correlação
# ==============================
# Use apenas as colunas numéricas para o cálculo de correlação
# Podemos usar o DataFrame 'dados_numericos', que já contém apenas dados numéricos
correlacao = dados_numericos.corr()

# Exibir a matriz de correlação
print("\nMatriz de Correlação das Variáveis Numéricas:")
display(correlacao)

# ==============================
# 2. Visualizar a matriz de correlação (heatmap)
# ==============================
plt.figure(figsize=(10, 8))
sns.heatmap(correlacao, annot=True, cmap='coolwarm', fmt=".2f", linewidths=.5)
plt.title('Matriz de Correlação das Variáveis Numéricas')
plt.show()

In [None]:
# Drop rows with NaN values in the 'Churn' column from 'dados' before processing
dados_cleaned = dados.dropna(subset=['Churn']).copy()

# Convert 'Churn' to a numerical format (e.g., 0 for 'No', 1 for 'Yes')
dados_cleaned['Churn_numerical'] = dados_cleaned['Churn'].apply(lambda x: 1 if x == 'Yes' else 0)

# Select only the numerical columns from the cleaned DataFrame, excluding the original 'Churn' and the new numerical 'Churn'
# We need to ensure we are using the numerical columns after parsing the nested data
numerical_cols_after_parsing = dados_cleaned.select_dtypes(include=['int64', 'float64']).columns.tolist()
# Remove the newly created 'Churn_numerical' from the list of features for correlation with itself
if 'Churn_numerical' in numerical_cols_after_parsing:
    numerical_cols_after_parsing.remove('Churn_numerical')


# Calculate the correlation between numerical features and the numerical 'Churn'
correlacoes = dados_cleaned[numerical_cols_after_parsing].corrwith(dados_cleaned['Churn_numerical'])


# Order by absolute correlation
correlacoes_ordenadas = correlacoes.abs().sort_values(ascending=False)

print("Correlação das variáveis numéricas com a evasão (Churn):")
print(correlacoes_ordenadas)

In [None]:
import matplotlib.pyplot as plt

plt.figure(figsize=(10,6))
correlacoes_ordenadas.plot(kind='bar')

plt.title("Correlação das variáveis com a evasão (target)")
plt.ylabel("Coeficiente de Correlação")
plt.xlabel("Variáveis")
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()


# Análises Direcionadas

# 🤖 Modelagem Preditiva

Trabalhando o Balanceamento de Classes

🎯 Correlação e Seleção de Variáveis

🎯 Matriz de correlação

🤖 Modelagem Preditiva

In [27]:
import pandas as pd
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler, StandardScaler
import json