In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, precision_score, recall_score, f1_score, confusion_matrix


# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found
caminhos_de_entrada = (
    os.path.join(pasta, nome_arquivo)
    for pasta, _subpastas, nomes in os.walk('/kaggle/input')
    for nome_arquivo in nomes
)
for caminho_de_entrada in caminhos_de_entrada:
    print(caminho_de_entrada)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Read the bank customer churn CSV into the working DataFrame
caminho = '/kaggle/input/customer-churn-data/Bank Customer Churn Prediction.csv'
df = pd.read_csv(filepath_or_buffer=caminho)

In [None]:
# Preview the first five rows of the dataset
df.head(n=5)

In [None]:
# Summary statistics for every column except "customer_id" and "churn"
# ("churn" is used further down as the prediction target)
colunas_excluidas = ['customer_id', 'churn']
df_filtrado = df.drop(columns=colunas_excluidas)

df_filtrado.describe()

# **Pré-Processamento dos dados**

In [None]:
# Split the feature names into numeric and categorical groups
numerica_features = ['credit_score', 'age', 'balance', 'products_number', 'estimated_salary']
categorica_features = ['country', 'gender', 'credit_card', 'active_member']

# One-hot encode the categorical variables, dropping the first level of each.
# The encoder is left at its default (sparse) output and densified explicitly
# with .toarray(): the `sparse=` keyword was renamed to `sparse_output=` in
# scikit-learn 1.2 and removed in 1.4, so passing neither keyword keeps this
# cell runnable on both older and newer releases.
codificar = OneHotEncoder(drop='first')
categoricas_codificadas = pd.DataFrame(
    codificar.fit_transform(df[categorica_features]).toarray(),
    # Descriptive string labels (e.g. "<feature>_<level>") instead of 0..n-1
    columns=codificar.get_feature_names_out(categorica_features),
    # Reuse df's index so the column-wise concat below aligns row by row even
    # if df does not carry a default RangeIndex
    index=df.index,
)
df_codificado = pd.concat([df[numerica_features], categoricas_codificadas], axis=1)

# Split the data into training and test sets (80% / 20%)
X = df_codificado
y = df['churn']
X_treino, X_teste, y_treino, y_teste = train_test_split(X, y, test_size=0.2, random_state=42)

# Take explicit copies before writing into the splits: they are derived from
# df_codificado, and assigning columns into them directly triggers pandas'
# SettingWithCopyWarning.
X_treino = X_treino.copy()
X_teste = X_teste.copy()

# Standardize the numeric columns; the scaler is fitted on the training split
# only and then applied unchanged to the test split
scaler = StandardScaler()
X_treino[numerica_features] = scaler.fit_transform(X_treino[numerica_features])
X_teste[numerica_features] = scaler.transform(X_teste[numerica_features])

# Every column label is already a string at this point (numeric feature names
# plus the encoder's generated names), so no cast of the column index is needed.

# Inspect the result of the transformations
print(X_treino)
print(X_teste)

# **Criação do Modelo**

In [None]:
# Configure a linear-kernel support vector classifier; class_weight='balanced'
# asks scikit-learn to weight the classes by their frequency in y_treino
parametros_svc = {
    'kernel': 'linear',
    'random_state': 42,
    'class_weight': 'balanced',
}
modelo = SVC(**parametros_svc)

# Fit the classifier on the training split
modelo.fit(X_treino, y_treino)


# **Avaliando o modelo**

In [None]:
# Predict the labels of the held-out test split
y_pred = modelo.predict(X_teste)

# Compute the scalar evaluation metrics in one pass, plus the confusion matrix
accuracy, precision, recall, f1 = (
    metrica(y_teste, y_pred)
    for metrica in (accuracy_score, precision_score, recall_score, f1_score)
)
conf_matrix = confusion_matrix(y_teste, y_pred)

# Report each metric with its label, followed by the confusion matrix
relatorio = (
    ("Acurácia:", accuracy),
    ("Precisão:", precision),
    ("Recall:", recall),
    ("F1-Score:", f1),
)
for rotulo, valor in relatorio:
    print(rotulo, valor)
print("Matriz de Confusão:")
print(conf_matrix)
