# Packages

In [17]:
# Data manipulation
import pandas as pd
import numpy as np

# Models
from sklearn.linear_model import LogisticRegression
from sklearn.dummy import DummyClassifier

# Metrics
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Others
import os

# Setting parameters
sns.set_palette("dark")
sns.set_style("whitegrid")

# Data directory: defaults to the original local path, but can be overridden
# through the CHURN_DATA_DIR environment variable so the notebook can run on
# another machine without editing this cell.
DATA_DIR = os.environ.get(
    'CHURN_DATA_DIR',
    r'J:\Estudo\Projetos\telecom_churn_prediction\data'
)

# The working directory is still switched to the data folder, so the relative
# file names used by later cells ('train.csv', 'test.csv') keep resolving.
os.chdir(DATA_DIR)

# Train/Test Data

In [18]:
# Training split: read the file, then separate the label from the features.
# The label is kept as a one-column DataFrame (note the list selector).
df_train = pd.read_csv('train.csv')
y_train = df_train.loc[:, ['Churn']]
x_train = df_train.drop('Churn', axis=1)
print(f'Train data shape: {df_train.shape}')

# Test split: same procedure as for the training file.
df_test = pd.read_csv('test.csv')
y_test = df_test.loc[:, ['Churn']]
x_test = df_test.drop('Churn', axis=1)
print(f'Test data shape: {df_test.shape}')

Train data shape: (4930, 21)
Test data shape: (1056, 21)


# Baseline
- The baseline will be a Dummy Classifier that predicts Churn by sampling from the training-set distribution of 1 (Yes) and 0 (No), without using any of the features.
- The most important metric will be the __Recall Score__: we need to minimize false negatives (customers who churn but are predicted not to) to better suit business requirements.

In [26]:
# Function to aggregate SKLearn score metrics
def evaluate_model(model, x, y):
    """Print accuracy, F1, recall and precision of ``model`` on ``(x, y)``.

    Parameters
    ----------
    model : fitted estimator
        Any object exposing ``predict(x)``.
    x : array-like
        Features handed to ``model.predict``.
    y : array-like
        True labels the predictions are compared against.
        NOTE(review): the metrics are called with their default averaging,
        which presumably requires binary labels -- confirm against the data.

    Returns
    -------
    None
        The four scores are only printed, formatted with two decimals.
    """
    # Using the model to predict new values
    y_hat = model.predict(x)

    # Calculating the metrics.
    # zero_division=0 is passed to every metric that accepts it (not just
    # precision), so an undefined score is reported as 0 consistently and
    # without an UndefinedMetricWarning from only some of the metrics.
    metrics = {
        'Accuracy Score': accuracy_score(y, y_hat),
        'F1 Score': f1_score(y, y_hat, zero_division=0),
        'Recall Score': recall_score(y, y_hat, zero_division=0),
        'Precision Score': precision_score(y, y_hat, zero_division=0),
    }

    # Showing the metrics
    print('Model Metrics:')
    for label, value in metrics.items():
        print(f'{label}: {value:.2f}')

In [45]:
# Baseline model: a dummy classifier using the 'stratified' strategy,
# seeded so that its predictions are repeatable between runs.
baseline = DummyClassifier(strategy='stratified', random_state=78)
baseline.fit(x_train, y_train)

# Score the baseline on the test split
evaluate_model(baseline, x_test, y_test)

Model Metrics:
Accuracy Score: 0.60
F1 Score: 0.22
Recall Score: 0.22
Precision Score: 0.21


# Preprocessing 