# Preprocessing

In [1]:
from pathlib import Path

import joblib
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.utils import resample

In [2]:
# Read the cleaned dataset into `df`, the frame the following cells transform.
CLEANED_CSV = '../Data/cleaned.csv'
df = pd.read_csv(CLEANED_CSV)

## Binary Type

In [3]:
# Re-encode the two binary columns from their {1, 2} coding to {0, 1}.
BINARY_RECODE = {1: 0, 2: 1}
BINARY_COLUMNS = ['plan_type', 'account_status']

# Compute both recoded columns before touching `df`, so that a failed check
# below leaves the frame exactly as it was.
recoded_columns = {col: df[col].map(BINARY_RECODE) for col in BINARY_COLUMNS}

for col, recoded in recoded_columns.items():
    # Series.map returns NaN for every value that is not a key of the dict.
    # A value that was present before and is NaN afterwards has therefore been
    # silently destroyed. This is also what happens if the cell is executed a
    # second time: the column then holds 0/1, and 0 is not a key.
    unmapped = df[col].notna() & recoded.isna()
    if unmapped.any():
        raise ValueError(
            f"Column {col!r} holds values that are not in {list(BINARY_RECODE)}: "
            f"{df.loc[unmapped, col].unique().tolist()}. "
            "If this cell has already been run, reload `df` before re-running it."
        )

# Every column passed the check; write the recoded values back.
for col, recoded in recoded_columns.items():
    df[col] = recoded

## Balance Classes

In [4]:
# Partition the rows by the value of the churn label.
churn_flag = df['has_churned']
df_majotity = df.loc[churn_flag.eq(0)]  # has_churned == 0: not churned
df_minority = df.loc[churn_flag.eq(1)]  # has_churned == 1: churned

In [5]:
# Sampling settings used by the two resampling calls below.
RESAMPLE_SEED = 13
MAJORITY_KEEP_FRACTION = 0.7  # keep 70% of the majority-class rows
MINORITY_GROWTH_FACTOR = 2    # draw twice as many rows as the minority class has

n_majority_kept = int(len(df_majotity) * MAJORITY_KEEP_FRACTION)
n_minority_drawn = int(len(df_minority) * MINORITY_GROWTH_FACTOR)

# Shrink the majority class: sampling without replacement, so no row repeats.
df_majotity_downsampled = resample(
    df_majotity,
    n_samples=n_majority_kept,
    replace=False,
    random_state=RESAMPLE_SEED,
)

# Grow the minority class: sampling with replacement, so the result necessarily
# contains repeated rows.
# NOTE(review): if a train/test split is made downstream of this notebook,
# copies of the same row can end up on both sides of it -- confirm where the
# split happens.
df_minority_upsampled = resample(
    df_minority,
    n_samples=n_minority_drawn,
    replace=True,
    random_state=RESAMPLE_SEED,
)

In [6]:
# Stack the two resampled classes, shuffle the rows (frac=1 keeps all of them),
# and renumber the index from 0.
resampled_parts = [df_majotity_downsampled, df_minority_upsampled]
df_balanced = (
    pd.concat(resampled_parts)
    .sample(frac=1, random_state=13)
    .reset_index(drop=True)
)

## Scaling

In [7]:
# Separate the label column (y) from the remaining feature columns (X).
label_column = 'has_churned'
y = df_balanced[label_column]
X = df_balanced.drop(columns=[label_column])

In [8]:
SCALER_PATH = "../Outputs/Models/scaler.pkl"
PREPROCESSED_CSV = '../Data/preprocessing.csv'

# Fit a StandardScaler on every column of X and transform X with it.
# NOTE(review): the scaler is fitted on all rows of X, which include the
# resampled duplicates; if a held-out test set is split off later, its
# statistics are already baked into this scaler -- confirm this is intended.
scaler = StandardScaler()
scaled_values = scaler.fit_transform(X)

# Rebuild a labelled frame from the scaled values, reusing X's column names and
# X's index so the rows keep their identity.
X_scaler = pd.DataFrame(scaled_values, columns=X.columns, index=X.index)

# Attach the label by position rather than by index alignment: X and y come
# from the same frame in the same row order, so this stays correct even if the
# index is not a clean 0..n-1 range.
X_scaler['target'] = y.to_numpy()

# joblib.dump does not create missing directories; make sure the output folder
# exists so the notebook runs on a fresh checkout.
Path(SCALER_PATH).parent.mkdir(parents=True, exist_ok=True)
joblib.dump(scaler, SCALER_PATH)

X_scaler.to_csv(PREPROCESSED_CSV, index=False)