In [None]:
import pandas as pd
import sklearn
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder

# Load the raw churn CSV from the working directory and discard the
# customerID column in one step (the column is treated as unnecessary for
# the rest of this script).
df = pd.read_csv('WA_Fn-UseC_-Telco-Customer-Churn.csv').drop(columns=['customerID'])

#Preprocess data (One-hot) and Combine Features into groups

def combined_securitysupport(row):
    """Summarise the four security/support add-ons of one row as a label.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) indexable by the
            column names 'InternetService', 'OnlineSecurity',
            'OnlineBackup', 'DeviceProtection' and 'TechSupport'.

    Returns:
        'No internet service' when InternetService equals 'No'; otherwise
        'None', 'Some' or 'All' depending on how many of the four add-on
        columns equal 'Yes' (0, 1-3, or 4 respectively).
    """
    # Without internet service the add-on columns are not inspected at all.
    if row['InternetService'] == 'No':
        return 'No internet service'

    addon_columns = ('OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport')
    subscribed = sum(1 for column in addon_columns if row[column] == 'Yes')

    # Only the two extremes get their own label; anything in between is 'Some'.
    extremes = {0: 'None', len(addon_columns): 'All'}
    return extremes.get(subscribed, 'Some')

def combined_entertainment(row):
    """Summarise the two streaming add-ons of one row as a label.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) indexable by the
            column names 'InternetService', 'StreamingTV' and
            'StreamingMovies'.

    Returns:
        'No internet service' when InternetService equals 'No'; otherwise
        'None', 'One' or 'Both' according to how many of the two streaming
        columns equal 'Yes'.
    """
    # Without internet service the streaming columns are not inspected at all.
    if row['InternetService'] == 'No':
        return 'No internet service'

    streaming_columns = ('StreamingTV', 'StreamingMovies')
    active = sum(1 for column in streaming_columns if row[column] == 'Yes')

    # Index by the count: 0 -> 'None', 1 -> 'One', 2 -> 'Both'.
    return ('None', 'One', 'Both')[active]

def combined_lineservice(row):
    """Classify the phone-line setup of one row.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) indexable by the
            column names 'PhoneService' and 'MultipleLines'.

    Returns:
        'No phone service' when PhoneService equals 'No', 'Multiple lines'
        when MultipleLines equals 'Yes', and 'Single line' in every other
        case.
    """
    # MultipleLines is only consulted when the row has phone service.
    if row['PhoneService'] == 'No':
        return 'No phone service'
    return 'Multiple lines' if row['MultipleLines'] == 'Yes' else 'Single line'
    
# Build the three grouped features row by row. This must run while
# PhoneService and InternetService still hold their string labels, because
# the helper functions compare those columns against 'No'.
for _feature_name, _row_labeller in (
    ('PhoneLineStatus', combined_lineservice),
    ('SecuritySupportLevel', combined_securitysupport),
    ('EntertainmentLevel', combined_entertainment),
):
    df[_feature_name] = df.apply(_row_labeller, axis=1)

# The individual service columns are now represented by the grouped features.
df = df.drop(columns=[
    'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport',
    'StreamingTV', 'StreamingMovies', 'MultipleLines',
])

# Replace string labels with integer codes. Series.map leaves NaN for any
# label that is not listed in the corresponding table.
_yes_no_codes = {'Yes': 1, 'No': 0}
_column_codes = {
    'Contract': {'Month-to-month': 0, 'One year': 1, 'Two year': 2},
    'Partner': _yes_no_codes,
    'Dependents': _yes_no_codes,
    'PhoneService': _yes_no_codes,
    'PaperlessBilling': _yes_no_codes,
    'Churn': _yes_no_codes,
    'InternetService': {'DSL': 1, 'Fiber optic': 2, 'No': 0},
}
for _column, _codes in _column_codes.items():
    df[_column] = df[_column].map(_codes)

# One-hot encode PhoneLineStatus, keeping every category (drop=None).
encoder = OneHotEncoder(sparse_output=False, drop=None)
encoded = encoder.fit_transform(df[['PhoneLineStatus']])
encoded_df = pd.DataFrame(encoded, columns=encoder.get_feature_names_out(['PhoneLineStatus']))

# Swap the original column for its indicator columns. The column-wise concat
# aligns on the index, so this relies on df and encoded_df sharing the same
# default 0..n-1 row index.
df = pd.concat([df.drop(columns='PhoneLineStatus'), encoded_df], axis=1)

# Turn the grouped level labels into integer codes.
_level_codes = {
    'SecuritySupportLevel': {
        'No internet service': 0,
        'None': 1,
        'Some': 2,
        'All': 3,
    },
    'EntertainmentLevel': {
        'No internet service': 0,
        'None': 1,
        'One': 2,
        'Both': 3,
    },
}
for _column, _codes in _level_codes.items():
    df[_column] = df[_column].map(_codes)







#TODO: Independent and Dependent variables
#TODO: Train-test split
#TODO: Model training on training set
#TODO: Model evaluation on test set
#TODO: Visualizations


Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,InternetService,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn,SecuritySupportLevel,EntertainmentLevel,PhoneLineStatus_Multiple lines,PhoneLineStatus_No phone service,PhoneLineStatus_Single line
0,Female,0,1,0,1,0,1,0,1,Electronic check,29.85,29.85,0,2,1,0.0,1.0,0.0
1,Male,0,0,0,34,1,1,1,0,Mailed check,56.95,1889.5,0,2,1,0.0,0.0,1.0
2,Male,0,0,0,2,1,1,0,1,Mailed check,53.85,108.15,1,2,1,0.0,0.0,1.0
3,Male,0,0,0,45,0,1,1,0,Bank transfer (automatic),42.30,1840.75,0,2,1,0.0,1.0,0.0
4,Female,0,0,0,2,1,2,0,1,Electronic check,70.70,151.65,1,1,1,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7038,Male,0,1,1,24,1,1,1,1,Mailed check,84.80,1990.5,0,2,3,1.0,0.0,0.0
7039,Female,0,1,1,72,1,2,1,1,Credit card (automatic),103.20,7362.9,0,2,3,1.0,0.0,0.0
7040,Female,0,1,1,11,0,1,0,1,Electronic check,29.60,346.45,0,2,1,0.0,1.0,0.0
7041,Male,1,1,0,4,1,2,0,1,Mailed check,74.40,306.6,1,1,1,1.0,0.0,0.0
