In [None]:
#Import libraries

import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import OneHotEncoder

#Load the Telco churn dataset and discard the customerID column in one step
#(customerID is only a row identifier)
df = pd.read_csv('WA_Fn-UseC_-Telco-Customer-Churn.csv').drop(columns=['customerID'])

#Group features

#Online Security, Online Backup, Device Protection, Tech Support
def combined_securitysupport(row):
    """Collapse the four security/support add-on columns into one label.

    `row` is any mapping-like object indexable by column name (for example
    a DataFrame row passed by `df.apply(..., axis=1)`).

    Returns 'No internet service' when InternetService is 'No'; otherwise
    'None', 'Some' or 'All' depending on how many of the four add-ons are
    set to 'Yes'.
    """
    if row['InternetService'] == 'No':
        return 'No internet service'

    addon_columns = (
        'OnlineSecurity',
        'OnlineBackup',
        'DeviceProtection',
        'TechSupport',
    )
    subscribed = sum(1 for column in addon_columns if row[column] == 'Yes')

    if subscribed == 0:
        return 'None'
    if subscribed == len(addon_columns):
        return 'All'
    return 'Some'

#Streaming TV, Streaming Movies
def combined_entertainment(row):
    """Collapse the two streaming columns into one label.

    `row` is any mapping-like object indexable by column name.

    Returns 'No internet service' when InternetService is 'No'; otherwise
    'None', 'One' or 'Both' depending on how many of StreamingTV and
    StreamingMovies are set to 'Yes'.
    """
    if row['InternetService'] == 'No':
        return 'No internet service'

    has_tv = row['StreamingTV'] == 'Yes'
    has_movies = row['StreamingMovies'] == 'Yes'

    if has_tv and has_movies:
        return 'Both'
    if has_tv or has_movies:
        return 'One'
    return 'None'

#Multiple Lines, Phone Service, or No phone service
def combined_lineservice(row):
    """Summarise PhoneService and MultipleLines as a single label.

    `row` is any mapping-like object indexable by column name.

    Returns 'No phone service' when PhoneService is 'No', 'Multiple lines'
    when MultipleLines is 'Yes', and 'Single line' in every other case.
    """
    if row['PhoneService'] == 'No':
        return 'No phone service'
    return 'Multiple lines' if row['MultipleLines'] == 'Yes' else 'Single line'
    
#One-hot encode certain features

#Build each grouped feature by applying its row-wise helper to the frame.
#Insertion order of the dict fixes the order in which the columns are added.
derived_features = {
    'PhoneLineStatus': combined_lineservice,
    'SecuritySupportLevel': combined_securitysupport,
    'EntertainmentLevel': combined_entertainment,
}

for feature_name, row_classifier in derived_features.items():
    df[feature_name] = df.apply(row_classifier, axis=1)

#Categorical columns to one-hot encode.
#NOTE(review): 'Churn' looks like the target variable; it is encoded here
#together with the features and ends up as the 'Churn_Yes' column.
one_hot_columns = ['gender', 'SeniorCitizen', 'Partner', 'Dependents', 'PaperlessBilling', 'Churn']

#Encode with pandas get_dummies only. A previous OneHotEncoder/concat/drop
#pipeline built df_encoded here as well, but its result was overwritten by
#this call before ever being used, so that dead work has been removed.
#drop_first=True drops the first category of each column, leaving one
#indicator column per two-valued feature (e.g. 'gender_Male', 'Partner_Yes').
df_encoded = pd.get_dummies(df, columns=one_hot_columns, drop_first=True)

#Continue working on an independent copy under the name `df`
df = df_encoded.copy()


#Manual ordinal encoding: for every listed column, replace each category
#label with its integer rank. Labels missing from a mapping become NaN,
#which is how Series.map treats unmatched values.
ordinal_mappings = {
    'SecuritySupportLevel': {
        'No internet service': 0,
        'None': 1,
        'Some': 2,
        'All': 3,
    },
    'EntertainmentLevel': {
        'No internet service': 0,
        'None': 1,
        'One': 2,
        'Both': 3,
    },
    'PhoneLineStatus': {
        'No phone service': 0,
        'Single line': 1,
        'Multiple lines': 2,
    },
    'Contract': {
        'Month-to-month': 0,
        'One year': 1,
        'Two year': 2,
    },
    'InternetService': {
        'DSL': 1,
        'Fiber optic': 2,
        'No': 0,
    },
}

for column_name, category_ranks in ordinal_mappings.items():
    df[column_name] = df[column_name].map(category_ranks)

#Bare expression so the notebook renders the resulting frame
df


#TODO: Independent and Dependent variables
#TODO: Train-test split
#TODO: Model training on training set
#TODO: Model evaluation on test set
#TODO: Visualizations


Unnamed: 0,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,...,TotalCharges,PhoneLineStatus,SecuritySupportLevel,EntertainmentLevel,gender_Male,SeniorCitizen_1,Partner_Yes,Dependents_Yes,PaperlessBilling_Yes,Churn_Yes
0,1,No,No phone service,1,No,Yes,No,No,No,No,...,29.85,0,2,1,False,False,True,False,True,False
1,34,Yes,No,1,Yes,No,Yes,No,No,No,...,1889.5,1,2,1,True,False,False,False,False,False
2,2,Yes,No,1,Yes,Yes,No,No,No,No,...,108.15,1,2,1,True,False,False,False,True,True
3,45,No,No phone service,1,Yes,No,Yes,Yes,No,No,...,1840.75,0,2,1,True,False,False,False,False,False
4,2,Yes,No,2,No,No,No,No,No,No,...,151.65,1,1,1,False,False,False,False,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7038,24,Yes,Yes,1,Yes,No,Yes,Yes,Yes,Yes,...,1990.5,2,2,3,True,False,True,True,True,False
7039,72,Yes,Yes,2,No,Yes,Yes,No,Yes,Yes,...,7362.9,2,2,3,False,False,True,True,True,False
7040,11,No,No phone service,1,Yes,No,No,No,No,No,...,346.45,0,2,1,False,False,True,True,True,False
7041,4,Yes,Yes,2,No,No,No,No,No,No,...,306.6,2,1,1,True,True,True,False,True,True
