## Customer churn prediction
### Feed-forward Neural Networks

In [1]:
import os
import warnings

import numpy as np
import pandas as pd
import plotly.graph_objects as go

# Silence every warning raised for the rest of the session. This also hides
# deprecation notices from the libraries used below, so comment it out while debugging.
warnings.filterwarnings('ignore')

In [2]:
# Location of the churn CSV. Set the CHURN_DATA_PATH environment variable to run
# the notebook on another machine; when it is unset, the original path is used.
DATA_PATH = os.environ.get(
    'CHURN_DATA_PATH',
    '/Users/sheidamajidi/Desktop/Winter2024/Winter2024-2/MGSC673/Assignments/Assignment2/telecom_churn.csv',
)
data = pd.read_csv(DATA_PATH)
# Last expression of the cell: render a preview of the loaded frame.
data

Unnamed: 0,Churn,AccountWeeks,ContractRenewal,DataPlan,DataUsage,CustServCalls,DayMins,DayCalls,MonthlyCharge,OverageFee,RoamMins
0,0,128,1,1,2.70,1,265.1,110,89.0,9.87,10.0
1,0,107,1,1,3.70,1,161.6,123,82.0,9.78,13.7
2,0,137,1,0,0.00,0,243.4,114,52.0,6.06,12.2
3,0,84,0,0,0.00,2,299.4,71,57.0,3.10,6.6
4,0,75,0,0,0.00,3,166.7,113,41.0,7.42,10.1
...,...,...,...,...,...,...,...,...,...,...,...
3328,0,192,1,1,2.67,2,156.2,77,71.7,10.78,9.9
3329,0,68,1,0,0.34,3,231.1,57,56.4,7.67,9.6
3330,0,28,1,0,0.00,2,180.8,109,56.0,14.44,14.1
3331,0,184,0,0,0.00,2,213.8,105,50.0,7.98,5.0


### Exploratory Data Analysis

In [3]:
# Summary statistics (count, mean, std, min, quartiles, max) for each numeric column.
data.describe()

Unnamed: 0,Churn,AccountWeeks,ContractRenewal,DataPlan,DataUsage,CustServCalls,DayMins,DayCalls,MonthlyCharge,OverageFee,RoamMins
count,3333.0,3333.0,3333.0,3333.0,3333.0,3333.0,3333.0,3333.0,3333.0,3333.0,3333.0
mean,0.144914,101.064806,0.90309,0.276628,0.816475,1.562856,179.775098,100.435644,56.305161,10.051488,10.237294
std,0.352067,39.822106,0.295879,0.447398,1.272668,1.315491,54.467389,20.069084,16.426032,2.535712,2.79184
min,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,14.0,0.0,0.0
25%,0.0,74.0,1.0,0.0,0.0,1.0,143.7,87.0,45.0,8.33,8.5
50%,0.0,101.0,1.0,0.0,0.0,1.0,179.4,101.0,53.5,10.07,10.3
75%,0.0,127.0,1.0,1.0,1.78,2.0,216.4,114.0,66.2,11.77,12.1
max,1.0,243.0,1.0,1.0,5.4,9.0,350.8,165.0,111.3,18.19,20.0


##### Missing values?

In [7]:
# Count the NaN entries in each column; a column of zeros means nothing needs imputing.
missing_values = data.isna().sum()
print("Missing values in each column:\n", missing_values)

Missing values in each column:
 Churn              0
AccountWeeks       0
ContractRenewal    0
DataPlan           0
DataUsage          0
CustServCalls      0
DayMins            0
DayCalls           0
MonthlyCharge      0
OverageFee         0
RoamMins           0
dtype: int64


##### What percentage of each binary column is 1 vs. 0?

In [8]:
def plot_category_percentages_bar(df, column):
    """Show a bar chart of the percentage share of each category in ``df[column]``.

    Args:
        df: DataFrame that contains ``column``.
        column: Name of the column to summarise. Each distinct value is passed
            through ``int()`` to build its axis label, so the values must be
            convertible to int.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    percentages = df[column].value_counts(normalize=True) * 100
    heights = percentages.values
    bar = go.Bar(
        x=[str(int(category)) for category in percentages.index],
        y=heights,
        text=[f'{height:.2f}%' for height in heights],
        textposition='auto',
    )
    fig = go.Figure(data=[bar])
    fig.update_layout(
        title_text=f'Percentage of Categories in {column}',
        title_x=0.5,
        xaxis_title='Category',
        yaxis_title='Percentage',
    )
    fig.show()

# Draw one percentage bar chart per 0/1 column, including the Churn target.
columns_to_plot = ['DataPlan', 'Churn', 'ContractRenewal']
for column in columns_to_plot:
    plot_category_percentages_bar(df=data, column=column)


##### Churn counts by feature value

In [10]:
# Plot for AccountWeeks: churned vs. retained customer counts per 26-week bin.
ACCOUNT_WEEKS_BIN_WIDTH = 26
# The bins are left-closed ([0, 26), [26, 52), ...), so the last edge has to lie
# strictly above the maximum. Otherwise a maximum that is an exact multiple of the
# bin width would fall outside every bin and turn into NaN.
n_account_week_bins = int(data['AccountWeeks'].max()) // ACCOUNT_WEEKS_BIN_WIDTH + 1
account_week_edges = range(
    0,
    n_account_week_bins * ACCOUNT_WEEKS_BIN_WIDTH + 1,
    ACCOUNT_WEEKS_BIN_WIDTH,
)
# NOTE: this adds a plotting-only interval column to `data` itself.
data['AccountWeeks_bin'] = pd.cut(data['AccountWeeks'], bins=account_week_edges, right=False)
# observed=False keeps empty bins on the axis and states the behaviour explicitly
# instead of relying on the pandas default for categorical groupers.
account_weeks_churn = (
    data.groupby(['AccountWeeks_bin', 'Churn'], observed=False)
    .size()
    .unstack(fill_value=0)
)
fig = go.Figure()
fig.add_trace(go.Bar(x=account_weeks_churn.index.astype(str),
                     y=account_weeks_churn[0],
                     name='Churn 0', marker_color='grey'))
fig.add_trace(go.Bar(x=account_weeks_churn.index.astype(str),
                     y=account_weeks_churn[1],
                     name='Churn 1', marker_color='green'))
fig.update_layout(barmode='group', title='Churn by Account Weeks', title_x=0.5,
                  xaxis_title='Account Weeks Bin', yaxis_title='Count')
fig.show()

In [11]:
# Plot for DataUsage: churned vs. retained customer counts per 0.5-wide usage bin.
# The bins are left-closed, so the top edge has to lie strictly above the largest
# observed value. The previous fixed edges stopped at 4 while describe() reports a
# DataUsage maximum of 5.4, so every customer with usage >= 4 became NaN and was
# silently dropped from the chart.
data_usage_step = 0.5
# Keep at least the original eight bins (0 to 4) and extend them when the data goes higher.
n_data_usage_bins = max(8, int(data['DataUsage'].max() // data_usage_step) + 1)
bins = [i * data_usage_step for i in range(n_data_usage_bins + 1)]
# NOTE: this adds a plotting-only interval column to `data` itself.
data['DataUsage_bin'] = pd.cut(data['DataUsage'], bins=bins, right=False)
# observed=False keeps empty bins on the axis and states the behaviour explicitly
# instead of relying on the pandas default for categorical groupers.
data_usage_churn = (
    data.groupby(['DataUsage_bin', 'Churn'], observed=False)
    .size()
    .unstack(fill_value=0)
)
fig = go.Figure()
fig.add_trace(go.Bar(x=data_usage_churn.index.astype(str),
                     y=data_usage_churn[0], name='Churn 0',
                     marker_color='grey'))
fig.add_trace(go.Bar(x=data_usage_churn.index.astype(str),
                     y=data_usage_churn[1], name='Churn 1',
                     marker_color='green'))
fig.update_layout(barmode='group', title='Churn by Data Usage', title_x=0.5,
                  xaxis_title='Data Usage Bin (GB)', yaxis_title='Count')
fig.show()


In [12]:
# Plot for CustServCalls: one row per call count, one column per Churn value.
custserv_calls_churn = (
    data
    .groupby(['CustServCalls', 'Churn'])
    .size()
    .unstack(fill_value=0)
)
fig = go.Figure(data=[
    go.Bar(
        x=custserv_calls_churn.index.astype(str),
        y=custserv_calls_churn[0],
        name='Churn 0',
        marker_color='grey',
    ),
    go.Bar(
        x=custserv_calls_churn.index.astype(str),
        y=custserv_calls_churn[1],
        name='Churn 1',
        marker_color='green',
    ),
])
fig.update_layout(
    barmode='group',
    title='Churn by Customer Service Calls',
    title_x=0.5,
    xaxis_title='Customer Service Calls',
    yaxis_title='Count',
)
fig.show()


In [13]:
# Plot for MonthlyCharge: customers are split into ten quantile bins (deciles),
# then counted per bin and Churn value.
# NOTE: this adds a plotting-only interval column to `data` itself.
data['MonthlyCharge_bin'] = pd.qcut(data['MonthlyCharge'], q=10)
monthly_charge_churn = (
    data
    .groupby(['MonthlyCharge_bin', 'Churn'])
    .size()
    .unstack(fill_value=0)
)
fig = go.Figure(data=[
    go.Bar(
        x=monthly_charge_churn.index.astype(str),
        y=monthly_charge_churn[0],
        name='Churn 0',
        marker_color='grey',
    ),
    go.Bar(
        x=monthly_charge_churn.index.astype(str),
        y=monthly_charge_churn[1],
        name='Churn 1',
        marker_color='green',
    ),
])
fig.update_layout(
    barmode='group',
    title='Churn by Monthly Charge',
    title_x=0.5,
    xaxis_title='Monthly Charge Bin ($)',
    yaxis_title='Count',
)
fig.show()


In [14]:
# Plot for OverageFee: customers are split into ten quantile bins (deciles),
# then counted per bin and Churn value.
# NOTE: this adds a plotting-only interval column to `data` itself.
data['OverageFee_bin'] = pd.qcut(data['OverageFee'], q=10)
overage_fee_churn = (
    data
    .groupby(['OverageFee_bin', 'Churn'])
    .size()
    .unstack(fill_value=0)
)
fig = go.Figure(data=[
    go.Bar(
        x=overage_fee_churn.index.astype(str),
        y=overage_fee_churn[0],
        name='Churn 0',
        marker_color='grey',
    ),
    go.Bar(
        x=overage_fee_churn.index.astype(str),
        y=overage_fee_churn[1],
        name='Churn 1',
        marker_color='green',
    ),
])
fig.update_layout(
    barmode='group',
    title='Churn by Overage Fee',
    title_x=0.5,
    xaxis_title='Overage Fee Bin ($)',
    yaxis_title='Count',
)
fig.show()


#### Split dataset into training, validation, and testing sets, with "Churn" as the target

In [15]:
# List the current columns. The *_bin columns added by the plotting cells above
# are now part of `data` as well.
data.columns

Index(['Churn', 'AccountWeeks', 'ContractRenewal', 'DataPlan', 'DataUsage',
       'CustServCalls', 'DayMins', 'DayCalls', 'MonthlyCharge', 'OverageFee',
       'RoamMins', 'AccountWeeks_bin', 'DataUsage_bin', 'MonthlyCharge_bin',
       'OverageFee_bin'],
      dtype='object')

In [None]:
# Candidate feature columns.
# NOTE(review): the four *_bin columns were created by the plotting cells with
# pd.cut / pd.qcut and hold interval categories rather than numbers. Confirm they
# are meant to be model inputs (they would need encoding first) or drop them here.
VARS = ['AccountWeeks',
        'ContractRenewal',
        'DataPlan',
        'DataUsage',
        'CustServCalls',
        'DayMins',
        'DayCalls',
        'MonthlyCharge',
        'OverageFee',
        'RoamMins',
        'AccountWeeks_bin',
        'DataUsage_bin',
        'MonthlyCharge_bin',
        'OverageFee_bin'
    ]
# Fixed: the label column is named 'Churn' (capital C). The previous lowercase
# 'churn' does not exist in `data` and raised a KeyError on selection.
TARGET = ['Churn']

x = data[VARS]
# Selecting with a one-element list keeps y as a single-column DataFrame.
y = data[TARGET]

# NOTE(review): train_test_split is not imported in any cell visible here. It is
# presumably sklearn.model_selection.train_test_split; make sure the import cell has it.
# First hold out 20% of all rows as the test set ...
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 123)
# ... then hold out 20% of the remaining rows as the validation set.
x_fit , x_val, y_fit, y_val = train_test_split(x_train, y_train, test_size = 0.2, random_state = 123)
print('Shape of train:', x_fit.shape)
print('Shape of validation:', x_val.shape)
print('Shape of test:', x_test.shape)