# Customer Churn

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC 
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

In [2]:
# Read the customer-churn CSV straight from its hosted location
churn_csv_url = 'https://static.bc-edx.com/ai/ail-v-1-0/m13/lesson_3/datasets/customer-churn.csv'
df = pd.read_csv(churn_csv_url)

# Show the loaded frame
df

Unnamed: 0,Call Failure,Complains,Subscription Length,Charge Amount,Seconds of Use,Frequency of use,Frequency of SMS,Distinct Called Numbers,Age Group,Tariff Plan,Status,Age,Customer Value,Churn
0,8,0,38,0,4370,71,5,17,3,1,1,30,197.640,0
1,0,0,39,0,318,5,7,4,2,1,2,25,46.035,0
2,10,0,37,0,2453,60,359,24,3,1,1,30,1536.520,0
3,10,0,38,0,4198,66,1,35,1,1,1,15,240.020,0
4,3,0,38,0,2393,58,2,33,1,1,1,15,145.805,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3145,21,0,19,2,6697,147,92,44,2,2,1,25,721.980,0
3146,17,0,17,1,9237,177,80,42,5,1,1,55,261.210,0
3147,13,0,18,4,3157,51,38,21,3,1,1,30,280.320,0
3148,7,0,11,2,4695,46,222,12,3,1,1,30,1077.640,0


In [3]:
# Features: every column except the "Churn" label
X = df.drop(columns='Churn')
X

Unnamed: 0,Call Failure,Complains,Subscription Length,Charge Amount,Seconds of Use,Frequency of use,Frequency of SMS,Distinct Called Numbers,Age Group,Tariff Plan,Status,Age,Customer Value
0,8,0,38,0,4370,71,5,17,3,1,1,30,197.640
1,0,0,39,0,318,5,7,4,2,1,2,25,46.035
2,10,0,37,0,2453,60,359,24,3,1,1,30,1536.520
3,10,0,38,0,4198,66,1,35,1,1,1,15,240.020
4,3,0,38,0,2393,58,2,33,1,1,1,15,145.805
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3145,21,0,19,2,6697,147,92,44,2,2,1,25,721.980
3146,17,0,17,1,9237,177,80,42,5,1,1,55,261.210
3147,13,0,18,4,3157,51,38,21,3,1,1,30,280.320
3148,7,0,11,2,4695,46,222,12,3,1,1,30,1077.640


In [4]:
# Target: the "Churn" column on its own, taken across all rows
y = df.loc[:, "Churn"]
y

0       0
1       0
2       0
3       0
4       0
       ..
3145    0
3146    0
3147    0
3148    0
3149    1
Name: Churn, Length: 3150, dtype: int64

In [5]:
# Partition features and labels into train/test subsets.
# No test_size is given, so scikit-learn's default split proportion applies;
# random_state=1 makes the shuffle repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=1,
)

In [6]:
# Build a StandardScaler and learn its statistics from the training
# features only, so nothing from the test split influences the fit
scaler = StandardScaler()
scaler.fit(X_train)

# Apply the fitted scaling to the training features and show the result
X_train_scaled = scaler.transform(X_train)
X_train_scaled

array([[-1.0715273 ,  3.41042755, -2.78076233, ..., -0.57572071,
        -0.10596967, -0.83679706],
       [-0.0998478 , -0.29321837,  0.63628839, ..., -0.57572071,
        -0.10596967, -0.53841325],
       [-0.65509323, -0.29321837, -2.30944499, ..., -0.57572071,
        -0.10596967, -0.73208354],
       ...,
       [-0.0998478 , -0.29321837, -2.30944499, ..., -0.57572071,
         1.6084163 , -0.16953522],
       [-0.93271594, -0.29321837, -0.07068762, ..., -0.57572071,
        -0.67743166, -0.58547315],
       [-0.51628187, -0.29321837,  0.40062972, ...,  1.73695331,
        -0.10596967, -0.74623402]])

In [7]:
# Reuse the scaler fitted on the training split to scale the test features;
# the scaler is deliberately not re-fitted here
X_test_scaled = scaler.transform(X=X_test)
X_test_scaled

array([[-1.0715273 , -0.29321837,  0.51845905, ...,  1.73695331,
        -0.67743166, -0.90540775],
       [-1.0715273 , -0.29321837, -1.0133223 , ..., -0.57572071,
        -0.10596967, -0.70921332],
       [-0.51628187, -0.29321837,  0.51845905, ...,  1.73695331,
        -0.10596967, -0.49221386],
       ...,
       [-0.37747051, -0.29321837, -0.18851696, ..., -0.57572071,
        -0.67743166,  0.964721  ],
       [-0.23865915, -0.29321837,  0.63628839, ..., -0.57572071,
        -0.67743166,  1.75732929],
       [-0.23865915, -0.29321837,  1.22543506, ..., -0.57572071,
         1.6084163 ,  1.59138628]])

## Model and Fit to a Logistic Regression Classifier

In [8]:
# Create the logistic regression classifier model with a random_state of 1.
# The seed is passed explicitly so the estimator actually matches the
# configuration described here (it was previously omitted).
# NOTE(review): with the default solver the seed is not expected to change
# the fitted coefficients — confirm against the scikit-learn docs.
lr_model = LogisticRegression(random_state=1)

# Fit the model to the scaled training data; as the last expression of the
# cell, the fitted estimator is also displayed
lr_model.fit(X_train_scaled, y_train)

In [9]:
# Report mean accuracy of the logistic regression model on each split
lr_train_score = lr_model.score(X_train_scaled, y_train)
print(f"Training Data Score: {lr_train_score}")
lr_test_score = lr_model.score(X_test_scaled, y_test)
print(f"Testing Data Score: {lr_test_score}")

Training Data Score: 0.8954276037256562
Testing Data Score: 0.8934010152284264


## Model and Fit to a Support Vector Machine

In [10]:
# Build an SVC with the 'rbf' kernel and fit it on the scaled training data
# in one step (fit returns the estimator itself)
svm_model = SVC(kernel='rbf').fit(X_train_scaled, y_train)

# Display the fitted estimator
svm_model

In [11]:
# Report mean accuracy of the support vector machine on each split
svm_train_score = svm_model.score(X_train_scaled, y_train)
print(f"Training Data Score: {svm_train_score}")
svm_test_score = svm_model.score(X_test_scaled, y_test)
print(f"Testing Data Score: {svm_test_score}")

Training Data Score: 0.9301439458086368
Testing Data Score: 0.932741116751269


## Model and Fit to a KNN Model

In [12]:
# Build a k-nearest-neighbours classifier that votes over 9 neighbours and
# fit it on the scaled training data in one step (fit returns the estimator)
knn_model = KNeighborsClassifier(n_neighbors=9).fit(X_train_scaled, y_train)

# Display the fitted estimator
knn_model

In [13]:
# Report mean accuracy of the KNN model on each split
knn_train_score = knn_model.score(X_train_scaled, y_train)
print(f"Training Data Score: {knn_train_score}")
knn_test_score = knn_model.score(X_test_scaled, y_test)
print(f"Testing Data Score: {knn_test_score}")

Training Data Score: 0.9521591871295513
Testing Data Score: 0.9517766497461929


## Model and Fit to a Decision Tree Classifier

In [14]:
# Create the decision tree classifier model.
# random_state=1 is set (matching the seed used for the train/test split and
# the random forest) so that a fresh "Restart & Run All" rebuilds the same
# tree; an unseeded tree can come out differently from one run to the next.
# NOTE(review): scores recorded from an earlier unseeded run may differ
# slightly from what this seeded model produces.
dt_model = DecisionTreeClassifier(random_state=1)

# Fit the model to the scaled training data; as the last expression of the
# cell, the fitted estimator is also displayed
dt_model.fit(X_train_scaled, y_train)

In [15]:
# Report mean accuracy of the decision tree on each split
dt_train_score = dt_model.score(X_train_scaled, y_train)
print(f"Training Data Score: {dt_train_score}")
dt_test_score = dt_model.score(X_test_scaled, y_test)
print(f"Testing Data Score: {dt_test_score}")

Training Data Score: 0.9949195596951735
Testing Data Score: 0.9276649746192893


## Model and Fit to a Random Forest Classifier

In [16]:
# Build a random forest of 128 trees, seeded with random_state=1, and fit it
# on the scaled training data in one step (fit returns the estimator itself)
rf_model = RandomForestClassifier(
    n_estimators=128,
    random_state=1,
).fit(X_train_scaled, y_train)

# Display the fitted estimator
rf_model

In [17]:
# Report mean accuracy of the random forest on each split
rf_train_score = rf_model.score(X_train_scaled, y_train)
print(f"Training Data Score: {rf_train_score}")
rf_test_score = rf_model.score(X_test_scaled, y_test)
print(f"Testing Data Score: {rf_test_score}")

Training Data Score: 0.9949195596951735
Testing Data Score: 0.949238578680203
