### Importing the necessary libraries

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder,OneHotEncoder,StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report
from flask import Flask, jsonify, request

### Loading the dataset

In [14]:
# Read the churn dataset from the working directory and preview the first rows.
csv_path = 'churn-bigml-20.csv'
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0,State,Account length,Area code,International plan,Voice mail plan,Number vmail messages,Total day minutes,Total day calls,Total day charge,Total eve minutes,Total eve calls,Total eve charge,Total night minutes,Total night calls,Total night charge,Total intl minutes,Total intl calls,Total intl charge,Customer service calls,Churn
0,LA,117,408,No,No,0,184.5,97,31.37,351.6,80,29.89,215.8,90,9.71,8.7,4,2.35,1,False
1,IN,65,415,No,No,0,129.1,137,21.95,228.5,83,19.42,208.8,111,9.4,12.7,6,3.43,4,True
2,NY,161,415,No,No,0,332.9,67,56.59,317.8,97,27.01,160.6,128,7.23,5.4,9,1.46,4,True
3,SC,111,415,No,No,0,110.4,103,18.77,137.3,102,11.67,189.6,105,8.53,7.7,6,2.08,2,False
4,HI,49,510,No,No,0,119.3,117,20.28,215.1,109,18.28,178.7,90,8.04,11.1,1,3.0,1,False


### Encode categorical variables

In [15]:
# Encode the two plan columns as integer codes. The same encoder object is
# refitted for each column, so after this cell `le` holds only the fit from
# "Voice mail plan".
le = LabelEncoder()
for plan_column in ("International plan", "Voice mail plan"):
    df[plan_column] = le.fit_transform(df[plan_column])

# One-hot encode the state, dropping the first level.
df = pd.get_dummies(df, columns=['State'], drop_first=True)


In [16]:
# Separate the features from the target; the target is cast to integers.
target_column = 'Churn'
X = df.drop(target_column, axis=1)
y = df[target_column].astype(int)

In [17]:
# Standardise the feature columns. Note that the scaler is fitted on all of X,
# i.e. before any train/test split takes place.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)


In [18]:
# Split the raw features first, then fit the scaler on the training rows only,
# so that test-set statistics do not leak into the preprocessing. The split
# uses the same test_size and random_state as before.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
scaler = StandardScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
X_test = scaler.transform(X_test_raw)


In [19]:
# Train a decision tree with a fixed seed, then predict on the test split.
dt_classifier = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
y_pred = dt_classifier.predict(X_test)


In [20]:
# Score the test-split predictions: overall accuracy plus the per-class report.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
report = classification_report(y_true=y_test, y_pred=y_pred)


In [21]:
# Show the accuracy to two decimals, followed by the full report text.
print(f"accuracy:{accuracy:.2f}")
print("Classification Report:\n", report)

accuracy:0.90
Classification Report:
               precision    recall  f1-score   support

           0       0.96      0.92      0.94       119
           1       0.55      0.73      0.63        15

    accuracy                           0.90       134
   macro avg       0.76      0.83      0.79       134
weighted avg       0.92      0.90      0.91       134



In [25]:
# Build a per-customer summary keyed by the DataFrame index. The derived totals
# are computed column-wise rather than by looping over rows with iterrows(),
# and to_dict(orient="index") then yields one plain dict per customer.
# NOTE: orient="index" requires the index of `df` to be unique.
customer_behaviour = pd.DataFrame(
    {
        "Account Length": df["Account length"],
        "International Plan": df["International plan"],
        "Voice Mail Plan": df["Voice mail plan"],
        "Total Day Minutes": df["Total day minutes"],
        "Total Eve Minutes": df["Total eve minutes"],
        "Total Night Minutes": df["Total night minutes"],
        "Total Intl Minutes": df["Total intl minutes"],
        # Calls summed over the day, night, evening and international periods.
        "Total Calls": (
            df["Total day calls"]
            + df["Total night calls"]
            + df["Total eve calls"]
            + df["Total intl calls"]
        ),
        # Charges are added left to right: day, evening, night, international.
        "Total Charges": (
            df["Total day charge"]
            + df["Total eve charge"]
            + df["Total night charge"]
            + df["Total intl charge"]
        ),
        "Customer Service Calls": df["Customer service calls"],
        "Churn": df["Churn"],
    }
).to_dict(orient="index")
def get_customer_behavior(customer_id):
    """Look up one customer's summary in the module-level `customer_behaviour` dict.

    Args:
        customer_id: Key to look up in `customer_behaviour`.

    Returns:
        The stored summary for `customer_id`, or the string
        "Customer not found" when the key is absent.
    """
    if customer_id in customer_behaviour:
        return customer_behaviour[customer_id]
    return "Customer not found"

# Pick one customer for the demo with a seeded generator, so that a fresh
# Restart & Run All prints the same customer every time.
rng = np.random.default_rng(42)
example_customer_id = rng.choice(list(customer_behaviour.keys()))
print(f"Customer ID: {example_customer_id}")
print(get_customer_behavior(example_customer_id))

Customer ID: 162
{'Account Length': 33, 'International Plan': 0, 'Voice Mail Plan': 1, 'Total Day Minutes': 186.8, 'Total Eve Minutes': 261.0, 'Total Night Minutes': 317.8, 'Total Intl Minutes': 15.0, 'Total Calls': 301, 'Total Charges': 72.3, 'Customer Service Calls': 0, 'Churn': False}
