In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.impute import SimpleImputer


Datapath = "C:\\Users\\HP\\Downloads\\archive (1)\\"
Datafile = "customer_churn_telecom_services.csv"

df =  pd.read_csv(Datapath + Datafile)

In [None]:
# Data cleaning
# To check for missing values
print("Missing values before cleaning:")
df.isnull().sum()

Missing values before cleaning:


gender               0
SeniorCitizen        0
Partner              0
Dependents           0
tenure               0
PhoneService         0
MultipleLines        0
InternetService      0
OnlineSecurity       0
OnlineBackup         0
DeviceProtection     0
TechSupport          0
StreamingTV          0
StreamingMovies      0
Contract             0
PaperlessBilling     0
PaymentMethod        0
MonthlyCharges       0
TotalCharges        11
Churn                0
dtype: int64

In [None]:
# Handling missing TotalCharges (By replacing empty strings with NaN then impute)
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')

In [None]:
# Then impute the missing TotalCharges with median
imputer = SimpleImputer(strategy='median')
df['TotalCharges'] = imputer.fit_transform(df[['TotalCharges']])


In [None]:

# Conversion of SeniorCitizen to categorical
df['SeniorCitizen'] = df['SeniorCitizen'].map({0: 'No', 1: 'Yes'})


In [None]:

# Conversion tenure to categorical bins
df['TenureGroup'] = pd.cut(df['tenure'], bins=[0, 12, 24, 48, 72], labels=['0-1 Year', '1-2 Years', '2-4 Years', '4-6 Years'])


In [None]:

# Dropping tenure as we'll use TenureGroup
df.drop('tenure', axis=1, inplace=True)

In [None]:

# To check if my data is cleaned
print("\nData after cleaning:")
df.head()
print("\nData types:")
print(df.dtypes)


Data after cleaning:

Data types:
gender                object
SeniorCitizen         object
Partner               object
Dependents            object
PhoneService          object
MultipleLines         object
InternetService       object
OnlineSecurity        object
OnlineBackup          object
DeviceProtection      object
TechSupport           object
StreamingTV           object
StreamingMovies       object
Contract              object
PaperlessBilling      object
PaymentMethod         object
MonthlyCharges       float64
TotalCharges         float64
Churn                 object
TenureGroup         category
dtype: object
