In [4]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [None]:
df = pd.read_csv("Telco-Customer-Churn.csv")

df.drop(['customerID'], axis=1, inplace=True) # Dropping the column 'customerID' because it is not useful for predicting

df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce') # Converts 'TotalCharges' to numerical values and non-numeric values become NaN
df['TotalCharges'].fillna(df['TotalCharges'].median()) # Fills missing values with median

# Converts Yes/No categorical columns into binary values (mapping Yes as 1, No as 0)
binary_cols = ['Partner', 'Dependents', 'PhoneService', 'PaperlessBilling', 'Churn'] 
for col in binary_cols:
    df[col] = df[col].map({'Yes': 1, 'No': 0})

df['SeniorCitizen'] = df['SeniorCitizen'].map({0: 'No', 1: 'Yes'}) # Converts 'SeniorCitizen' from '0' and '1' to 'No' and 'Yes'

df = pd.get_dummies(df, columns=['gender', 'InternetService', 'Contract', 'PaymentMethod']) # Encoding categorical variables into binary categories

# Normalize/standardize numerical features by using StandardScaler by scaling them
scaler = StandardScaler()
num_cols = ['MonthlyCharges', 'TotalCharges']
df[num_cols] = scaler.fit_transform(df[num_cols])

pd.options.display.max_columns = None # Use this option to display all columns
df.describe().style.format("{:.1f}")  # I use .style.format("{:.1f}") to make the output more concise

Unnamed: 0,SeniorCitizen,tenure,MonthlyCharges,TotalCharges
count,7043.0,7043.0,7043.0,7032.0
mean,0.2,32.4,64.8,2283.3
std,0.4,24.6,30.1,2266.8
min,0.0,0.0,18.2,18.8
25%,0.0,9.0,35.5,401.4
50%,0.0,29.0,70.3,1397.5
75%,0.0,55.0,89.8,3794.7
max,1.0,72.0,118.8,8684.8
