In [1]:
import pandas as pd
import numpy as np

# Read the raw Telco export and clean it in a single chain:
#   - customerID is dropped,
#   - TotalCharges is parsed as numeric (unparsable values become NaN via
#     errors='coerce') and rows containing any NaN are then removed,
#   - Churn is mapped from 'Yes'/'No' to 1/0,
#   - the remaining non-numeric columns are one-hot encoded, dropping the
#     first level of each.
df = (
    pd.read_csv('Telco-Customer-Churn.csv')
    .drop(columns='customerID')
    .assign(TotalCharges=lambda frame: pd.to_numeric(frame['TotalCharges'], errors='coerce'))
    .dropna()
    .assign(Churn=lambda frame: frame['Churn'].map({'Yes': 1, 'No': 0}))
    .pipe(pd.get_dummies, drop_first=True)
)

# Keep a clean reference copy on disk (row index is not written).
df.to_csv('telco_churn_clean.csv', index=False)
print("Loaded and saved clean dataset.")

Loaded and saved clean dataset.


In [2]:
# Start from a fresh copy of the clean frame so re-running this cell never
# compounds drift on top of an already-drifted frame.
df_drifted = df.copy()

# Seeded generator: the simulated drift (and therefore the saved CSV) is
# reproducible across kernel restarts.
rng = np.random.default_rng(42)
n_rows = len(df_drifted)

# 1. Inflate MonthlyCharges (simulate rising prices):
#    one multiplicative factor per row, drawn from N(1.25, 0.05).
df_drifted['MonthlyCharges'] *= rng.normal(1.25, 0.05, size=n_rows)

# 2. Shorten Tenure (simulate newer users):
#    one factor per row, drawn from U(0.4, 0.6). size= is required here;
#    without it a single scalar is drawn and every row is scaled by the
#    same factor, unlike the per-row noise in steps 1 and 3.
#    Note: the result is fractional (float) tenure.
df_drifted['tenure'] = df_drifted['tenure'] * rng.uniform(0.4, 0.6, size=n_rows)

# 3. Add noise to TotalCharges: per-row additive offset drawn from N(20, 10).
df_drifted['TotalCharges'] += rng.normal(20, 10, size=n_rows)

In [3]:
# Flip 15% of churn labels randomly (simulated label noise).
flip_fraction = 0.15

# random_state is fixed, so the same rows are selected on every run.
flip_indices = df_drifted.sample(frac=flip_fraction, random_state=42).index

# Derive the flipped values from the untouched clean labels in `df` rather
# than from df_drifted itself. Because the same rows are always selected,
# reading df_drifted here would make a second run of this cell flip the
# labels back to their original values. df_drifted is a copy of df, so the
# two frames share the same index.
df_drifted.loc[flip_indices, 'Churn'] = 1 - df.loc[flip_indices, 'Churn']

In [4]:
# Persist the drifted frame as CSV (row index is not written).
drifted_csv_path = 'telco_churn_drifted.csv'
df_drifted.to_csv(drifted_csv_path, index=False)
print("Drifted dataset saved.")

Drifted dataset saved.
