In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression


In [2]:
df = pd.read_csv("Cleaned_dataset.csv")

In [3]:
df['Churn'] = df['Churn'].map({'yes' : 1, 'No' : 0})

In [4]:
df.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,0.0
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,0.0
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,0.0
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,


In [5]:
df = df.drop(columns = ['customerID'])

In [10]:
num_cols = ['tenure', 'MonthlyCharges', 'TotalCharges']

outlier_rows = set()
summary = []
for col in num_cols:
    Q1 = df[col].quantile(0.25)
    Q3 = df[col].quantile(0.75)
    IQR = Q3 - Q1
    lower = Q1 - 1.5 * IQR
    upper = Q3 + 1.5 * IQR
    mask = (df[col] < lower) | (df[col] > upper)
    count = mask.sum()
    summary.append((col, int(count), round(count / len(df) * 100, 3)))
    outlier_rows.update(df.index[mask].tolist())

summary_df = pd.DataFrame(summary, columns=['column','outlier_count','outlier_pct']).set_index('column').sort_values('outlier_count', ascending=False)
print(summary_df)
print(f"\nRows with any numeric-column IQR outlier: {len(outlier_rows)}")
# show a few example outlier rows (if you want)
if outlier_rows:
    display(df.loc[sorted(outlier_rows)].head())

                outlier_count  outlier_pct
column                                    
tenure                      0          0.0
MonthlyCharges              0          0.0
TotalCharges                0          0.0

Rows with any numeric-column IQR outlier: 0


Outliers were analyzed using the IQR method for numerical features such as tenure, MonthlyCharges, and TotalCharges.
No outliers were detected, indicating a well-distributed and validated dataset.
Therefore, no outlier treatment was required.