In [69]:
import pandas as pd
import numpy as np

In [70]:
# Load the dataset; the path is relative, so 'Telco.csv' must sit in the working directory.
df = pd.read_csv('Telco.csv')

In [71]:
# Preview the first rows as a quick sanity check of the load.
df.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [72]:
def detect_outliers_in_column(column, threshold=1.5):
    """Return the index labels of values lying outside the IQR fences.

    The fences are ``Q1 - threshold * IQR`` and ``Q3 + threshold * IQR``,
    where ``IQR = Q3 - Q1``. A value is reported only when it is strictly
    below the lower fence or strictly above the upper fence.

    Parameters
    ----------
    column : pandas.Series
        Numeric values to inspect.
    threshold : float, default 1.5
        Multiplier applied to the IQR to position the fences; smaller
        values flag more points.

    Returns
    -------
    list
        Index labels of the flagged values, in their original order.
    """
    first_quartile, third_quartile = (column.quantile(q) for q in (0.25, 0.75))
    fence_offset = threshold * (third_quartile - first_quartile)

    too_low = column.lt(first_quartile - fence_offset)
    too_high = column.gt(third_quartile + fence_offset)

    return column.loc[too_low | too_high].index.tolist()

In [73]:
# Try the detector on MonthlyCharges with a 0.5*IQR fence (tighter than the 1.5 default).
detect_outliers_in_column(df['MonthlyCharges'], threshold=0.5)

[2110, 2363, 3200, 3294, 3887, 4148, 4578, 4602, 4796, 4867, 5119, 6109, 6757]

In [74]:
# Inspect column dtypes to see which columns are numeric.
df.dtypes

customerID           object
gender               object
SeniorCitizen         int64
Partner              object
Dependents           object
tenure                int64
PhoneService         object
MultipleLines        object
InternetService      object
OnlineSecurity       object
OnlineBackup         object
DeviceProtection     object
TechSupport          object
StreamingTV          object
StreamingMovies      object
Contract             object
PaperlessBilling     object
PaymentMethod        object
MonthlyCharges      float64
TotalCharges        float64
Churn                object
dtype: object

In [75]:
# List the numeric columns; the same selection is what outliers_labeling operates on.
df.select_dtypes(include=np.number).columns

Index(['SeniorCitizen', 'tenure', 'MonthlyCharges', 'TotalCharges'], dtype='object')

In [76]:
def outliers_labeling(df, threshold=1.5):
    """Return the numeric columns of ``df`` with IQR outliers replaced by NaN.

    Each numeric column is scanned with ``detect_outliers_in_column`` and the
    flagged cells are set to NaN. Columns whose distinct values are exactly
    ``{0, 1}`` are treated as binary indicators and left untouched. The input
    frame is never modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data; non-numeric columns are ignored.
    threshold : float, default 1.5
        IQR multiplier forwarded to ``detect_outliers_in_column``.

    Returns
    -------
    pandas.DataFrame
        A new frame holding only the numeric columns, with outliers as NaN.
        Integer columns that received at least one NaN come back as float.
    """
    # Work on an explicit copy so the assignments below can never write into
    # (or warn about writing into) the caller's frame.
    df_working = df.select_dtypes(include=np.number).copy()
    for col in df_working.columns:
        # Binary indicator columns have no meaningful IQR; skip them.
        if set(df_working[col].unique()) == {0, 1}:
            continue
        outlier_indices = detect_outliers_in_column(df_working[col], threshold)
        if not outlier_indices:
            continue
        # NaN cannot live in a NumPy integer column. Cast up front instead of
        # relying on implicit upcasting during assignment, which is deprecated.
        col_dtype = df_working[col].dtype
        if isinstance(col_dtype, np.dtype) and col_dtype.kind in "iu":
            df_working[col] = df_working[col].astype(float)
        df_working.loc[outlier_indices, col] = np.nan

    return df_working

In [77]:
# Label outliers across all numeric columns with a 0.3*IQR fence (tighter than the 1.5 default).
outliers_labeling(df, threshold=0.3)

Unnamed: 0,SeniorCitizen,tenure,MonthlyCharges,TotalCharges
0,0,1.0,29.85,29.85
1,0,34.0,56.95,1889.50
2,0,2.0,53.85,108.15
3,0,45.0,42.30,1840.75
4,0,2.0,70.70,151.65
...,...,...,...,...
7027,0,24.0,84.80,1990.50
7028,0,,103.20,
7029,0,11.0,29.60,346.45
7030,1,4.0,74.40,306.60


In [78]:
def count_outliers_in_row(df, threshold=1.5):
    """Count, for every row, how many numeric cells are flagged as outliers.

    Outliers are identified by ``outliers_labeling``, which replaces them
    with NaN. Only cells that *became* NaN through labeling are counted, so
    values that were already missing in ``df`` are not reported as outliers.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data; non-numeric columns are ignored.
    threshold : float, default 1.5
        IQR multiplier forwarded to ``outliers_labeling``.

    Returns
    -------
    pandas.Series
        Integer counts indexed like ``df`` and named ``'outliers_count'``.
    """
    numeric = df.select_dtypes(include=np.number)
    labeled = outliers_labeling(numeric, threshold)
    # A cell is an outlier only if labeling introduced the NaN; a NaN that
    # was present in the input is missing data, not an outlier.
    newly_missing = labeled.isna() & ~numeric.isna()

    return newly_missing.sum(axis=1).rename('outliers_count')

In [79]:
# Drop any 'outliers_count' left over from a previous run of this cell first.
# Otherwise that numeric column would itself be scanned for outliers and the
# counts would change on every re-execution.
df['outliers_count'] = count_outliers_in_row(
    df.drop(columns='outliers_count', errors='ignore'), threshold=0.3
)

In [80]:
# Distribution of the per-row outlier counts.
df['outliers_count'].value_counts()

outliers_count
0    5372
1     961
2     534
3     165
Name: count, dtype: int64

In [83]:
# Inspect the rows in which three numeric columns were flagged at once.
df[df['outliers_count'] == 3]

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn,outliers_count
15,3655-SNQYZ,Female,0,Yes,Yes,69,Yes,Yes,Fiber optic,Yes,...,Yes,Yes,Yes,Two year,No,Credit card (automatic),113.25,7895.15,No,3
17,9959-WOFKT,Male,0,No,Yes,71,Yes,Yes,Fiber optic,Yes,...,No,Yes,Yes,Two year,No,Bank transfer (automatic),106.70,7382.25,No,3
59,5954-BDFSG,Female,0,No,No,72,Yes,Yes,Fiber optic,No,...,Yes,Yes,Yes,Two year,Yes,Credit card (automatic),107.50,7853.70,No,3
152,1679-JRFBR,Female,0,Yes,Yes,70,Yes,Yes,Fiber optic,Yes,...,No,Yes,Yes,One year,Yes,Credit card (automatic),108.15,7930.55,No,3
193,9680-NIAUV,Female,0,Yes,Yes,72,Yes,Yes,Fiber optic,Yes,...,No,Yes,Yes,Two year,No,Credit card (automatic),109.70,8129.30,No,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6782,1320-GVNHT,Male,0,Yes,Yes,72,Yes,Yes,Fiber optic,Yes,...,No,Yes,Yes,Two year,Yes,Credit card (automatic),108.40,7767.25,No,3
6788,8071-SBTRN,Female,0,No,No,70,Yes,Yes,Fiber optic,Yes,...,No,Yes,Yes,Two year,Yes,Mailed check,111.15,7737.55,No,3
6848,3508-CFVZL,Female,0,No,No,71,Yes,Yes,Fiber optic,Yes,...,Yes,Yes,Yes,Two year,No,Mailed check,111.30,7985.90,No,3
6899,0530-IJVDB,Male,0,No,Yes,70,Yes,Yes,Fiber optic,Yes,...,Yes,Yes,Yes,Two year,Yes,Electronic check,114.60,7882.50,No,3
