## __Loading Dataset__

In [3]:
import pandas as pd

In [4]:
df=pd.read_excel('Telecom Churn Rate Dataset.xlsx')

In [5]:
df.head(5)

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,numAdminTickets,numTechTickets,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,0,0,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,No,No,One year,No,Mailed check,56.95,1889.5,0,0,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,0,0,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,0,3,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,0,0,Yes


## __Advanced Missing Check__

In [6]:
#Sometimes, the cells are not empty but filled with values representing its empty or missing such as : "?" and "na"

#In order to catch these cells we might have 2 following approaches:

#1)Checking datatypes and unique values of columns. However this method could be time consuming when the dataset-
#-has too many columns and the categoric columns have too many unique values. It might be almost impossible for human eye.

#2)Second approach could be creating a list of possible missing value substitutes and searching for them through the
#-entire data set if the type of values are string. This method could be compututationaly cost with large datasets. 
#-To decrease this cost, vectorizational searching can be used.

### __Version 1 - Data Frame with Missing Values__

In [25]:
def adv_miss_check(df):
    no_info_types = set([" ","-","--","na","n/a","?","no info","missing info","*"])

    df = df.apply(lambda x: x.map(lambda y: str(y).lower() if isinstance(y, str) else y))

    missing_rows = df[df.isin(no_info_types).any(axis=1)]

    return missing_rows

In [26]:
adv_miss_check(df)

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,numAdminTickets,numTechTickets,Churn
488,4472-lvygi,female,0,yes,yes,0,no,no phone service,dsl,yes,...,yes,no,two year,yes,bank transfer (automatic),52.55,,0,0,no
753,3115-czmzd,male,0,no,yes,0,yes,no,no,no internet service,...,no internet service,no internet service,two year,no,mailed check,20.25,,5,0,no
936,5709-lvoeq,female,0,yes,yes,0,yes,no,dsl,yes,...,yes,yes,two year,no,mailed check,80.85,,0,0,no
1082,4367-nuyao,male,0,yes,yes,0,yes,yes,no,no internet service,...,no internet service,no internet service,two year,no,mailed check,25.75,,1,0,no
1340,1371-dwpaz,female,0,yes,yes,0,no,no phone service,dsl,yes,...,yes,no,two year,no,credit card (automatic),56.05,,0,0,no
3331,7644-omvmy,male,0,yes,yes,0,yes,no,no,no internet service,...,no internet service,no internet service,two year,no,mailed check,19.85,,0,0,no
3826,3213-vvolg,male,0,yes,yes,0,yes,yes,no,no internet service,...,no internet service,no internet service,two year,no,mailed check,25.35,,0,0,no
4380,2520-sgtta,female,0,yes,yes,0,yes,no,no,no internet service,...,no internet service,no internet service,two year,no,mailed check,20.0,,5,0,no
5218,2923-arzlg,male,0,yes,yes,0,yes,no,no,no internet service,...,no internet service,no internet service,one year,yes,mailed check,19.7,,0,0,no
6670,4075-wkniu,female,0,yes,yes,0,yes,yes,dsl,no,...,yes,no,two year,no,mailed check,73.35,,0,0,no


### __Version 2 - Indexes of Rows Having Missing Value__

In [13]:
import pandas as pd

def adv_miss_check(df):
    no_info_types = set([" ","-","--","na","n/a","?","no info","missing info","*"])

    df = df.apply(lambda x: x.map(lambda y: str(y).lower() if isinstance(y, str) else y))

    missing_rows = df[df.isin(no_info_types).any(axis=1)]

    missing_rows_indexes = missing_rows.index

    return missing_rows_indexes

In [16]:
adv_miss_check(df)

Index([488, 753, 936, 1082, 1340, 3331, 3826, 4380, 5218, 6670, 6754], dtype='int64')

### __Version 3 - Row Indexes with Column Names__

In [21]:
import pandas as pd

def adv_miss_check(df):
    no_info_types = set([" ","-","--","na","n/a","?","no info","missing info","*"])

    df = df.apply(lambda x: x.map(lambda y: str(y).lower() if isinstance(y, str) else y))

    missing_cells = df[df.isin(no_info_types)]

    missing_cols_with_space = missing_cells.columns[missing_cells.apply(lambda x: x.isin([" "]).any())]
    missing_info = []

    for col in missing_cols_with_space:
        col_indexes = missing_cells.index[missing_cells[col] == " "].tolist()
        missing_info.extend([(index, col) for index in col_indexes])

    return missing_info

In [22]:
adv_miss_check(df)

[(488, 'TotalCharges'),
 (753, 'TotalCharges'),
 (936, 'TotalCharges'),
 (1082, 'TotalCharges'),
 (1340, 'TotalCharges'),
 (3331, 'TotalCharges'),
 (3826, 'TotalCharges'),
 (4380, 'TotalCharges'),
 (5218, 'TotalCharges'),
 (6670, 'TotalCharges'),
 (6754, 'TotalCharges')]