In [1]:
import io
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

%matplotlib inline
warnings.filterwarnings('ignore')

In [2]:
# Load the customer churn CSV; the path is relative to the notebook's working directory.
df = pd.read_csv('telecom_customer_churn.csv')

## Data Understanding

In [3]:
class DatasetDisplay:
    """Print a plain-text overview of a DataFrame.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to summarise. It is stored by reference (no copy is made).
    """

    def __init__(self, df):
        self.df = df

    def display_info(self):
        """Print the head, ``info()`` summary, ``describe()`` statistics and shape.

        Each section is written to stdout, separated by a dashed rule.

        Returns
        -------
        None
        """
        separator = '------------------------------------------'

        # DataFrame.info() writes its report to a buffer (stdout by default) and
        # returns None. Interpolating the call directly therefore printed the
        # report *before* its header and rendered a literal "None" under it.
        # Capture the report in memory so it can be printed in the right place.
        info_buffer = io.StringIO()
        self.df.info(buf=info_buffer)
        info_text = info_buffer.getvalue().rstrip()

        print(f'First Five Rows of DataFrame:\n{self.df.head()}')
        print(separator)
        print(f'DataFrame Information:\n{info_text}')
        print(separator)
        print(f'Descriptive Statistics of DataFrame:\n{self.df.describe()}')
        print(separator)
        print(f'Shape of DataFrame: {self.df.shape}')



In [4]:
# NOTE(review): the name `display` shadows IPython's built-in `display()` helper for
# the rest of the session. A distinct name would avoid that — confirm nothing further
# down relies on this variable before renaming it.
display = DatasetDisplay(df)
display.display_info()

First Five Rows of DataFrame:
  Customer ID  Gender  Age Married  Number of Dependents          City  \
0  0002-ORFBO  Female   37     Yes                     0  Frazier Park   
1  0003-MKNFE    Male   46      No                     0      Glendale   
2  0004-TLHLJ    Male   50      No                     0    Costa Mesa   
3  0011-IGKFF    Male   78     Yes                     0      Martinez   
4  0013-EXCHZ  Female   75     Yes                     0     Camarillo   

   Zip Code   Latitude   Longitude  Number of Referrals  ...   Payment Method  \
0     93225  34.827662 -118.999073                    2  ...      Credit Card   
1     91206  34.162515 -118.203869                    0  ...      Credit Card   
2     92627  33.645672 -117.922613                    0  ...  Bank Withdrawal   
3     94553  38.014457 -122.115432                    1  ...  Bank Withdrawal   
4     93010  34.227846 -119.079903                    3  ...      Credit Card   

  Monthly Charge Total Charges  Total 

## Missing Values

In [5]:
def missing(data):
    """Summarise missing values per column.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to inspect.

    Returns
    -------
    pandas.DataFrame
        Indexed by the column names of ``data`` with two columns:
        'Number Of Missing Values' (count of nulls) and
        'Percentage of Missing Values' (share of rows that are null, 0-100).
    """
    missing_count = data.isnull().sum()
    # Scale the fraction to 0-100 so the values match the "Percentage" label;
    # previously the column held a 0-1 fraction.
    missing_percentage = missing_count / len(data) * 100
    return pd.DataFrame({
        'Number Of Missing Values': missing_count,
        'Percentage of Missing Values': missing_percentage,
    })

In [6]:
# Per-column missing-value summary of `df`, rendered as this cell's output.
missing(df)

Unnamed: 0,Number Of Missing Values,Percentage of Missing Values
Customer ID,0,0.0
Gender,0,0.0
Age,0,0.0
Married,0,0.0
Number of Dependents,0,0.0
City,0,0.0
Zip Code,0,0.0
Latitude,0,0.0
Longitude,0,0.0
Number of Referrals,0,0.0


### Drop Columns

In [7]:
class DropColumn:
    """Drop columns from a DataFrame in place.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to modify. It is stored by reference, so every other name bound
        to the same object sees the columns disappear as well.
    """

    def __init__(self, data):
        self.data = data

    def drop_columns(self, columns, errors='raise'):
        """Remove ``columns`` from the wrapped frame, mutating it in place.

        Parameters
        ----------
        columns : label or list of labels
            Column name(s) to drop.
        errors : {'raise', 'ignore'}, default 'raise'
            Forwarded to ``DataFrame.drop``. With 'raise' a missing label
            raises ``KeyError`` (the original behaviour); with 'ignore'
            missing labels are skipped, which makes a repeated call harmless.

        Returns
        -------
        None
        """
        self.data.drop(columns=columns, inplace=True, errors=errors)

In [8]:
# `data` is another name for the same object as `df` (no copy is made), so the
# in-place drop below also removes the columns from `df`.
data = df
dropper = DropColumn(data)
columns_to_drop = ['Churn Category', 'Churn Reason', 'Customer ID']  # List of column names to drop
# NOTE: not idempotent — re-running this cell after the columns are gone raises KeyError.
dropper.drop_columns(columns_to_drop)

In [9]:
# Re-check the schema after the drop; the output below lists 35 remaining columns.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7043 entries, 0 to 7042
Data columns (total 35 columns):
 #   Column                             Non-Null Count  Dtype  
---  ------                             --------------  -----  
 0   Gender                             7043 non-null   object 
 1   Age                                7043 non-null   int64  
 2   Married                            7043 non-null   object 
 3   Number of Dependents               7043 non-null   int64  
 4   City                               7043 non-null   object 
 5   Zip Code                           7043 non-null   int64  
 6   Latitude                           7043 non-null   float64
 7   Longitude                          7043 non-null   float64
 8   Number of Referrals                7043 non-null   int64  
 9   Tenure in Months                   7043 non-null   int64  
 10  Offer                              7043 non-null   object 
 11  Phone Service                      7043 non-null   objec

## Fill Missing Values

## Duplicates

In [10]:
def duplicates(data):
    """Print whether ``data`` contains at least one fully duplicated row.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to check with ``DataFrame.duplicated()``.

    Returns
    -------
    None
        The verdict is printed, not returned.
    """
    has_repeated_rows = data.duplicated().any()
    message = (
        'Duplicates Present'
        if has_repeated_rows
        else 'There are no duplicates in the DataFrame'
    )
    print(message)

In [11]:
# Report whether `df` contains any fully duplicated rows.
duplicates(df)

There are no duplicates in the DataFrame


## Unique Values

In [12]:
def unique_values(data):
    """Print the distinct values of every object-dtype column in ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose ``object`` columns are listed one per line.

    Returns
    -------
    None
        One line per column is printed; nothing is returned.
    """
    # Iterating a DataFrame yields its column labels.
    for name in data.select_dtypes(include='object'):
        distinct = data[name].unique()
        print(f"There are Unique Values in '{name}': {distinct}")

In [13]:
# List the distinct values of each object-dtype column in `df`.
unique_values(df)

There are Unique Values in 'Gender': ['Female' 'Male']
There are Unique Values in 'Married': ['Yes' 'No']
There are Unique Values in 'City': ['Frazier Park' 'Glendale' 'Costa Mesa' ... 'Jacumba' 'Carpinteria'
 'Meadow Valley']
There are Unique Values in 'Offer': ['None' 'Offer E' 'Offer D' 'Offer A' 'Offer B' 'Offer C']
There are Unique Values in 'Phone Service': ['Yes' 'No']
There are Unique Values in 'Multiple Lines': ['No' 'Yes' nan]
There are Unique Values in 'Internet Service': ['Yes' 'No']
There are Unique Values in 'Internet Type': ['Cable' 'Fiber Optic' 'DSL' nan]
There are Unique Values in 'Online Security': ['No' 'Yes' nan]
There are Unique Values in 'Online Backup': ['Yes' 'No' nan]
There are Unique Values in 'Device Protection Plan': ['No' 'Yes' nan]
There are Unique Values in 'Premium Tech Support': ['Yes' 'No' nan]
There are Unique Values in 'Streaming TV': ['Yes' 'No' nan]
There are Unique Values in 'Streaming Movies': ['No' 'Yes' nan]
There are Unique Values in 'Streami

## Outliers