In [1]:
# Mount Google Drive into the Colab runtime at /content/drive
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [2]:
import os

# Exact path of the project folder (inside the mounted Drive)
PROJECT_PATH = "/content/drive/MyDrive/Customer_churn_Telecom"

# Path of the raw-data folder
DATA_PATH = os.path.join(PROJECT_PATH, "data/raw")

# List the raw-data folder to check that the dataset file is available
os.listdir(DATA_PATH)


['Telco_customer_churn.xlsx']

In [3]:
# pandas: loading and analysing the data
# NumPy: numerical operations
import pandas as pd
import numpy as np



In [4]:
# Full path of the Excel workbook inside the raw-data folder.
file_path = os.path.join(DATA_PATH, "Telco_customer_churn.xlsx")

# Read the workbook into a pandas DataFrame.
df = pd.read_excel(io=file_path)


In [5]:
# Preview the first 5 rows of the dataset.
df.head(n=5)


Unnamed: 0,CustomerID,Count,Country,State,City,Zip Code,Lat Long,Latitude,Longitude,Gender,...,Contract,Paperless Billing,Payment Method,Monthly Charges,Total Charges,Churn Label,Churn Value,Churn Score,CLTV,Churn Reason
0,3668-QPYBK,1,United States,California,Los Angeles,90003,"33.964131, -118.272783",33.964131,-118.272783,Male,...,Month-to-month,Yes,Mailed check,53.85,108.15,Yes,1,86,3239,Competitor made better offer
1,9237-HQITU,1,United States,California,Los Angeles,90005,"34.059281, -118.30742",34.059281,-118.30742,Female,...,Month-to-month,Yes,Electronic check,70.7,151.65,Yes,1,67,2701,Moved
2,9305-CDSKC,1,United States,California,Los Angeles,90006,"34.048013, -118.293953",34.048013,-118.293953,Female,...,Month-to-month,Yes,Electronic check,99.65,820.5,Yes,1,86,5372,Moved
3,7892-POOKP,1,United States,California,Los Angeles,90010,"34.062125, -118.315709",34.062125,-118.315709,Female,...,Month-to-month,Yes,Electronic check,104.8,3046.05,Yes,1,84,5003,Moved
4,0280-XJGEX,1,United States,California,Los Angeles,90015,"34.039224, -118.266293",34.039224,-118.266293,Male,...,Month-to-month,Yes,Bank transfer (automatic),103.7,5036.3,Yes,1,89,5340,Competitor had better devices


In [6]:
# Total number of rows and columns, as a (rows, columns) tuple
df.shape


(7043, 33)

In [7]:
# Print each column's dtype and non-null count to check types and missing values
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7043 entries, 0 to 7042
Data columns (total 33 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   CustomerID         7043 non-null   object 
 1   Count              7043 non-null   int64  
 2   Country            7043 non-null   object 
 3   State              7043 non-null   object 
 4   City               7043 non-null   object 
 5   Zip Code           7043 non-null   int64  
 6   Lat Long           7043 non-null   object 
 7   Latitude           7043 non-null   float64
 8   Longitude          7043 non-null   float64
 9   Gender             7043 non-null   object 
 10  Senior Citizen     7043 non-null   object 
 11  Partner            7043 non-null   object 
 12  Dependents         7043 non-null   object 
 13  Tenure Months      7043 non-null   int64  
 14  Phone Service      7043 non-null   object 
 15  Multiple Lines     7043 non-null   object 
 16  Internet Service   7043 

In [8]:
# Show every column name as a plain Python list.
# Having the exact spellings on screen helps avoid column-name errors later on.
list(df.columns)


['CustomerID',
 'Count',
 'Country',
 'State',
 'City',
 'Zip Code',
 'Lat Long',
 'Latitude',
 'Longitude',
 'Gender',
 'Senior Citizen',
 'Partner',
 'Dependents',
 'Tenure Months',
 'Phone Service',
 'Multiple Lines',
 'Internet Service',
 'Online Security',
 'Online Backup',
 'Device Protection',
 'Tech Support',
 'Streaming TV',
 'Streaming Movies',
 'Contract',
 'Paperless Billing',
 'Payment Method',
 'Monthly Charges',
 'Total Charges',
 'Churn Label',
 'Churn Value',
 'Churn Score',
 'CLTV',
 'Churn Reason']

In [9]:
# Distribution of the "Churn Label" column: number of rows per distinct value
df["Churn Label"].value_counts()


Unnamed: 0_level_0,count
Churn Label,Unnamed: 1_level_1
No,5174
Yes,1869


In [10]:
# Count of missing values in every column
# NOTE(review): isna() only flags NaN/None/NaT; empty or whitespace-only strings in
# text (object) columns are not counted here — verify those columns separately.
df.isna().sum()


Unnamed: 0,0
CustomerID,0
Count,0
Country,0
State,0
City,0
Zip Code,0
Lat Long,0
Latitude,0
Longitude,0
Gender,0


In [11]:
# Make sure the processed-data folder exists; exist_ok=True means no error if it is already there.
processed_path = os.path.join(PROJECT_PATH, "data/processed")
os.makedirs(processed_path, exist_ok=True)

# Save a backup copy of the dataset as CSV; index=False leaves the row index out of the file.
# NOTE(review): re-running this cell overwrites the file with whatever `df` holds at that
# moment, so run it before any later cell modifies `df`.
df.to_csv(
    path_or_buf=os.path.join(processed_path, "churn_original_backup.csv"),
    index=False,
)
