In [1]:
import pandas as pd
import numpy as np

# Seed NumPy's global random state so every run produces the same dataset.
# NOTE: for a fixed seed the values depend on the ORDER of the random draws
# below; reordering or inserting a random column changes every later column.
np.random.seed(42)

# Number of customer records to generate
n = 8000

# Generate synthetic customer churn dataset (one array/list of length n per column).
# np.random.randint excludes its upper bound, so Age spans 18-69 and Tenure 1-71.
data = {
    "CustomerID": [f"CUST{1000 + i}" for i in range(n)],
    "Gender": np.random.choice(["Male", "Female"], n),
    "Age": np.random.randint(18, 70, n),
    "Tenure": np.random.randint(1, 72, n),
    "PlanType": np.random.choice(["Basic", "Silver", "Gold", "Premium"], n, p=[0.4, 0.3, 0.2, 0.1]),
    "MonthlyCharges": np.round(np.random.uniform(15, 120, n), 2),
    "InternetService": np.random.choice(["DSL", "Fiber optic", "None"], n, p=[0.4, 0.5, 0.1]),
    "ContractType": np.random.choice(["Month-to-month", "One year", "Two year"], n, p=[0.6, 0.25, 0.15]),
    "PaymentMethod": np.random.choice(["Credit card", "Bank transfer", "Electronic check", "Mailed check"], n),
    "TechSupport": np.random.choice(["Yes", "No"], n, p=[0.4, 0.6]),
    "OnlineSecurity": np.random.choice(["Yes", "No"], n, p=[0.45, 0.55]),
    "DeviceProtection": np.random.choice(["Yes", "No"], n, p=[0.5, 0.5]),
    "StreamingTV": np.random.choice(["Yes", "No"], n, p=[0.6, 0.4]),
    "StreamingMovies": np.random.choice(["Yes", "No"], n, p=[0.65, 0.35]),
    "PaperlessBilling": np.random.choice(["Yes", "No"], n, p=[0.7, 0.3]),
}

# TotalCharges = MonthlyCharges * Tenure, scaled by +/-10% uniform noise, rounded to cents
data["TotalCharges"] = np.round(data["MonthlyCharges"] * data["Tenure"] * np.random.uniform(0.9, 1.1, n), 2)

# Churn is a deterministic rule on the generated columns (no randomness):
#   - tenure under 12 on a month-to-month contract, OR
#   - monthly charges above 100 on the Basic plan.
# Computed with vectorized boolean masks instead of a per-row Python loop.
short_tenure_monthly = (data["Tenure"] < 12) & (data["ContractType"] == "Month-to-month")
pricey_basic = (data["MonthlyCharges"] > 100) & (data["PlanType"] == "Basic")
data["Churn"] = np.where(short_tenure_monthly | pricey_basic, "Yes", "No")

# Create DataFrame (column order follows the dict's insertion order)
df_churn = pd.DataFrame(data)

# Save CSV next to the notebook. The loading cell reads "Customer_Churn_Data.csv"
# relative to the working directory, so writing to a user-specific absolute path
# would break a fresh "Restart & Run All" and fail on any other machine.
file_path = "Customer_Churn_Data.csv"
df_churn.to_csv(file_path, index=False)

print(" Dataset created successfully!")
print("File saved at:", file_path)
print(df_churn.head())


 Dataset created successfully!
File saved at: C:\Users\Neha Prakash Jogdand\OneDrive\Desktop\Customer_Churn_Data.csv
  CustomerID  Gender  Age  Tenure PlanType  MonthlyCharges InternetService  \
0   CUST1000    Male   59      14   Silver           43.28             DSL   
1   CUST1001  Female   38       6   Silver           66.63     Fiber optic   
2   CUST1002    Male   54      23  Premium           90.34     Fiber optic   
3   CUST1003    Male   44      60    Basic           69.24     Fiber optic   
4   CUST1004    Male   69      12   Silver           15.80     Fiber optic   

     ContractType  PaymentMethod TechSupport OnlineSecurity DeviceProtection  \
0  Month-to-month    Credit card         Yes             No               No   
1  Month-to-month    Credit card          No             No               No   
2  Month-to-month    Credit card          No             No               No   
3  Month-to-month    Credit card         Yes            Yes               No   
4  Month-to-mo

In [1]:
import pandas as pd

# Read the churn CSV; the relative filename is resolved against the working directory
df = pd.read_csv("Customer_Churn_Data.csv")

# Quick overview: (rows, columns) first, then the column labels,
# and finally a ten-row preview as the cell's displayed result
print(df.shape, df.columns, sep="\n")
df.head(10)


(8000, 17)
Index(['CustomerID', 'Gender', 'Age', 'Tenure', 'PlanType', 'MonthlyCharges',
       'InternetService', 'ContractType', 'PaymentMethod', 'TechSupport',
       'OnlineSecurity', 'DeviceProtection', 'StreamingTV', 'StreamingMovies',
       'PaperlessBilling', 'TotalCharges', 'Churn'],
      dtype='object')


Unnamed: 0,CustomerID,Gender,Age,Tenure,PlanType,MonthlyCharges,InternetService,ContractType,PaymentMethod,TechSupport,OnlineSecurity,DeviceProtection,StreamingTV,StreamingMovies,PaperlessBilling,TotalCharges,Churn
0,CUST1000,Male,59,14,Silver,43.28,DSL,Month-to-month,Credit card,Yes,No,No,No,No,Yes,631.38,No
1,CUST1001,Female,38,6,Silver,66.63,Fiber optic,Month-to-month,Credit card,No,No,No,No,Yes,Yes,423.95,Yes
2,CUST1002,Male,54,23,Premium,90.34,Fiber optic,Month-to-month,Credit card,No,No,No,No,No,No,2155.31,No
3,CUST1003,Male,44,60,Basic,69.24,Fiber optic,Month-to-month,Credit card,Yes,Yes,No,Yes,No,Yes,4356.57,No
4,CUST1004,Male,69,12,Silver,15.8,Fiber optic,Month-to-month,Bank transfer,No,No,No,Yes,Yes,No,196.89,No
5,CUST1005,Female,46,13,Basic,23.22,DSL,One year,Electronic check,No,No,Yes,No,Yes,Yes,282.17,No
6,CUST1006,Male,37,6,Basic,90.74,DSL,One year,Bank transfer,Yes,Yes,Yes,No,Yes,No,578.26,No
7,CUST1007,Male,42,63,Basic,56.99,DSL,Month-to-month,Credit card,No,No,No,Yes,No,No,3680.6,No
8,CUST1008,Male,23,38,Silver,97.4,DSL,Month-to-month,Bank transfer,No,No,Yes,Yes,Yes,Yes,3620.9,No
9,CUST1009,Female,53,50,Gold,83.2,Fiber optic,One year,Mailed check,No,No,Yes,No,Yes,No,4295.7,No
