In [1]:
import pandas as pd
import numpy as np
import random

# --- 1) Fix the RNG seeds so every run regenerates the same dataset ---
random.seed(42)
np.random.seed(42)

# --- 2) Draw the synthetic customer attributes ---
# Every draw below consumes NumPy's global RNG stream, so the statements must
# stay in this order: reordering them changes the generated values.
n_customers = 1000

# Zero-padded, 1-based identifiers: CUST_0001 ... CUST_1000.
customer_ids = ["CUST_%04d" % idx for idx in range(1, n_customers + 1)]

# randint's upper bound is exclusive, so ages fall in 18..69.
ages = np.random.randint(low=18, high=70, size=n_customers)

genders = np.random.choice(
    a=["Male", "Female", "Other"],
    size=n_customers,
    p=[0.48, 0.48, 0.04],
)

# No probabilities given, so the four regions are equally likely.
regions = np.random.choice(a=["North", "South", "East", "West"], size=n_customers)

subscription_types = np.random.choice(
    a=["Basic", "Standard", "Premium"],
    size=n_customers,
    p=[0.4, 0.4, 0.2],
)

# Exclusive upper bound again: tenure falls in 1..59 months.
tenure_months = np.random.randint(low=1, high=60, size=n_customers)

# Uniform watch hours, kept to one decimal place.
avg_monthly_watch_hours = np.random.uniform(1, 50, size=n_customers).round(1)

# Binary churn flag, drawn with probability 0.15 of being 1.
churned = np.random.choice(a=[0, 1], size=n_customers, p=[0.85, 0.15])

# --- 3) Assemble the columns into one DataFrame (dict order = column order) ---
customer_columns = {
    "CustomerID": customer_ids,
    "Age": ages,
    "Gender": genders,
    "Region": regions,
    "SubscriptionType": subscription_types,
    "TenureMonths": tenure_months,
    "AvgMonthlyWatchHours": avg_monthly_watch_hours,
    "Churned": churned,
}
df_customers = pd.DataFrame(customer_columns)

# --- 4) Write the dataset to CSV without the index column ---
df_customers.to_csv("flo_sports_customers.csv", index=False)

print("✅ CSV file 'flo_sports_customers.csv' has been created in your working directory.")

# Last expression of the cell: rich display of the first five rows.
df_customers.head()


✅ CSV file 'flo_sports_customers.csv' has been created in your working directory.


Unnamed: 0,CustomerID,Age,Gender,Region,SubscriptionType,TenureMonths,AvgMonthlyWatchHours,Churned
0,CUST_0001,56,Female,West,Basic,1,4.2,0
1,CUST_0002,69,Male,East,Standard,55,1.9,1
2,CUST_0003,46,Male,West,Basic,56,41.5,0
3,CUST_0004,32,Male,South,Premium,27,26.7,0
4,CUST_0005,60,Male,North,Premium,46,39.0,0
