In [2]:
import pandas as pd
import numpy as np

# Seed NumPy's global legacy RNG so every run yields the same synthetic data.
np.random.seed(42)
n = 1000  # how many customer rows to synthesize

# --- Customer attributes ---
# NOTE: all draws below consume the same global random stream, so their
# order determines the generated values; do not reorder them.
customer_id = np.arange(n) + 1  # sequential ids 1..n
age = np.random.randint(low=18, high=66, size=n)  # high is exclusive: 18..65
income = np.clip(
    np.random.normal(loc=40000, scale=12000, size=n),
    10000,
    100000,
).astype(int)  # clamp outliers, then truncate to whole currency units
gender = np.random.choice(['Male', 'Female'], size=n)
spending_score = np.random.randint(low=1, high=101, size=n)  # 1..100
risk_score = np.random.randint(low=1, high=11, size=n)  # 1..10

# --- A/B test arm: each customer independently gets the offer or not ---
received_bonus_offer = np.random.choice(['Yes', 'No'], size=n)

# Simulate Signups (higher probability with bonus offer and low risk/high spending)
def signup_probability(spending, risk, offer):
    """Return the probability that a customer signs up.

    The probability is ``0.2 + spending / 200 - risk / 20``, increased by
    0.1 when the bonus offer was received, and finally clamped to ``[0, 1]``.

    Scalars and array-likes are both accepted. Array arguments are combined
    element-wise (NumPy broadcasting), so a whole population can be scored
    in a single call.

    Args:
        spending: Spending score(s); higher values raise the probability.
        risk: Risk score(s); higher values lower the probability.
        offer: ``'Yes'`` where the bonus offer was received. Any other
            value is treated as "not received".

    Returns:
        A NumPy float for scalar input, or an array of probabilities with
        the broadcast shape of the inputs.
    """
    base = 0.2 + (np.asarray(spending) / 200) - (np.asarray(risk) / 20)
    # np.where replaces a scalar-only `if`, which raised ValueError
    # ("truth value of an array is ambiguous") when given arrays.
    bonus = np.where(np.asarray(offer) == 'Yes', 0.1, 0.0)
    return np.clip(base + bonus, 0, 1)

# Score each customer from their own (spending, risk, offer) triple.
signup_probs = [
    signup_probability(spend, risk, offer)
    for spend, risk, offer in zip(spending_score, risk_score, received_bonus_offer)
]
# One Bernoulli trial per customer, relabelled from 1/0 to 'Yes'/'No' so the
# column uses the same vocabulary as received_bonus_offer.
signed_up = np.where(
    np.random.binomial(1, signup_probs) == 1, 'Yes', 'No'
).tolist()

# Assemble the per-customer columns into one table; the keyword order below
# is the column order written to the CSV.
df = pd.DataFrame(
    dict(
        customer_id=customer_id,
        age=age,
        income=income,
        gender=gender,
        spending_score=spending_score,
        risk_score=risk_score,
        received_bonus_offer=received_bonus_offer,
        signed_up=signed_up,
    )
)

# Persist without the positional index column, then preview the first rows
# (the trailing expression is what the notebook cell displays).
df.to_csv('customers.csv', index=False)
df.head()


Unnamed: 0,customer_id,age,income,gender,spending_score,risk_score,received_bonus_offer,signed_up
0,1,56,20469,Male,19,6,No,No
1,2,46,40577,Female,37,6,Yes,No
2,3,32,43116,Female,85,1,Yes,Yes
3,4,60,29148,Male,8,10,Yes,No
4,5,25,47663,Male,28,2,Yes,No
