# This is a sample Jupyter Notebook

Below is an example of a code cell. 
Put your cursor into the cell and press Shift+Enter to execute it and select the next one, or click the 'Run Cell' button.

Press Double Shift to search everywhere for classes, files, tool windows, actions, and settings.

To learn more about Jupyter Notebooks in PyCharm, see [help](https://www.jetbrains.com/help/pycharm/ipython-notebook-support.html).
For an overview of PyCharm, go to Help -> Learn IDE features or refer to [our documentation](https://www.jetbrains.com/help/pycharm/getting-started.html).

In [3]:
pip install faker pandas numpy

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.0.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [5]:
import pandas as pd
import numpy as np
from faker import Faker
import random
import os

# Initialize Faker
fake = Faker('en_IN')

# Seed both random sources so that re-running this cell regenerates the same
# draws instead of a brand-new dataset every time.
# Note: dates requested relative to "today" still depend on the day of the run.
SEED = 42
random.seed(SEED)   # stdlib generator used for every random.* call in this cell
Faker.seed(SEED)    # shared generator behind the `fake` instance

# Parameters
n_customers = 2000  # number of customer rows to generate
job_roles = ["Software Engineer", "Consultant", "Banker", "Designer", "Sales", "Data Scientist", "Product Manager"]
employment_types = ["Salaried", "Contract", "Freelance"]
cities = ["Mumbai", "Bengaluru", "Delhi", "Hyderabad", "Chennai", "Pune", "Gurugram", "Kolkata"]

# === Save directly to Downloads folder ===
# Output goes to <home>/Downloads/synthetic_data; the folder is created if it
# is missing and reused if it already exists.
downloads_dir = os.path.join(os.path.expanduser("~"), "Downloads", "synthetic_data")
os.makedirs(downloads_dir, exist_ok=True)

# Generate customer profiles
customer_columns = [
    "customer_id", "age", "gender", "city", "job_role", "employment_type",
    "income", "credit_score", "savings_rate", "existing_loan_balance",
    "account_balance", "joined_date"
]


def _random_customer_row(customer_id):
    """Return one synthetic customer as a list ordered like ``customer_columns``.

    Every field except ``customer_id`` is drawn independently at random; the
    draws happen in column order.
    """
    return [
        customer_id,
        random.randint(23, 40),                                 # age
        random.choice(["Male", "Female", "Non-binary"]),        # gender
        random.choice(cities),                                  # city
        random.choice(job_roles),                               # job_role
        random.choice(employment_types),                        # employment_type
        random.randint(300000, 2000000),                        # income
        random.randint(300, 850),                               # credit_score
        round(random.uniform(0, 0.6), 2),                       # savings_rate
        # A loan amount is always drawn first; it is then kept or replaced
        # by 0 with equal probability.
        random.choice([0, random.randint(50000, 1000000)]),     # existing_loan_balance
        random.randint(1000, 500000),                           # account_balance
        fake.date_between(start_date="-5y", end_date="today"),  # joined_date
    ]


# Customer ids run from 1 to n_customers inclusive.
customer_data = [
    _random_customer_row(customer_id)
    for customer_id in range(1, n_customers + 1)
]

customers_df = pd.DataFrame(customer_data, columns=customer_columns)

# Transactions
transaction_categories = ["Rent", "Groceries", "Entertainment", "Travel", "Investment", "Dining"]

# Transactions are drawn from roughly the last year, but never from before the
# customer joined: otherwise recently joined customers would get transactions
# dated earlier than their own joined_date.
txn_window_start = (pd.Timestamp.today() - pd.DateOffset(years=1)).date()

transactions = []
# assumes joined_date holds datetime.date values (as returned by
# fake.date_between) so it can be compared with txn_window_start
for cust_id, joined in zip(customers_df['customer_id'], customers_df['joined_date']):
    earliest_txn = max(joined, txn_window_start)
    num_txns = random.randint(10, 50)  # 10 to 50 transactions per customer
    for _ in range(num_txns):
        txn_date = fake.date_between(start_date=earliest_txn, end_date="today")
        category = random.choice(transaction_categories)
        amount = round(random.uniform(500, 50000), 2)
        transactions.append([cust_id, txn_date, category, amount])

# One row per transaction; customer_id links back to customers_df.
transactions_df = pd.DataFrame(transactions, columns=[
    "customer_id", "transaction_date", "category", "amount"
])

# Campaign Events
campaign_responses = ["Opened", "Clicked", "Converted", "Ignored"]

# Events are drawn from roughly the last six months, but never from before the
# customer joined: otherwise recently joined customers would get campaign
# events dated earlier than their own joined_date.
campaign_window_start = (pd.Timestamp.today() - pd.DateOffset(months=6)).date()

campaigns = []
# assumes joined_date holds datetime.date values (as returned by
# fake.date_between) so it can be compared with campaign_window_start
for cust_id, joined in zip(customers_df['customer_id'], customers_df['joined_date']):
    earliest_event = max(joined, campaign_window_start)
    num_events = random.randint(1, 5)  # 1 to 5 events per customer
    for _ in range(num_events):
        event_date = fake.date_between(start_date=earliest_event, end_date="today")
        # Weighted draw: Opened 40%, Clicked 30%, Converted 20%, Ignored 10%.
        response = random.choices(campaign_responses, weights=[0.4, 0.3, 0.2, 0.1])[0]
        campaigns.append([cust_id, event_date, response])

# One row per campaign event; customer_id links back to customers_df.
campaigns_df = pd.DataFrame(campaigns, columns=[
    "customer_id", "event_date", "response"
])

# Save files in Downloads
# Resolve the three target paths inside the output folder in one pass.
customers_file, transactions_file, campaigns_file = (
    os.path.join(downloads_dir, csv_name)
    for csv_name in ("customers.csv", "transactions.csv", "campaign_events.csv")
)

# Write each table to its CSV without the DataFrame index column.
for frame, target in (
    (customers_df, customers_file),
    (transactions_df, transactions_file),
    (campaigns_df, campaigns_file),
):
    frame.to_csv(target, index=False)

# Report the folder followed by every file written.
print(f"✅ Files saved in your Downloads folder: {downloads_dir}")
for target in (customers_file, transactions_file, campaigns_file):
    print(f"- {target}")

✅ Files saved in your Downloads folder: C:\Users\user\Downloads\synthetic_data
- C:\Users\user\Downloads\synthetic_data\customers.csv
- C:\Users\user\Downloads\synthetic_data\transactions.csv
- C:\Users\user\Downloads\synthetic_data\campaign_events.csv
