In [1]:
import random
from pathlib import Path

import numpy as np
import pandas as pd
from faker import Faker

In [2]:
# Initialize Faker and seed every random source used in this notebook.
SEED = 42

# Faker generates values from its own random instance, not from the stdlib
# `random` module's global state, so random.seed() alone leaves fake.uuid4(),
# fake.name() and fake.date_between() different on every run. Faker.seed()
# is the class-level call that makes those values repeatable.
# NOTE: relative dates such as '-5y' / '-1y' are still resolved against the
# day the notebook is executed, so date columns shift with the run date.
Faker.seed(SEED)
fake = Faker()

random.seed(SEED)
np.random.seed(SEED)

In [3]:
# ------------------------------------------------------------------ #
# Customer table: one synthetic record per customer                   #
# ------------------------------------------------------------------ #

n_customers = 500

# Each record gets a Faker UUID and name, a random gender, an age in
# 18..70 (inclusive), a random registration channel, and a registration
# date between five years and one year before the run date.
customer_data = [
    {
        "customer_id": fake.uuid4(),
        "name": fake.name(),
        "gender": random.choice(["Male", "Female"]),
        "age": random.randint(18, 70),
        "registration_channel": random.choice(["Online", "Offline"]),
        "registration_date": fake.date_between(start_date='-5y', end_date='-1y'),
    }
    for _ in range(n_customers)
]

customer_df = pd.DataFrame(customer_data)

In [None]:
# Save the customer table as CSV. The output folder is created first so the
# cell also works on a fresh checkout where ../data/raw does not exist yet
# (to_csv does not create missing directories).
customer_path = Path("../data/raw/customer.csv")
customer_path.parent.mkdir(parents=True, exist_ok=True)
customer_df.to_csv(customer_path, index=False)
print(f"Generated {len(customer_df)} customers.")

In [None]:
# ------------------------------------------------------------------ #
# Online transactions: 3000 synthetic purchases via Website or App    #
# ------------------------------------------------------------------ #

# Look the customer IDs up once instead of on every iteration.
customer_ids = customer_df["customer_id"].values

# Every transaction is attached to a randomly chosen existing customer and
# gets an amount drawn uniformly from 100..5000, rounded to 2 decimals.
# NOTE(review): transaction_date is drawn independently of the customer's
# registration_date, so a transaction may predate registration - confirm
# that this is acceptable for downstream analysis.
online_transactions = [
    {
        "transaction_id": fake.uuid4(),
        "customer_id": random.choice(customer_ids),
        "transaction_date": fake.date_between(start_date='-5y', end_date='-1y'),
        "amount": round(random.uniform(100, 5000), 2),
        "platform": random.choice(["Website", "App"]),
    }
    for _ in range(3000)
]

online_df = pd.DataFrame(online_transactions)

In [6]:
# Save the online transactions as CSV. The output folder is created first so
# the cell also works when ../data/raw does not exist yet (to_csv does not
# create missing directories).
online_path = Path("../data/raw/online_transactions.csv")
online_path.parent.mkdir(parents=True, exist_ok=True)
online_df.to_csv(online_path, index=False)
print(f"Generated {len(online_df)} online transactions.")

Generated 3000 online transactions.


In [7]:
# ------------------------------------------------------------------ #
# Offline transactions: 3000 synthetic in-store purchases             #
# ------------------------------------------------------------------ #

# Look the customer IDs up once instead of on every iteration.
customer_ids = customer_df["customer_id"].values

# Every transaction is attached to a randomly chosen existing customer, gets
# an amount drawn uniformly from 100..5000 (rounded to 2 decimals) and one of
# five store locations.
# NOTE(review): transaction_date is drawn independently of the customer's
# registration_date, so a transaction may predate registration - confirm
# that this is acceptable for downstream analysis.
offline_transactions = [
    {
        "transaction_id": fake.uuid4(),
        "customer_id": random.choice(customer_ids),
        "transaction_date": fake.date_between(start_date='-5y', end_date='-1y'),
        "amount": round(random.uniform(100, 5000), 2),
        "store_location": random.choice(
            ["Bangkok", "Chiang Mai", "Pattaya", "Phuket", "Khon Kaen"]
        ),
    }
    for _ in range(3000)
]

offline_df = pd.DataFrame(offline_transactions)

In [9]:
# Save the offline transactions as CSV. The output folder is created first so
# the cell also works when ../data/raw does not exist yet (to_csv does not
# create missing directories).
offline_path = Path("../data/raw/offline_transactions.csv")
offline_path.parent.mkdir(parents=True, exist_ok=True)
offline_df.to_csv(offline_path, index=False)
print(f"Generated {len(offline_df)} offline transactions.")

Generated 3000 offline transactions.


In [11]:
# ------------------------------------------------------------------ #
# Loyalty points: 5000 synthetic earn/redeem records                  #
# ------------------------------------------------------------------ #

# Look the customer IDs up once instead of on every iteration.
customer_ids = customer_df["customer_id"].values

loyalty_points = []

for _ in range(5000):
    points_earned = random.randint(10, 500)
    # A redemption candidate in 0..points_earned is always drawn; the record
    # then uses either 0 or that candidate with equal probability, so the
    # redeemed amount never exceeds what was earned.
    redeem_candidate = random.randint(0, points_earned)
    points_redeemed = random.choice([0, redeem_candidate])

    # NOTE(review): transaction_date is drawn independently of the customer's
    # registration_date, so a record may predate registration - confirm that
    # this is acceptable for downstream analysis.
    record = {
        "point_id": fake.uuid4(),
        "customer_id": random.choice(customer_ids),
        "points_earned": points_earned,
        "points_redeemed": points_redeemed,
        "transaction_date": fake.date_between(start_date='-5y', end_date='-1y'),
        "channel": random.choice(["Online", "Offline"]),
    }
    loyalty_points.append(record)

loyalty_df = pd.DataFrame(loyalty_points)

In [12]:
# Save the loyalty point records as CSV. The output folder is created first
# so the cell also works when ../data/raw does not exist yet (to_csv does not
# create missing directories).
loyalty_path = Path("../data/raw/loyalty_points.csv")
loyalty_path.parent.mkdir(parents=True, exist_ok=True)
loyalty_df.to_csv(loyalty_path, index=False)
print(f"Generated {len(loyalty_df)} loyalty points records.")

Generated 5000 loyalty points records.
