In [1]:
import pandas as pd
import numpy as np
from faker import Faker
import random

In [2]:
# Initialize Faker
fake = Faker()

random.seed(42)
np.random.seed(42)

<big>Customer data</big>

| Column | Description |
| :------------- | :------------------------- |
| customer_id | Unique ID for each customer |
| name | Customer name |
| gender | Male / Female |
| age | Customer age |
| registration_channel | Online / Offline |
| registration_date | Date they joined |

In [3]:
# --------------- #
# Create Customer #
# --------------- #

n_customers = 500

customer_data = []

for _ in range(n_customers):
    customer_data.append({
        "customer_id": fake.uuid4(),
        "name": fake.name(),
        "gender": random.choice(["Male", "Female"]),
        "age": random.randint(18, 70),
        "registration_channel": random.choice(["Online", "Offline"]),
        "registration_date": fake.date_between(start_date='-5y', end_date='-1y')
    })

customer_df = pd.DataFrame(customer_data)

In [4]:
# Save
customer_df.to_csv("../data/raw/customer.csv", index=False)
print(f"Generated {len(customer_df)} customers.")
customer_df.head(10)

Generated 500 customers.


Unnamed: 0,customer_id,name,gender,age,registration_channel,registration_date
0,1a4a26c3-2207-4396-b175-be712100e243,Kevin Thompson,Male,19,Offline,2022-07-01
1,68fcf654-16a0-4e63-baef-ac161ba0aad5,Charles Mckee,Male,32,Online,2021-03-23
2,7231d7ba-0c7e-40a6-9cca-5c3321552882,Kevin Wade,Male,61,Online,2022-01-31
3,527395b0-d07f-4afa-aecd-3876c0939ff9,Ronald Phillips,Female,20,Online,2022-01-15
4,470d3c0f-4102-4cbd-bb46-69e79832d3dc,Dr. Michelle Reyes,Male,31,Online,2023-12-06
5,2bcf7803-578f-4a67-b35f-8ea4e3c29a93,Diane Alvarado,Male,53,Online,2024-02-19
6,b7fa0016-ba05-4ae7-abfd-6b56be17071a,Eric Shaw,Female,32,Offline,2020-11-09
7,cf6a59d0-092b-466b-9996-9a8aa5bfa249,Rebecca Aguirre,Female,69,Online,2021-01-24
8,12683043-fd21-4a16-8fcd-208b1d13cfd4,David Lewis,Male,62,Offline,2020-11-01
9,c1e5b044-0718-44b4-9a75-406fbf961369,Robert Johnson,Female,35,Online,2021-10-26


<big>Online Transactions</big>

| Column | Description |
| :------- | :----------- |
| transaction_id | Unique transaction ID |
| customer_id | Link to customer | 
| transaction_date | Date of online purchase |
| amount | Amount spent (THB) |
|platform | Website / App |

In [5]:
# -------------------------- #
# Create Online Transactions #
# -------------------------- #

online_transactions = []

for _ in range(3000):
    online_transactions.append({
        "transaction_id": fake.uuid4(),
        "customer_id": random.choice(customer_df["customer_id"].values),
        "transaction_date": fake.date_between(start_date='-5y', end_date='-1y'),
        "amount": round(random.uniform(100, 5000), 2),
        "platform": random.choice(["Website", "App"])
    })

online_df = pd.DataFrame(online_transactions)

In [6]:
online_df.to_csv("../data/raw/online_transactions.csv", index=False)
print(f"Generated {len(online_df)} online transactions.")
online_df.head(10)

Generated 3000 online transactions.


Unnamed: 0,transaction_id,customer_id,transaction_date,amount,platform
0,eb68282d-c40d-4dd6-8c82-a04561fdeb59,61df8168-e62c-421a-9970-c530eff973c1,2020-07-18,1210.06,Website
1,07738d65-e9c8-4768-a184-86f7db3c57b0,60809de2-52cb-4080-9dc1-356ffe51374b,2023-03-16,478.11,Website
2,41134906-8039-4166-92b6-4ffed3649dc5,00bf6959-c043-41ea-a1a1-25f1db65b0f3,2021-09-26,3877.29,Website
3,73e77662-9974-4408-953f-a0fe45d97ad3,50d68fbc-ec07-412d-8d9e-168beb3d440e,2022-06-30,1750.15,Website
4,93dafe6b-8904-4fd2-988d-9c97dcc04153,9101a3a1-2731-4032-81d6-ae569ed66a22,2022-05-06,1945.93,Website
5,3a17c97b-45a6-4d3c-ac3c-c6804d3b69ee,5222d2d2-ab83-4bb7-956e-d97937c54182,2023-03-10,4173.59,Website
6,95643857-58d9-400d-b70f-2852bf714cc7,1bf60730-60e7-4265-b7d1-1b84149845fd,2023-01-16,2244.6,App
7,430271f7-29c3-431b-9ea7-54b762ed7fe7,c839ac89-b327-4f71-b270-86d483ae6802,2021-01-11,3414.18,Website
8,de4e117c-28ea-489c-9bfc-f11388fc0531,f84df954-0ae5-4421-9eba-9ab837a90540,2021-07-29,2276.57,App
9,03ec9da3-9f82-4911-91fd-568d6e94810d,e46cfd8f-7263-49f1-8af4-c8bbd5a78049,2022-01-13,3767.83,App


<big>Offline Transactions</big>

| Column | Description |
| :------ | :------------ |
| transaction_id | Unique transaction ID |
| customer_id | Link to customer |
| transaction_date | Date of in-store purchase |
| amount | Amount spent (THB) |
| store_location | City or branch name|

In [7]:
# --------------------------- #
# Create Offline Transactions #
# --------------------------- #

offline_transactions = []

for _ in range(3000):
    offline_transactions.append({
        "transaction_id": fake.uuid4(),
        "customer_id": random.choice(customer_df["customer_id"].values),
        "transaction_date": fake.date_between(start_date='-5y', end_date='-1y'),
        "amount": round(random.uniform(100, 5000), 2),
        "store_location": random.choice(["Bangkok", "Chiang Mai", "Pattaya", "Phuket", "Khon Kaen"])
    })

offline_df = pd.DataFrame(offline_transactions)

In [8]:
offline_df.to_csv("../data/raw/offline_transactions.csv", index=False)
print(f"Generated {len(offline_df)} offline transactions.")
offline_df.head(10)

Generated 3000 offline transactions.


Unnamed: 0,transaction_id,customer_id,transaction_date,amount,store_location
0,ae40417f-9531-41cd-96bc-6066bc5198c4,08e37675-5177-4ffc-9f9a-98caa8e841af,2023-08-09,527.53,Pattaya
1,31043f30-ef39-4196-b017-caf85e076aa2,68fcf654-16a0-4e63-baef-ac161ba0aad5,2022-11-16,1096.07,Bangkok
2,c513ac74-2c7b-4f3d-bce0-de3eee1f221d,8538e326-ef9b-4ecc-bc2b-c1f0b9f14224,2023-05-03,2836.03,Chiang Mai
3,863a5bda-db46-4784-b363-5ea5e2919526,09730510-73af-4201-9303-1cd810dbf16b,2023-02-26,639.77,Pattaya
4,e652f5bb-5eb8-4cba-8cca-686ef729dab6,a7a0e704-62be-4d7d-bd51-58c5202d2865,2020-09-18,4258.88,Chiang Mai
5,4d5e7121-74a0-4660-8195-13ac7d1f3c36,de545a1e-737f-4504-ab95-54c83068a9fd,2022-03-01,2355.9,Khon Kaen
6,34b47cef-c763-44e6-9f9c-2966cf1f459b,b56db7a6-58bd-477d-b712-40c259798b1f,2021-07-10,4663.57,Phuket
7,1ddabf1f-9157-4a6d-bc1e-e6bf993ad3df,66f6509a-ae62-4fcd-92d2-2c74d53a51f2,2022-08-07,1132.61,Chiang Mai
8,5bcda891-47af-41d4-a342-71f1e3fef92c,78b76bc1-ae1a-4fda-a960-0eedc7dac9d8,2024-02-04,3760.28,Bangkok
9,e6429f74-c3ba-44ad-9548-4f5b59f160f8,c0b81e35-63b0-4f50-bd70-02df88a85ca3,2022-05-03,3509.33,Chiang Mai


<big>Loyalty Points data</big>

| Column | Description |
| :------- | :----------- |
| point_id | Unique point ID |
| customer_id | Link to customer |
| points_earned | Points earned (integer) |
| points_redeemed | Points used |
| transaction_date | Date of points update |
| channel | Online / Offline |

In [9]:
# --------------------------- #
#    Create Loyalty Points    #
# --------------------------- #

loyalty_points = []

for _ in range(5000):
    earned = random.randint(10, 500)
    redeemed = random.choice([0, random.randint(0, earned)])
    loyalty_points.append({
        "point_id": fake.uuid4(),
        "customer_id": random.choice(customer_df["customer_id"].values),
        "points_earned": earned,
        "points_redeemed": redeemed,
        "transaction_date": fake.date_between(start_date='-5y', end_date='-1y'),
        "channel": random.choice(["Online", "Offline"])
    })

loyalty_df = pd.DataFrame(loyalty_points)

In [10]:
loyalty_df.to_csv("../data/raw/loyalty_points.csv", index=False)
print(f"Generated {len(loyalty_df)} loyalty points records.")
loyalty_df.head(10)

Generated 5000 loyalty points records.


Unnamed: 0,point_id,customer_id,points_earned,points_redeemed,transaction_date,channel
0,3baf1098-df86-45f0-b20e-c07042265d75,2f6f2591-5e22-4d9f-b1f1-3e2e19efc29e,109,89,2022-05-28,Offline
1,1c60aaac-461a-4d88-acbb-df143e269697,b9066cfa-1166-49e4-be88-b95f29f6dd42,379,0,2023-08-17,Online
2,524a9421-9623-41f6-858a-184e8719df9c,d0def92b-1a21-41fb-a1a0-e596a956a334,422,109,2022-08-18,Offline
3,cbf9f2ed-c359-4837-bc70-6daadfe2abb5,5a7a4aa5-6e54-45d6-9469-f467fde65c67,431,0,2020-12-23,Offline
4,cf13e42d-2c5f-4e47-8394-77c414210f5f,7ee16e03-9a21-456e-9ecc-64a68513a181,70,18,2023-07-28,Offline
5,4b9721c2-42e9-48df-9608-26b3b1f6a78c,372138f9-4169-487c-a649-eafc4ff3dc87,488,183,2021-10-16,Offline
6,3319902c-ea37-4870-8dc4-b4c8096b0d34,11be8267-8836-4f03-92f8-51f9720a6852,208,0,2022-05-22,Online
7,e8d19f1b-f55b-44c4-b991-23c2bbd28668,744a241a-d59c-42da-94cd-47f710345167,142,0,2021-06-29,Online
8,13a5f417-9e70-42f6-9e52-9fbfec9370d5,5a0b26d6-cb3a-4f00-aef6-f1ec34759620,256,27,2022-08-24,Offline
9,ed256a46-ae7f-4f69-8964-5c86face0538,e3b0e046-0442-4459-979e-c959587fecf8,324,82,2021-05-08,Offline
