In [19]:
import random
from datetime import datetime, timedelta
from pathlib import Path

import numpy as np
import pandas as pd

In [20]:
# Fixed seed for both generators used in this notebook (the stdlib `random`
# module and NumPy's legacy global RNG) so that Restart Kernel -> Run All
# regenerates exactly the same synthetic dataset.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# Number of synthetic log rows to generate.
num_events = 10000

# One user id and one item id per event. np.random.randint excludes the upper
# bound, so users fall in 1..500 and items in 1..5000.
user_ids = np.random.randint(1, 501, size=num_events)
item_ids = np.random.randint(1, 5001, size=num_events)

# Sequential event ids 1..num_events.
event_ids = range(1, num_events + 1)

In [21]:
# Kinds of event a log row can record. Order matters: the generation loop
# passes a positional `weights` list to random.choices alongside this list.
action_types = [
    'view',
    'green_toggle',
    'refurbished_purchase',
    'delayed_delivery',
    'normal_purchase',
]

# Delivery options. Order matters: the generation loop samples from this list
# with positional probabilities.
delivery_choices = ['instant', 'delayed']

# Appliance categories an event can refer to.
product_categories = [
    'Television (TV)',
    'Refrigerator (Fridge)',
    'Mixer Grinder',
    'Gas Stove/Cooktop',
    'Fans',
    'Washing Machine',
    'Air Conditioner (AC)',
    'Cooler',
    'Water Purifier',
    'Geyser/Water Heater',
]

In [22]:
def random_timestamp(start=datetime(2024, 1, 1), end=datetime(2025, 7, 1)):
    """Return a random datetime between ``start`` and ``end`` (both inclusive).

    The result is ``start`` plus a whole number of seconds drawn with
    ``random.randint``, so it is controlled by ``random.seed``.

    Args:
        start: Earliest possible timestamp. Defaults to 2024-01-01 00:00:00.
        end: Latest possible timestamp. Defaults to 2025-07-01 00:00:00.

    Returns:
        A ``datetime`` in the closed interval [start, end].

    Raises:
        ValueError: If ``end`` is earlier than ``start``.
    """
    if end < start:
        raise ValueError("end must not be earlier than start")
    # Width of the window in whole seconds; randint includes both endpoints.
    span_seconds = int((end - start).total_seconds())
    return start + timedelta(seconds=random.randint(0, span_seconds))


In [23]:
def assign_eco_score(action, product):
    """Draw a random eco score for one event.

    Each product category has a base ``(low, high)`` range; categories not in
    the table fall back to ``(100, 300)``. The action type then selects which
    scaled sub-range the score is drawn from (uniformly, via NumPy's global
    RNG). Any action other than the four handled explicitly is treated like a
    plain view and uses the unscaled base range.

    Args:
        action: Action type string, e.g. ``'view'`` or ``'normal_purchase'``.
        product: Product category string.

    Returns:
        The sampled score rounded to two decimal places.
    """
    score_ranges = {
        'Television (TV)': (150, 300),
        'Refrigerator (Fridge)': (300, 400),
        'Mixer Grinder': (50, 150),
        'Gas Stove/Cooktop': (100, 200),
        'Fans': (50, 150),
        'Washing Machine': (250, 350),
        'Air Conditioner (AC)': (350, 450),
        'Cooler': (100, 200),
        'Water Purifier': (100, 200),
        'Geyser/Water Heater': (100, 200)
    }
    low, high = score_ranges.get(product, (100, 300))

    # Choose the sampling bounds first, then draw exactly once at the end.
    if action == 'refurbished_purchase':
        # Both bounds are derived from the low end of the base range.
        lower, upper = low * 0.3, low * 0.6
    elif action == 'delayed_delivery':
        lower, upper = low * 0.5, high * 0.7
    elif action == 'green_toggle':
        lower, upper = low * 0.6, high * 0.8
    elif action == 'normal_purchase':
        lower, upper = low * 0.9, high * 1.1
    else:  # view, or any unrecognised action
        lower, upper = low, high

    return round(np.random.uniform(lower, upper), 2)


In [24]:
# Build one row per event; `data` ends up as a list of 11-element lists.
data = []
for i in range(num_events):
    # Pick the action with positional weights matching action_types
    # (views are more common).
    action = random.choices(action_types, weights=[60, 25, 15, 10, 20], k=1)[0]
    product = random.choice(product_categories)
    eco_score = assign_eco_score(action, product)

    # A refurbished purchase is always flagged; any other action is flagged
    # with probability 0.2.
    if action == 'refurbished_purchase':
        refurbished = 1
    else:
        refurbished = np.random.choice([0, 1], p=[0.8, 0.2])

    # A delayed-delivery action forces 'delayed'; otherwise sample from
    # delivery_choices with probabilities 0.8 / 0.2.
    if action == 'delayed_delivery':
        delivery_type = 'delayed'
    else:
        delivery_type = np.random.choice(delivery_choices, p=[0.8, 0.2])

    # These three actions always count as green-toggle usage; for the rest
    # the flag is set with probability 0.3.
    if action in ['green_toggle', 'refurbished_purchase', 'delayed_delivery']:
        used_green_toggle = 1
    else:
        used_green_toggle = np.random.choice([0, 1], p=[0.7, 0.3])

    # Field order must match the `columns` list used to build the DataFrame.
    row = [
        event_ids[i],
        user_ids[i],
        random_timestamp(),
        action,
        item_ids[i],
        product,
        eco_score,
        refurbished,
        delivery_type,
        used_green_toggle,
        f"session_{user_ids[i]}_{random.randint(1, 100)}"
    ]
    data.append(row)

In [25]:
# Column labels for the event log, in the same positional order as the
# values appended to each row in the generation loop.
columns = [
    'event_id',
    'user_id',
    'timestamp',
    'action_type',
    'item_id',
    'product_type',
    'eco_score',
    'refurbished',
    'delivery_type',
    'used_green_toggle',
    'session_id',
]

In [26]:
# Turn the accumulated event rows into a table with the declared column labels.
df = pd.DataFrame(columns=columns, data=data)

# Leave the frame as the cell's final expression so the notebook renders it.
df

Unnamed: 0,event_id,user_id,timestamp,action_type,item_id,product_type,eco_score,refurbished,delivery_type,used_green_toggle,session_id
0,1,475,2024-02-14 02:13:12,view,4875,Air Conditioner (AC),437.13,0,instant,0,session_475_76
1,2,479,2025-01-04 22:58:49,view,2608,Geyser/Water Heater,123.48,1,instant,0,session_479_86
2,3,323,2025-01-08 08:58:45,refurbished_purchase,2202,Refrigerator (Fridge),177.14,1,delayed,1,session_323_34
3,4,345,2025-01-01 17:59:38,view,4072,Water Purifier,174.51,0,instant,0,session_345_55
4,5,353,2024-02-14 05:31:29,view,3673,Refrigerator (Fridge),374.10,0,instant,1,session_353_22
...,...,...,...,...,...,...,...,...,...,...,...
9995,9996,381,2024-10-02 18:09:03,view,1460,Mixer Grinder,113.93,1,instant,1,session_381_18
9996,9997,385,2024-03-06 19:03:21,refurbished_purchase,4938,Air Conditioner (AC),150.91,1,instant,1,session_385_72
9997,9998,147,2024-06-27 02:25:13,view,1544,Refrigerator (Fridge),381.32,0,instant,0,session_147_4
9998,9999,386,2024-03-07 11:08:29,view,2483,Cooler,125.09,0,instant,1,session_386_67


In [27]:
# Persist the synthetic log as CSV without the DataFrame index.
# DataFrame.to_csv does not create missing parent directories, so make sure
# the target folder exists first (no-op when it is already there).
output_path = Path("Dataset/Synthetic_Dataset/user_green_engagement_logs.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, index=False)