In [9]:
import pandas as pd
import numpy as np

In [10]:
# Read the raw green-engagement event log; later cells group it by `user_id`.
logs = pd.read_csv(
    'Dataset/Synthetic_Dataset/user_green_engagement_logs.csv',
)

In [11]:
# One group per distinct `user_id`; iterating yields (user_id, sub-frame) pairs.
grouped = logs.groupby(by='user_id')

In [12]:
# --- Reward-scheme parameters (kept together so they are easy to tune) ---
MAX_ECO_SCORE_PER_ACTION = 450   # assumed maximum `eco_score` for a single action
POINTS_PER_GREEN_ACTION = 10     # flat points awarded per counted green action
POINTS_PER_AVG_ECO_SAVED = 0.1   # points per unit of average eco-score saving
# (minimum reward points, tier name), ordered from highest to lowest threshold.
TIER_THRESHOLDS = (
    (320, 'Platinum'),
    (240, 'Gold'),
    (160, 'Silver'),
)
DEFAULT_TIER = 'Bronze'          # tier for anyone below the lowest threshold


def _assign_tier(reward_points):
    """Return the highest tier whose minimum `reward_points` reaches.

    Falls back to `DEFAULT_TIER` when no threshold in `TIER_THRESHOLDS` is met.
    """
    for min_points, tier_name in TIER_THRESHOLDS:
        if reward_points >= min_points:
            return tier_name
    return DEFAULT_TIER


def _summarize_user(user_id, user_logs):
    """Build the rewards row for one user from that user's log rows.

    Parameters
    ----------
    user_id
        Group key produced by the `user_id` groupby.
    user_logs : pandas.DataFrame
        The user's log rows. Reads the columns `refurbished`,
        `delivery_type`, `used_green_toggle` and `eco_score`.

    Returns
    -------
    list
        ``[user_id, total_green_actions, avg_eco_saved (2 dp),
        refurbished_count, delayed_count, green_toggle_count,
        reward_points (truncated to int), tier]``.
    """
    refurbished_count = int((user_logs['refurbished'] == 1).sum())
    delayed_count = int((user_logs['delivery_type'] == 'delayed').sum())

    # Filter the toggle rows once; both the count and the eco-score sum use them.
    green_action = user_logs[user_logs['used_green_toggle'] == 1]
    green_toggle_count = len(green_action)

    # Shortfall of the observed eco scores against the assumed per-action
    # maximum, floored at zero.
    # NOTE(review): Series.sum() skips NaN by default, so a missing `eco_score`
    # counts as 0 here (i.e. as a full saving) -- confirm the column is complete.
    eco_saving = max(
        0,
        MAX_ECO_SCORE_PER_ACTION * green_toggle_count - green_action['eco_score'].sum(),
    )
    avg_eco_saved = eco_saving / green_toggle_count if green_toggle_count > 0 else 0

    # The three counters are independent filters, so a single log row can add
    # to more than one of them.
    total_green_actions = refurbished_count + delayed_count + green_toggle_count

    # NOTE(review): the bonus term uses the *average* eco-score shortfall per
    # toggled action, not a total; an earlier comment described it as
    # "0.1 pt per kg CO2 saved" -- confirm which formula is intended.
    reward_points = (
        total_green_actions * POINTS_PER_GREEN_ACTION
        + avg_eco_saved * POINTS_PER_AVG_ECO_SAVED
    )

    return [
        user_id,
        total_green_actions,
        round(avg_eco_saved, 2),
        refurbished_count,
        delayed_count,
        green_toggle_count,
        int(reward_points),  # truncates toward zero; the tier uses the unrounded value
        _assign_tier(reward_points),
    ]


# One row per user, in groupby iteration order; rebuilt from scratch on every
# run so re-executing this cell never duplicates rows.
rows = [_summarize_user(user_id, user_logs) for user_id, user_logs in grouped]

In [13]:
# Output column names, in the same order as the values in each entry of `rows`.
columns = [
    'user_id',
    'total_green_actions',
    'avg_eco_score_saved',
    'refurbished_count',
    'delayed_delivery_count',
    'green_toggle_used',
    'reward_points',
    'tier',
]

In [14]:
# Turn the per-user rows into a table and write it out as CSV.
rewards_df = pd.DataFrame(data=rows, columns=columns)
rewards_df.to_csv(
    'Dataset/Synthetic_Dataset/user_green_rewards.csv',
    index=False,  # leave the row index out of the file
)