In [None]:
import pandas as pd
import numpy as np
from sklearn.feature_selection import mutual_info_regression, f_classif
from scipy.stats import pearsonr
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import mean_squared_error

In [None]:
# Load the raw award records (one row per award) and the per-employee
# emotion/vibe scores from the Colab working directory.
df = pd.read_csv(filepath_or_buffer='/content/rewards_dataset.csv')
df_emotions = pd.read_csv(
    filepath_or_buffer='/content/unique_employees_weighted_emotion_vibe.csv'
)

In [None]:
# Preview the first five award records.
df.head(n=5)

Unnamed: 0,Employee_ID,Award_Type,Award_Date,Reward_Points
0,EMP0182,Innovation Award,2023-01-01,205
1,EMP0338,Leadership Excellence,2023-01-02,494
2,EMP0090,Best Team Player,2023-01-03,451
3,EMP0133,Best Team Player,2023-01-04,393
4,EMP0156,Star Performer,2023-01-05,271


In [None]:
# Preview the first five rows of the emotion/vibe scores.
df_emotions.head(n=5)

Unnamed: 0,Employee_ID,Weighted_Emotion_Score,Weighted_vibe_Score
0,EMP0002,0.0,1.0
1,EMP0003,2.0,2.9996
2,EMP0004,3.0,4.0
3,EMP0005,0.0,3.0
4,EMP0006,5.0,5.0


In [None]:
# Order awards by employee, newest award first within each employee.
df = df.sort_values(
    by=["Employee_ID", "Award_Date"],
    ascending=[True, False],
)
df.head(n=5)

Unnamed: 0,Employee_ID,Award_Type,Award_Date,Reward_Points
255,EMP0001,Innovation Award,2023-09-13,63
308,EMP0002,Best Team Player,2023-11-05,409
335,EMP0003,Leadership Excellence,2023-12-02,346
58,EMP0003,Star Performer,2023-02-28,362
221,EMP0004,Leadership Excellence,2023-08-10,168


In [None]:
# Replace the award label with a constant 1 on every row, so that summing
# the column per employee later yields that employee's number of awards.
df = df.assign(Award_Type=1)
df

Unnamed: 0,Employee_ID,Award_Type,Award_Date,Reward_Points
255,EMP0001,1,2023-09-13,63
308,EMP0002,1,2023-11-05,409
335,EMP0003,1,2023-12-02,346
58,EMP0003,1,2023-02-28,362
221,EMP0004,1,2023-08-10,168
...,...,...,...,...
462,EMP0498,1,2024-04-07,299
379,EMP0498,1,2024-01-15,395
321,EMP0498,1,2023-11-18,442
206,EMP0498,1,2023-07-26,351


In [None]:
# Parse the award dates; values that cannot be parsed become NaT instead of
# raising (errors='coerce').
df["Award_Date"] = pd.to_datetime(df["Award_Date"], errors='coerce')
df = df.sort_values(
    by=["Employee_ID", "Award_Date"],
    ascending=[True, False],
)

# Per-day exponential decay rate: Weight = exp(-Days_Since / 5).
# The weight drops off very fast (about 6e-08 for an award 83 days older
# than the latest one), so the most recent award dominates any weighted mean.
decay_factor = 1 / 5

# Days between each award and the same employee's most recent award
# (0 for the most recent award itself).
df["Days_Since"] = (
    df.groupby("Employee_ID")["Award_Date"].transform("max") - df["Award_Date"]
).dt.days

# Recency weight: 1.0 for the latest award, decaying towards 0 for older ones.
df["Weight"] = np.exp(-decay_factor * df["Days_Since"])

df



Unnamed: 0,Employee_ID,Award_Type,Award_Date,Reward_Points,Days_Since,Weight
255,EMP0001,1,2023-09-13,63,0,1.000000e+00
308,EMP0002,1,2023-11-05,409,0,1.000000e+00
335,EMP0003,1,2023-12-02,346,0,1.000000e+00
58,EMP0003,1,2023-02-28,362,277,8.711355e-25
221,EMP0004,1,2023-08-10,168,0,1.000000e+00
...,...,...,...,...,...,...
462,EMP0498,1,2024-04-07,299,0,1.000000e+00
379,EMP0498,1,2024-01-15,395,83,6.176061e-08
321,EMP0498,1,2023-11-18,442,141,5.661032e-13
206,EMP0498,1,2023-07-26,351,256,5.809283e-23


In [None]:
def _summarize_employee(emp):
    """Reduce one employee's award rows to a two-value summary.

    Parameters
    ----------
    emp : pandas.DataFrame
        The rows of a single employee, holding the columns
        ``Reward_Points``, ``Weight`` and ``Award_Type``.

    Returns
    -------
    pandas.Series
        ``Weighted_Reward_Points``: mean of ``Reward_Points`` weighted by
        ``Weight``.
        ``Weighted_Award_Type``: plain sum of ``Award_Type`` (a row count when
        the column holds the constant 1 set in an earlier cell).
    """
    weights = emp["Weight"]
    return pd.Series({
        "Weighted_Reward_Points": (emp["Reward_Points"] * weights).sum() / weights.sum(),
        "Weighted_Award_Type": emp["Award_Type"].sum(),
    })


# Collapse the award rows into one row per employee.
# The columns are selected explicitly before .apply so that the grouping
# column is not passed to the function; calling .apply on the whole grouped
# frame is what triggers the pandas deprecation warning about operating on
# the grouping columns. Explicit selection also works on pandas versions
# that predate the include_groups argument.
weighted_df = (
    df.groupby("Employee_ID")[["Reward_Points", "Weight", "Award_Type"]]
    .apply(_summarize_employee)
    .reset_index()
)

weighted_df.head()


  weighted_df = df.groupby("Employee_ID").apply(


Unnamed: 0,Employee_ID,Weighted_Reward_Points,Weighted_Award_Type
0,EMP0001,63.0,1.0
1,EMP0002,409.0,1.0
2,EMP0003,346.0,2.0
3,EMP0004,168.000002,3.0
4,EMP0007,108.0,1.0


In [None]:
# Sanity check: after aggregation every employee should appear exactly once,
# so the distinct-ID count must match the row count.
num_rows = len(weighted_df)
num_unique_employees = weighted_df['Employee_ID'].nunique()

print(
    "The number of unique employee IDs is equal to the number of rows."
    if num_unique_employees == num_rows
    else "The number of unique employee IDs is not equal to the number of rows."
)


The number of unique employee IDs is equal to the number of rows.


In [None]:
# Write the per-employee summary to a CSV in the current working directory,
# without the DataFrame index.
output_path = "unique_employees_weighted_reward.csv"
weighted_df.to_csv(path_or_buf=output_path, index=False)
print(f"Processed data saved to {output_path}")


Processed data saved to unique_employees_weighted_reward.csv
