In [54]:
import pandas as pd
from pathlib import Path

# Project root is taken as three directory levels above the kernel's working directory.
# NOTE(review): this depends on where the kernel was started — confirm the notebook
# is always launched from its own folder, otherwise these paths point elsewhere.
WORKING_DIR = Path.cwd()
BASE_DIR = WORKING_DIR.parent.parent.parent

# Data folders used by this notebook, both under <BASE_DIR>/data.
DATA_RAW = BASE_DIR.joinpath("data", "raw")
DATA_PROCESSED = BASE_DIR.joinpath("data", "processed")

# Load the labeled events table from the processed-data folder.
# The file is parsed with ';' as the field separator.
events_csv = DATA_PROCESSED / "events_labeled.csv"
events = pd.read_csv(events_csv, sep=";")

# Preview the first rows as the cell output.
events.head()

Unnamed: 0,event_id,event_date,trading_date,ticker,publisher,studio,is_rockstar,game,franchise,event_type,...,adj_close,return,market_return,AR_event,CAR_m1_p1,CAR_m5_p5,CAR_0_1,CAR_0_3,CAR_0_5,impact_label
0,ATVI_2019_CODMOBILE_LAUNCH,2019-10-01,2019-10-01,ATVI,Activision,TiMi Studios,0,Call of Duty: Mobile,Call of Duty,Release,...,94.157463,-0.010938,-0.012258,-0.001198,0.005278,-0.013979,-0.012633,-0.017333,-0.017333,Low
1,ATVI_2019_CODMW_RELEASE,2019-10-25,2019-10-25,ATVI,Activision,Infinity Ward,0,Call of Duty: Modern Warfare,Call of Duty,Release,...,93.729248,0.003438,0.004073,-0.000318,0.000526,-0.002328,-0.000318,-0.001063,-0.003259,Low
2,ATVI_2020_WARCRAFT3_REFORGED,2020-01-28,2020-01-28,ATVI,Activision Blizzard,Blizzard,0,Warcraft III: Reforged,Warcraft,Controversy,...,108.901489,0.01212,0.010054,0.003422,0.003371,-0.024705,0.006518,-0.020621,-0.020621,Low
3,ATVI_2020_WARZONE_LAUNCH,2020-03-10,2020-03-10,ATVI,Activision,Infinity Ward,0,Call of Duty: Warzone,Call of Duty,Release,...,100.609787,0.024274,0.049396,-0.016937,0.001569,-0.031189,0.001777,-0.034795,-0.034795,Low
4,ATVI_2021_LAWSUIT,2021-07-20,2021-07-20,ATVI,Activision Blizzard,,0,,Activision,Controversy,...,137.807159,-0.001204,0.015163,-0.014124,-0.02408,0.003237,-0.023806,-0.004505,-0.004505,Medium


In [59]:

# Name of the column holding the class label to predict.
target = "impact_label"

# Input columns for the model. The order is significant: it determines the column
# order of the frame selected with `events[features + [target]]`.
# NOTE(review): if impact_label was derived from any of the AR/CAR columns below,
# using them as features leaks the target — confirm how the label was built.
features = [
    "publisher",
    "studio",
    "is_rockstar",
    "event_type",
    "franchise",
    "sentiment",
    "market_return",
    "AR_event",
    "CAR_0_1",
    "CAR_m1_p1",
    "CAR_0_3",
    "CAR_0_5",
    "CAR_m5_p5",
]


In [56]:
# Modelling frame: the selected feature columns plus the target.
# .copy() detaches it from `events`, so the edits below never touch the loaded frame.
df_ml = events[features + [target]].copy()

# Text-valued columns that are one-hot encoded.
cat_cols = ["publisher", "studio", "event_type", "franchise", "sentiment"]

# Make missing categories explicit before encoding. get_dummies encodes NaN as
# all-False indicators, and with drop_first=True that is exactly the encoding of the
# dropped baseline level — so an event with no studio (one appears in the events
# preview) would silently be treated as belonging to the baseline studio.
df_ml[cat_cols] = df_ml[cat_cols].fillna("Unknown")

# drop_first=True removes one indicator per column to avoid redundant dummies.
df_ml = pd.get_dummies(df_ml, columns=cat_cols, drop_first=True)

In [57]:
# Ordinal encoding of the text label: Low -> 0, Medium -> 1, High -> 2.
label_map = {
    label: code for code, label in enumerate(("Low", "Medium", "High"))
}

# Append the numeric label as the last column. Series.map leaves NaN for any
# value that is not a key of label_map (including missing labels).
df_ml = df_ml.assign(impact_label_num=df_ml[target].map(label_map))

# Preview the encoded frame as the cell output.
df_ml.head()

Unnamed: 0,is_rockstar,market_return,AR_event,CAR_0_1,CAR_m1_p1,CAR_0_3,CAR_0_5,CAR_m5_p5,impact_label,publisher_Activision Blizzard,...,franchise_Red Dead,franchise_Skull and Bones,franchise_Star Wars,franchise_Switch,franchise_TTWO,franchise_Warcraft,franchise_Zelda,sentiment_neutral,sentiment_positive,impact_label_num
0,0,-0.012258,-0.001198,-0.012633,0.005278,-0.017333,-0.017333,-0.013979,Low,False,...,False,False,False,False,False,False,False,False,True,0
1,0,0.004073,-0.000318,-0.000318,0.000526,-0.001063,-0.003259,-0.002328,Low,False,...,False,False,False,False,False,False,False,False,True,0
2,0,0.010054,0.003422,0.006518,0.003371,-0.020621,-0.020621,-0.024705,Low,True,...,False,False,False,False,False,True,False,False,False,0
3,0,0.049396,-0.016937,0.001777,0.001569,-0.034795,-0.034795,-0.031189,Low,False,...,False,False,False,False,False,False,False,False,True,0
4,0,0.015163,-0.014124,-0.023806,-0.02408,-0.004505,-0.004505,0.003237,Medium,True,...,False,False,False,False,False,False,False,False,False,1


In [28]:
# Keep only fully populated rows: a row is discarded if any of its columns is missing.
# This also removes rows whose label could not be mapped to a number.
df_ml = df_ml.dropna(axis="index", how="any")

In [29]:
# Both representations of the label (text and numeric) are excluded from the
# feature matrix; the numeric one is the prediction target.
label_cols = [target, "impact_label_num"]
X = df_ml.drop(label_cols, axis="columns")
y = df_ml.loc[:, "impact_label_num"]

In [30]:
# Persist the encoded dataset (features plus both label columns) without the index.
# NOTE(review): events_labeled.csv is read with sep=";" but this file is written with
# the default "," separator — confirm downstream readers expect a comma.
out = DATA_PROCESSED.joinpath("ml_dataset.csv")
df_ml.to_csv(path_or_buf=out, index=False)

# Show the destination path as the cell output.
out

PosixPath('/files/capstone_project/game-market-event-analyzer/data/processed/ml_dataset.csv')