In [155]:
from pathlib import Path

# This should match your project root
BASE_DIR = Path.cwd().parent.parent.parent

DATA_RAW = BASE_DIR / "data" / "raw"
DATA_PROCESSED = BASE_DIR / "data" / "processed"

print("BASE_DIR:", BASE_DIR)
print("DATA_RAW exists:", DATA_RAW.exists())
print("DATA_PROCESSED exists:", DATA_PROCESSED.exists())


BASE_DIR: /files/capstone_project/game-market-event-analyzer
DATA_RAW exists: True
DATA_PROCESSED exists: True


In [156]:
import pandas as pd

events_path = DATA_RAW / "events.csv"

# Use the correct encoding (try latin-1 first - most common for European data)
events = pd.read_csv(events_path, sep=";", encoding="latin-1")

print("Columns:", events.columns.tolist())
print(f"✅ Loaded {len(events)} events")
events.head()

Columns: ['event_id', 'date', 'publisher', 'ticker', 'studio', 'is_rockstar', 'game', 'franchise', 'event_type', 'sentiment', 'impact_expectation_manual', 'source_url', 'notes']
✅ Loaded 39 events


Unnamed: 0,event_id,date,publisher,ticker,studio,is_rockstar,game,franchise,event_type,sentiment,impact_expectation_manual,source_url,notes
0,TTWO_2011_GTA5_ANNOUNCEMENT,25.10.11,Take-Two,TTWO,Rockstar Games,1,GTA V,GTA,Major_event,Positive,Medium,https://www.rockstargames.com/newswire/article...,Rockstar announces Grand Theft Auto V with fir...
1,TTWO_2013_GTA5_RELEASE,17.09.13,Take-Two,TTWO,Rockstar Games,1,GTA V,GTA,Game_release,Positive,High,https://www.rockstargames.com/newswire/article...,GTA V official global release; record-breaking...
2,TTWO_2013_GTAONLINE_LAUNCH,01.10.13,Take-Two,TTWO,Rockstar Games,1,GTA Online,GTA,Game_release,Positive,Medium,https://www.rockstargames.com/newswire/article...,Launch of GTA Online as an online component to...
3,TTWO_2014_GTAONLINE_HEISTS,09.12.14,Take-Two,TTWO,Rockstar Games,1,GTA Online,GTA,Major_event,Positive,Medium,https://www.rockstargames.com/newswire/article...,GTA Online Heists trailer; major content updat...
4,TTWO_2015_GTA5_PC_RELEASE,14.04.15,Take-Two,TTWO,Rockstar Games,1,GTA V,GTA,Game_release,Positive,Medium,https://www.rockstargames.com/newswire/article...,PC release of GTA V with enhanced graphics and...


In [157]:
# Normalize date and ticker
events["date"] = pd.to_datetime(events["date"], dayfirst=True, errors="coerce")
events["ticker"] = events["ticker"].astype(str).str.upper()

# Fix Ubisoft ticker: map empty/UBI.PA to UBSFY
events.loc[events["publisher"] == "Ubisoft", "ticker"] = "UBSFY"
events["ticker"] = events["ticker"].str.replace("UBI.PA", "UBSFY", regex=False)

print(events[["event_id", "date", "ticker", "publisher"]].head(15))
print("Unique tickers in events:", events["ticker"].unique())


                                 event_id       date ticker   publisher
0             TTWO_2011_GTA5_ANNOUNCEMENT 2011-10-25   TTWO    Take-Two
1                  TTWO_2013_GTA5_RELEASE 2013-09-17   TTWO    Take-Two
2              TTWO_2013_GTAONLINE_LAUNCH 2013-10-01   TTWO    Take-Two
3              TTWO_2014_GTAONLINE_HEISTS 2014-12-09   TTWO    Take-Two
4               TTWO_2015_GTA5_PC_RELEASE 2015-04-14   TTWO    Take-Two
5             TTWO_2016_RDR2_ANNOUNCEMENT 2016-10-18   TTWO    Take-Two
6           NTDOY_2017_ZELDA_BOTW_RELEASE 2017-03-03  NTDOY    Nintendo
7                NTDOY_2017_SWITCH_LAUNCH 2017-03-03  NTDOY    Nintendo
8                    TTWO_2017_RDR2_DELAY 2017-05-22   TTWO    Take-Two
9               EA_2017_SWBF2_CONTROVERSY 2017-11-17     EA          EA
10                 TTWO_2018_RDR2_RELEASE 2018-10-26   TTWO    Take-Two
11                    EA_2019_APEX_LAUNCH 2019-02-04     EA          EA
12                 EA_2019_ANTHEM_RELEASE 2019-02-22     EA     

  events["date"] = pd.to_datetime(events["date"], dayfirst=True, errors="coerce")


In [158]:
import pandas as pd

# Load prices to see if UBSFY exists
prices_path = DATA_PROCESSED / "prices_with_returns.csv"
prices = pd.read_csv(prices_path)

# ✅ CONVERT DATE TO DATETIME
prices["date"] = pd.to_datetime(prices["date"])
prices["ticker"] = prices["ticker"].astype(str).str.upper()

print("Unique tickers in prices:")
print(prices["ticker"].unique())
print(f"\nUBSFY rows: {len(prices[prices['ticker'] == 'UBSFY'])}")

# Check date range for UBSFY
ubsfy_prices = prices[prices['ticker'] == 'UBSFY']
print(f"\nUBSFY date range: {ubsfy_prices['date'].min()} to {ubsfy_prices['date'].max()}")

# Check Ubisoft event dates
ubsfy_events = events[events['ticker'] == 'UBSFY']
print(f"\nUbisoft event dates:")
print(ubsfy_events[['event_id', 'date', 'ticker']])

Unique tickers in prices:
['ATVI' 'EA' 'NTDOY' 'SP500' 'TTWO' 'UBSFY']

UBSFY rows: 3993

UBSFY date range: 2010-01-04 00:00:00 to 2025-11-14 00:00:00

Ubisoft event dates:
                                 event_id       date ticker
14  UBI_2019_GHOSTRECON_BREAKPOINT_LAUNCH 2019-10-04  UBSFY
22           UBI_2020_AC_VALHALLA_RELEASE 2020-11-10  UBSFY
31        UBI_2023_AC_MIRAGE_ANNOUNCEMENT 2022-09-10  UBSFY
32         UBI_2022_SKULL_AND_BONES_DELAY 2022-09-28  UBSFY


In [159]:
events_sorted = events.sort_values(["ticker", "date"]).reset_index(drop=True)
prices_sorted = prices.sort_values(["ticker", "date"]).reset_index(drop=True)

merged_list = []

for ticker in events_sorted["ticker"].unique():
    e = events_sorted[events_sorted["ticker"] == ticker].copy()
    p = prices_sorted[prices_sorted["ticker"] == ticker].copy()

    if p.empty:
        print(f"[WARN] no price data for ticker {ticker}, leaving NaNs.")
        e["trading_date"] = pd.NaT
        e["adj_close"] = pd.NA
        e["return"] = pd.NA
        e["market_return"] = pd.NA
        merged_list.append(e)
        continue

    # Rename for clarity BEFORE merge
    e = e.rename(columns={"date": "event_date"}).reset_index(drop=True)
    p = p.rename(columns={"date": "trading_date"}).reset_index(drop=True)

    # Sort by the renamed columns
    e = e.sort_values("event_date")
    p = p.sort_values("trading_date")

    # merge_asof requires sorted data
    tmp = pd.merge_asof(
        e,
        p,
        left_on="event_date",
        right_on="trading_date",
        direction="backward",
    )

    merged_list.append(tmp)

merged = pd.concat(merged_list, ignore_index=True)

print(f"✅ Merged {len(merged)} rows")
print(merged.head())

✅ Merged 39 rows
                        event_id event_date            publisher ticker_x  \
0     ATVI_2019_CODMOBILE_LAUNCH 2019-10-01           Activision     ATVI   
1        ATVI_2019_CODMW_RELEASE 2019-10-25           Activision     ATVI   
2   ATVI_2020_WARCRAFT3_REFORGED 2020-01-28  Activision Blizzard     ATVI   
3       ATVI_2020_WARZONE_LAUNCH 2020-03-10           Activision     ATVI   
4  ATVI_2021_DIABLO2_RESURRECTED 2021-09-23  Activision Blizzard     ATVI   

          studio  is_rockstar                          game     franchise  \
0   TiMi Studios            0          Call of Duty: Mobile  Call of Duty   
1  Infinity Ward            0  Call of Duty: Modern Warfare  Call of Duty   
2       Blizzard            0        Warcraft III: Reforged      Warcraft   
3  Infinity Ward            0         Call of Duty: Warzone  Call of Duty   
4       Blizzard            0        Diablo II: Resurrected        Diablo   

     event_type sentiment impact_expectation_manual  \
0 

In [160]:
print(f"✅ Merged {len(merged)} rows")
print("Columns:", merged.columns.tolist())

# Clean up duplicate columns from merge
if "ticker_x" in merged.columns and "ticker_y" in merged.columns:
    merged = merged.drop("ticker_y", axis=1)
    merged = merged.rename(columns={"ticker_x": "ticker"})

# Keep only the columns you need
cols_to_keep = ["event_id", "event_date", "trading_date", "ticker", "is_rockstar", "event_type", "sentiment", "impact_expectation_manual", "adj_close", "return", "market_return"]
merged = merged[[col for col in cols_to_keep if col in merged.columns]]

print(merged.head(10))

✅ Merged 39 rows
Columns: ['event_id', 'event_date', 'publisher', 'ticker_x', 'studio', 'is_rockstar', 'game', 'franchise', 'event_type', 'sentiment', 'impact_expectation_manual', 'source_url', 'notes', 'trading_date', 'ticker_y', 'adj_close', 'return', 'market_return']
                            event_id event_date trading_date ticker  \
0         ATVI_2019_CODMOBILE_LAUNCH 2019-10-01   2019-10-01   ATVI   
1            ATVI_2019_CODMW_RELEASE 2019-10-25   2019-10-25   ATVI   
2       ATVI_2020_WARCRAFT3_REFORGED 2020-01-28   2020-01-28   ATVI   
3           ATVI_2020_WARZONE_LAUNCH 2020-03-10   2020-03-10   ATVI   
4      ATVI_2021_DIABLO2_RESURRECTED 2021-09-23   2021-09-23   ATVI   
5     ATVI_2022_MSFT_ACQUISITION_ANN 2022-01-18   2022-01-18   ATVI   
6          EA_2017_SWBF2_CONTROVERSY 2017-11-17   2017-11-17     EA   
7                EA_2019_APEX_LAUNCH 2019-02-04   2019-02-04     EA   
8             EA_2019_ANTHEM_RELEASE 2019-02-22   2019-02-22     EA   
9  EA_2020_FIFA_ULT

In [161]:
print("Rows:", len(merged))
print("NaN in adj_close:", merged["adj_close"].isna().sum())
print("NaN in return:", merged["return"].isna().sum())
print("NaN in market_return:", merged["market_return"].isna().sum())

merged[["event_id", "event_date", "trading_date", "ticker", "is_rockstar", "event_type", "sentiment", "impact_expectation_manual", "adj_close", "return", "market_return"]].head(10)

Rows: 39
NaN in adj_close: 0
NaN in return: 0
NaN in market_return: 0


Unnamed: 0,event_id,event_date,trading_date,ticker,is_rockstar,event_type,sentiment,impact_expectation_manual,adj_close,return,market_return
0,ATVI_2019_CODMOBILE_LAUNCH,2019-10-01,2019-10-01,ATVI,0,Game_release,Positive,Medium,94.157463,-0.010938,-0.012258
1,ATVI_2019_CODMW_RELEASE,2019-10-25,2019-10-25,ATVI,0,Game_release,Positive,High,93.729248,0.003438,0.004073
2,ATVI_2020_WARCRAFT3_REFORGED,2020-01-28,2020-01-28,ATVI,0,Bad_review,Negative,Medium,108.901489,0.01212,0.010054
3,ATVI_2020_WARZONE_LAUNCH,2020-03-10,2020-03-10,ATVI,0,Game_release,Positive,High,100.609787,0.024274,0.049396
4,ATVI_2021_DIABLO2_RESURRECTED,2021-09-23,2021-09-23,ATVI,0,Game_release,Neutral,Medium,125.080391,0.011788,0.012135
5,ATVI_2022_MSFT_ACQUISITION_ANN,2022-01-18,2022-01-18,ATVI,0,Major_event,Positive,High,131.144638,0.026602,-0.018388
6,EA_2017_SWBF2_CONTROVERSY,2017-11-17,2017-11-17,EA,0,Bad_review,Negative,High,105.90403,-0.02491,-0.002626
7,EA_2019_APEX_LAUNCH,2019-02-04,2019-02-04,EA,0,Game_release,Positive,High,86.06041,-0.030586,0.006776
8,EA_2019_ANTHEM_RELEASE,2019-02-22,2019-02-22,EA,0,Game_release,Negative,High,93.349693,-0.009705,0.006411
9,EA_2020_FIFA_ULTIMATETEAM_REVENUE,2020-05-05,2020-05-05,EA,0,Major_event,Positive,Medium,116.404884,0.023795,0.009041


In [162]:
merged[merged["event_id"].str.contains("GTA6", case=False, na=False)][
    ["event_id", "event_date", "trading_date", "ticker", "adj_close", "return", "market_return"]
]

Unnamed: 0,event_id,event_date,trading_date,ticker,adj_close,return,market_return
27,TTWO_2022_GTA6_DEV-ANNOUNCEMENT,2022-02-04,2022-02-04,TTWO,175.0,0.073488,0.005157
30,TTWO_2023_GTA6_TRAILER1,2023-12-05,2023-12-05,TTWO,156.759995,-0.005077,-0.000569
31,TTWO_2025_GTA6_DELAY1,2025-05-02,2025-05-02,TTWO,219.5,-0.066633,0.014727
32,TTWO_2025_GTA6_TRAILER2,2025-05-06,2025-05-06,TTWO,231.839996,0.028937,-0.007693
33,TTWO_2025_GTA6_DELAY2,2025-11-06,2025-11-06,TTWO,252.399994,-0.009264,-0.011178


In [163]:
out_path = DATA_PROCESSED / "events_with_returns.csv"
merged.to_csv(out_path, index=False)
out_path

PosixPath('/files/capstone_project/game-market-event-analyzer/data/processed/events_with_returns.csv')