**Standardize the file**

*Installing and calling the necessary libraries*

In [None]:
!pip install pandas-datareader --quiet

In [None]:
import pandas as pd
import numpy as np
import yfinance as yf
from pandas_datareader.data import DataReader
from scipy.stats import chi2
from datetime import timedelta

Import the CSV file containing information about each asset to be analyzed and adjusted, including the name of the asset (for stocks, also the market it is listed on) and the currency it is quoted in.

In [None]:
# Load the listings table and trim stray whitespace from its column headers.
market_info = pd.read_csv("clean_company_listings.csv").rename(columns=str.strip)

# Exchange name (long form and abbreviation) -> currency code, built from a
# currency-grouped listing so each currency is spelled out only once.
market_currency_map = {
    exchange: currency
    for currency, exchanges in (
        ('USD', ('NASDAQ', 'NYSE')),
        ('GBP', ('London Stock Exchange', 'LSE')),
        ('JPY', ('Tokyo Stock Exchange', 'TSE')),
        ('CNY', ('Shanghai Stock Exchange', 'SSE')),
        ('HKD', ('Hong Kong Stock Exchange', 'HKEX')),
        ('CAD', ('Toronto Stock Exchange', 'TSX')),
        ('AUD', ('Australian Securities Exchange', 'ASX')),
        ('EUR', ('Euronext', 'Frankfurt Stock Exchange', 'FWB')),
    )
    for exchange in exchanges
}

# Tag every listing with its currency; exchanges missing from the map are
# labelled 'Unknown'.
market_info['Currency'] = (
    market_info['stock offering'].map(market_currency_map).fillna('Unknown')
)
print(market_info.head())

Define a function that fetches, for the currency specified in the assets_markets_file, the exchange rate needed to convert prices to USD, and a second function that applies the conversion where needed.

In [None]:
def get_exchange_rate(currency, start_date, end_date):
    """Get daily exchange rates from ``currency`` to USD.

    Args:
        currency: Currency code, case-insensitive (e.g. ``'GBP'`` or ``'gbp'``).
        start_date: Start of the requested date range.
        end_date: End of the requested date range.

    Returns:
        DataFrame with columns ``Date`` and ``Rate_to_USD``. For USD, or when
        the download fails or contains no ``Close`` prices, the frame has one
        row per calendar day between the two dates with a rate of 1.0.
    """
    def _flat_rates():
        # Shared by the USD shortcut and the "no data" fallback.
        dates = pd.date_range(start=start_date, end=end_date)
        return pd.DataFrame({'Date': dates, 'Rate_to_USD': [1.0] * len(dates)})

    # Normalise once so the USD check and the ticker symbol agree on casing.
    code = currency.upper()
    if code == 'USD':
        return _flat_rates()

    symbol = f"{code}USD=X"
    print(f"Fetching exchange rate for {symbol}...")

    try:
        data = yf.download(symbol, start=start_date, end=end_date)
    except Exception as e:
        # Best-effort boundary: any download problem falls through to the
        # 1.0 fallback below instead of aborting the pipeline.
        print(f"⚠️ Failed to download {symbol}: {e}")
        data = pd.DataFrame()

    # Recent yfinance releases can return two-level (field, ticker) columns
    # even for a single symbol; keep only the field level so the result can
    # be merged with flat price frames.
    if isinstance(data.columns, pd.MultiIndex):
        data = data.copy()
        data.columns = data.columns.get_level_values(0)

    # Handle empty or invalid downloads gracefully
    if data.empty or 'Close' not in data.columns:
        print(f"⚠️ No valid data for {symbol}. Using 1.0 as fallback rate.")
        return _flat_rates()

    # Prepare the result
    data = data.reset_index()[['Date', 'Close']].rename(columns={'Close': 'Rate_to_USD'})
    print(f"✅ Downloaded {len(data)} daily rates for {symbol}")
    return data


def convert_to_usd(df, currency, exchange_rates):
    """Merge exchange rates onto the price rows and convert values to USD.

    Neither input frame is modified.

    Args:
        df: Frame with a ``Date`` column and a ``Value`` column in ``currency``.
        currency: Currency code, used only in the warning message.
        exchange_rates: Frame with ``Date`` and ``Rate_to_USD`` columns.

    Returns:
        DataFrame with columns ``Date`` (timezone-naive) and ``Value_USD``,
        one row per row of ``df`` in the same order.
    """
    values = df.assign(Date=pd.to_datetime(df['Date']).dt.tz_localize(None))

    rates = exchange_rates.copy()
    # A rates frame with two-level columns cannot be merged with a flat
    # frame; keep only the first level.
    if isinstance(rates.columns, pd.MultiIndex):
        rates.columns = rates.columns.get_level_values(0)
    rates['Date'] = pd.to_datetime(rates['Date']).dt.tz_localize(None)

    merged = values.merge(rates, on='Date', how='left')

    if merged['Rate_to_USD'].isna().any():
        print(f"⚠️ Missing rates detected for {currency}, filling forward/backward.")
        # Assign the filled column back: in-place fills on a selected column
        # do not reliably update the parent frame.
        merged['Rate_to_USD'] = merged['Rate_to_USD'].ffill().bfill()

    merged['Value_USD'] = merged['Value'] * merged['Rate_to_USD']
    return merged[['Date', 'Value_USD']]


Once all the prices are in USD, adjust the values for inflation using the CPI (Consumer Price Index) from FRED.

In [None]:
def get_inflation_index(start_date, end_date):
    """Build a daily US CPI series (FRED ``CPIAUCNS``) for a date range.

    Each calendar day carries the most recent CPI observation on or before
    it (a step function, not a linear interpolation); days before the first
    fetched observation take the first available value.

    Args:
        start_date: First day of the range (string, date or Timestamp).
        end_date: Last day of the range (string, date or Timestamp).

    Returns:
        DataFrame with columns ``Date`` (timezone-naive, midnight) and
        ``CPI_USD``. If the fetch fails, ``CPI_USD`` is NaN on every row.
    """
    def _naive_midnight(value):
        # Price dates are matched against this calendar by exact equality,
        # so strip any timezone and time-of-day from the bounds.
        stamp = pd.Timestamp(value)
        if stamp.tzinfo is not None:
            stamp = stamp.tz_localize(None)
        return stamp.normalize()

    start = _naive_midnight(start_date)
    end = _naive_midnight(end_date)
    calendar = pd.DataFrame({'Date': pd.date_range(start=start, end=end)})

    try:
        # Start 31 days early so an observation on or before ``start`` is
        # available for the backward as-of match.
        cpi = DataReader('CPIAUCNS', 'fred', start - pd.DateOffset(days=31), end)
    except Exception as e:
        print("FRED CPI fetch failed:", e)
        return calendar.assign(CPI_USD=np.nan)

    cpi = cpi.reset_index().rename(columns={'DATE': 'Date', 'CPIAUCNS': 'CPI_USD'})
    cpi['Date'] = pd.to_datetime(cpi['Date'])

    daily_index = pd.merge_asof(calendar,
                                cpi.sort_values('Date'),
                                on='Date',
                                direction='backward')

    # Fill any remaining gaps from the nearest known value. A further
    # interpolation step would be a no-op after ffill/bfill.
    daily_index['CPI_USD'] = daily_index['CPI_USD'].ffill().bfill()

    # Nothing is dropped here: an all-NaN series is returned as-is and the
    # caller decides how to handle it.
    if daily_index['CPI_USD'].isna().all():
        print("All CPI_USD values are NaN. Inflation adjustment skipped.")
    return daily_index


def adjust_for_inflation(df, inflation_index):
    """Re-express USD values at the price level of the latest known CPI.

    Neither input frame is modified.

    Args:
        df: Frame with ``Date`` and ``Value_USD`` columns.
        inflation_index: Frame with ``Date`` and ``CPI_USD`` columns.

    Returns:
        DataFrame with columns ``Date`` (timezone-naive) and
        ``Value_Adj_USD``, one row per row of ``df``. When no CPI value is
        available at all, ``Value_Adj_USD`` equals ``Value_USD``. Rows whose
        date has no CPI value get NaN.
    """
    prices = df.assign(Date=pd.to_datetime(df['Date']).dt.tz_localize(None))

    if inflation_index['CPI_USD'].isna().all():
        # No usable CPI: pass the USD values through unchanged.
        passthrough = prices.assign(Value_Adj_USD=prices['Value_USD'])
        return passthrough[['Date', 'Value_Adj_USD']]

    cpi = inflation_index.assign(
        Date=pd.to_datetime(inflation_index['Date']).dt.tz_localize(None)
    ).sort_values('Date')

    # Normalize to the most recent *known* CPI: taking the last row blindly
    # would turn every adjusted value into NaN if that row happens to be NaN
    # or if the index is not in date order.
    latest_cpi = cpi['CPI_USD'].dropna().iloc[-1]

    merged = prices.merge(cpi, on='Date', how='left')
    merged['Value_Adj_USD'] = merged['Value_USD'] * (latest_cpi / merged['CPI_USD'])
    return merged[['Date', 'Value_Adj_USD']]


Final steps: running the $\chi^2$ test and computing the p-value for the systemic dislocation of the market.

In [None]:
def chi2_systemic_dislocation(df, bins=10, ddof=0):
    """Chi-square goodness-of-fit test of standardized returns against N(0, 1).

    Returns are the percentage changes of ``Value_Adj_USD``; they are
    standardized with their own sample mean and standard deviation and
    counted in ``bins`` intervals that are equiprobable under a standard
    normal distribution.

    Args:
        df: Frame with a ``Value_Adj_USD`` column. It is not modified.
        bins: Number of equiprobable bins.
        ddof: Extra reduction of the degrees of freedom; the p-value uses
            ``bins - 1 - ddof``. The default of 0 keeps the previous
            behavior.
            NOTE(review): the mean and standard deviation are estimated from
            the same sample, so ``ddof=2`` may be the more appropriate
            choice - confirm with the analysis owner.

    Returns:
        Tuple ``(frame, chi2_stat, p_val)`` where ``frame`` is a copy of
        ``df`` with added ``Return`` and ``Dislocation`` columns. Both
        statistics are NaN when no standardized return can be computed
        (fewer than two usable returns, or zero variance).

    Raises:
        ValueError: If ``bins - 1 - ddof`` is not positive.
    """
    from scipy.stats import norm

    dof = bins - 1 - ddof
    if dof < 1:
        raise ValueError(
            f"bins - 1 - ddof must be positive, got bins={bins}, ddof={ddof}"
        )

    # Work on a copy so the caller's frame does not gain columns.
    df = df.copy()
    df['Return'] = df['Value_Adj_USD'].pct_change()
    df['Dislocation'] = (df['Return'] - df['Return'].mean()) / df['Return'].std()

    disloc = df['Dislocation'].dropna()
    if disloc.empty:
        # Nothing to bin; avoid the 0/0 that an all-zero expectation causes.
        return df, np.nan, np.nan

    # Edges at the standard-normal quantiles, so each bin expects the same
    # count; the outermost edges are -inf and +inf.
    bin_edges = norm.ppf(np.linspace(0, 1, bins + 1))

    observed, _ = np.histogram(disloc, bins=bin_edges)
    expected = len(disloc) / bins

    chi2_stat = ((observed - expected) ** 2 / expected).sum()
    # The survival function keeps precision for very small p-values, where
    # 1 - cdf rounds to exactly 0.
    p_val = chi2.sf(chi2_stat, df=dof)

    return df, chi2_stat, p_val

This final function calls all the previous functions once it receives the raw asset file as input, outputting a list containing the returns adjusted to USD and for inflation, as well as the dislocation from the market values.

In [None]:


def process_asset_file(asset_file):
    """Run the whole pipeline on one raw asset CSV.

    Steps: load the prices, look up the asset's currency in the global
    ``market_info`` table, convert to USD, adjust for inflation and run the
    chi-square dislocation test.

    Side effect: the global ``market_info`` is rebound with its
    ``'symbol (acronym)'`` column renamed to ``'Asset'`` and the ABB
    currency override applied.

    Args:
        asset_file: Path to the asset CSV (string or path-like). The file
            name without its ``.csv`` extension is used as the asset name.

    Returns:
        Tuple ``(df_chi, chi2_stat, p_val)`` as returned by
        ``chi2_systemic_dislocation``.

    Raises:
        KeyError: If the file has no ``Date`` column.
        IndexError: If the asset name is not listed in ``market_info``.
    """
    global market_info
    from pathlib import Path

    df = pd.read_csv(asset_file)
    df.columns = df.columns.str.strip()

    # Rename lowercase headers to the capitalised names used below.
    rename_map = {
        'date': 'Date',
        'open': 'Open',
        'high': 'High',
        'low': 'Low',
        'close': 'Close',
        'adjclose': 'Adj Close',
        'volume': 'Volume'
    }
    df.rename(columns=rename_map, inplace=True)

    if "Date" not in df.columns:
        raise KeyError(f"'Date' column not found in {asset_file}. Columns: {df.columns.tolist()}")

    df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
    # Rows whose date could not be parsed cannot be matched to an exchange
    # rate or a CPI value, so drop them instead of carrying NaT forward.
    unparsed = int(df['Date'].isna().sum())
    if unparsed:
        print(f"⚠️ Dropping {unparsed} rows with unparseable dates from {asset_file}")
        df = df.dropna(subset=['Date']).reset_index(drop=True)
    df['Value'] = df['Adj Close']

    # Only a trailing '.csv' extension is removed from the file name.
    path = Path(asset_file)
    asset_name = path.stem if path.suffix.lower() == '.csv' else path.name

    market_info = market_info.rename(columns={'symbol (acronym)': 'Asset'})

    # Apply the manual override BEFORE the lookup, so the first call for ABB
    # sees the same currency as every later call.
    # NOTE(review): hard-coded override - confirm that the ABB price file is
    # really quoted in CHF.
    market_info.loc[market_info['Asset'] == 'ABB', 'Currency'] = 'CHF'

    matches = market_info.loc[market_info['Asset'] == asset_name, 'Currency']
    if matches.empty:
        raise IndexError(f"Asset '{asset_name}' not found in market_info")
    currency = matches.iloc[0]

    # Pad the range by one day on each side of the available prices.
    start_date = df['Date'].min() - timedelta(days=1)
    end_date = df['Date'].max() + timedelta(days=1)

    print("Asset:", asset_name, "Currency:", currency)

    exchange_rates = get_exchange_rate(currency, start_date, end_date)
    inflation_index = get_inflation_index(start_date, end_date)

    df_usd = convert_to_usd(df, currency, exchange_rates)
    df_real = adjust_for_inflation(df_usd, inflation_index)

    df_chi, chi2_stat, p_val = chi2_systemic_dislocation(df_real)
    return df_chi, chi2_stat, p_val



In [None]:
#my testing
# print(market_info.columns.tolist())



# df = pd.read_csv("ABB.csv")
# print(df.columns.tolist())




This last bit is for testing, inputting one file at a time

In [None]:
# Manual check: push a single asset file through the whole pipeline.
df_chi, chi2_stat, p_val = process_asset_file("ABB.csv")

# Show both ends of the resulting table, then the test summary.
print(df_chi.head(), df_chi.tail(), sep="\n")
print(f"Chi² statistic: {chi2_stat:.4f}, p-value: {p_val:.4f}")