In [1]:
import pandas as pd
import numpy as np

In [2]:
# Input workbook and the descriptive (non-price) columns it carries.
file_path = 'eur_data.xlsx'
metadata_columns = ['Ticker', 'ShortName', 'Sector', 'Region', 'Country']

raw_table = pd.read_excel(file_path)

# Keep the descriptive columns in their own file, one row per ticker.
metadata = raw_table[metadata_columns]
metadata.to_csv('metadata.csv', index=False)

# Build the reshaped frame without mutating the freshly loaded table:
#   1. drop every metadata column except 'Ticker', plus the 'Unnamed: 0' column
#   2. use 'Ticker' as the row index
#   3. transpose, so each ticker becomes a column and the former column
#      labels become the row index, which is then named "Date"
df = (
    raw_table
    .drop(columns=[*metadata_columns[1:], 'Unnamed: 0'])
    .set_index('Ticker', drop=True)
    .T
    .rename_axis(index="Date")
)
df.to_csv('reshaped_data.csv')


In [3]:
# Reload the reshaped table with the 'Date' column parsed and used as the index.
data = pd.read_csv('reshaped_data.csv', parse_dates=['Date'], index_col='Date')
# Force every column to a numeric dtype; values that cannot be parsed become NaN.
data = data.apply(pd.to_numeric, errors='coerce')

# The logarithm is only defined for strictly positive inputs: np.log(0) is -inf
# and np.log of a negative number is NaN. A -inf would pass through diff() as
# +/-inf, would NOT be removed by dropna(), and would then turn that column's
# mean/std (and so its whole standardized series) into NaN. Mask non-positive
# values to NaN up front so they are handled like any other missing observation.
valid_prices = data.where(data > 0)

# Calculate log returns for all columns at once: ln(x_t) - ln(x_{t-1}).
# dropna() removes every row containing at least one NaN, which includes the
# first row produced by diff().
log_returns = np.log(valid_prices).diff().dropna()

# Standardize returns in a vectorized way (per-column z-score).
# NOTE(review): a column whose returns are all identical has std == 0 and will
# come out as NaN/inf here — confirm whether such columns can occur.
standardized_returns = (log_returns - log_returns.mean()) / log_returns.std()

# Save to separate CSV files
log_returns.to_csv('eur_data_log_returns.csv')
standardized_returns.to_csv('eur_data_standardized_returns.csv')