In [None]:
import pandas as pd
import numpy as np

In [None]:
# Load the universe file, using its 'date' column as the row index
universe_file = 'EM_universe.csv'
uni = pd.read_csv(universe_file, index_col='date')

In [None]:
# Parse the index labels as datetimes, then order the rows by that index
uni = uni.set_axis(pd.to_datetime(uni.index), axis=0).sort_index()

In [None]:
# Preview the first five rows of the sorted frame
uni.head(n=5)

In [None]:
# Keep only the columns listed here (in this order); every other column is discarded
identifier_columns = ['name', 'id', 'Market Capitalisation']
factor_columns = ['Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA']
loading_columns = [
    'Market Risk Factor Loading',
    'SMB Factor Loading',
    'HML Factor Loading',
    'RMW Factor Loading',
    'CMA Factor Loading',
]
outcome_columns = ['intercept', 'excess_ret_adjusted', 'return_adjusted']
uni = uni.loc[:, identifier_columns + factor_columns + loading_columns + outcome_columns]

In [None]:
# Expected excess return under the Fama-French five-factor model:
# the sum of (factor value x factor loading) over the five factors, plus the intercept
factor_loading_pairs = [
    ('Mkt-RF', 'Market Risk Factor Loading'),
    ('SMB', 'SMB Factor Loading'),
    ('HML', 'HML Factor Loading'),
    ('RMW', 'RMW Factor Loading'),
    ('CMA', 'CMA Factor Loading'),
]
factor_component = None
for factor_col, loading_col in factor_loading_pairs:
    contribution = uni[factor_col] * uni[loading_col]
    # Accumulate left to right so the summation order matches the written formula
    factor_component = contribution if factor_component is None else factor_component + contribution
uni['expected returns'] = factor_component + uni['intercept']

In [None]:
# Function to run the simulation. Use the Fama-French model to pick stocks at each period and return the results
def simulation(data, periods, start=5, quantile=75):
    """Back-test a top-quantile stock selection over rolling windows of dates.

    For every window of ``periods + 1`` consecutive dates, the last date of the
    window is the evaluation date. Rows are eligible when their ``name`` occurs
    more than ``periods / 2 + 1`` times inside the window. Among the eligible
    rows on the evaluation date, those whose 'expected returns' are at or above
    the ``quantile``-th percentile (computed over non-NaN values) are selected.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose index holds the dates (repeated once per stock) and which
        has at least the columns 'name', 'expected returns' and
        'return_adjusted'. Dates are used in order of first appearance in the
        index, so the frame should already be sorted chronologically.
    periods : int
        Number of dates preceding the evaluation date that belong to a window.
    start : int, default 5
        Position (in the unique dates) of the first date of the first window.
    quantile : float, default 75
        Percentile (0-100) of 'expected returns' used as the selection cut-off.

    Returns
    -------
    pandas.DataFrame
        One row per evaluation date, indexed by that date, with columns
        'Returns' (mean 'return_adjusted' of the selection, plus one) and
        'Length' (number of rows selected). A date with no eligible non-NaN
        expected return gets ``NaN`` and ``0``.

    Notes
    -----
    NOTE(review): the selection signal and the realised return are read from
    the same date's rows; whether 'expected returns' is known before that
    return is realised cannot be told from this function - confirm upstream.
    NOTE(review): eligibility is counted per 'name'; if names are not unique
    per security, 'id' may be the safer key - confirm against the data.
    """
    dates = data.index.unique()
    column_names = ['Returns', 'Length']
    # A name must occur in more than this many rows of the window to be eligible
    min_rows = (periods / 2) + 1

    # Collect the rows first and build the frame once at the end: growing a
    # DataFrame with .loc is quadratic and leaves object-dtype columns.
    evaluation_dates = []
    records = []

    # Iterate through the first date of every rolling window
    for i in range(start, len(dates) - periods):
        evaluation_date = dates[i + periods]
        evaluation_dates.append(evaluation_date)

        # Get rolling window data
        window_data = data[data.index.isin(dates[i:i + periods + 1])]
        # Keep rows of names that are sufficiently covered inside the window
        rows_per_name = window_data.groupby('name')['name'].transform('count')
        window_data = window_data[rows_per_name > min_rows]
        # Get evaluation-date data
        current_data = window_data[window_data.index == evaluation_date]

        expected = current_data['expected returns']
        valid_expected = expected.dropna()
        if valid_expected.empty:
            # np.percentile is not well defined on empty input, so record an
            # explicit "nothing selected" row instead of calling it.
            records.append((np.nan, 0))
            continue

        # Get equities where Fama-French predicts top-quantile returns
        cutoff = np.percentile(valid_expected, quantile)
        selected = current_data[expected >= cutoff]

        # Store results
        records.append((selected['return_adjusted'].mean() + 1, len(selected)))

    return pd.DataFrame(records, columns=column_names, index=pd.Index(evaluation_dates))

In [None]:
# Run the back-test on the prepared universe using 12-period windows
ret = simulation(data=uni, periods=12)

In [None]:
# Save the results to CSV, with the index written out under the header 'date'
ret = ret.rename_axis('date')
ret.to_csv('ret_FF.csv')