# For loop method

In [None]:
import pickle
import pandas as pd
import numpy as np
import statsmodels.api as sm

def run_regression_n_get_beta(x_series: pd.Series, y_series: pd.Series):
    """
    Regress y_series on x_series (plus a constant) and return the slope.

    The two series are placed side by side in one DataFrame, using their
    ``name`` attributes as column labels, and every row containing a NaN is
    dropped. If rows remain, an OLS model is fitted, its summary is printed,
    and the coefficient stored under ``x_series.name`` is returned. If no rows
    remain, 'NaN' is printed and None is returned.
    """
    x_col, y_col = x_series.name, y_series.name

    # Pair the observations and discard incomplete rows
    paired = pd.DataFrame({x_col: x_series, y_col: y_series}).dropna()

    # Guard clause: nothing left to feed into the regression
    if paired.empty:
        print('NaN')
        return None

    # Predictor with a constant column added, response as-is
    design = sm.add_constant(paired[x_col])
    fitted = sm.OLS(paired[y_col], design).fit()

    # Show the full regression report for this fit
    print(fitted.summary())

    # The beta is the coefficient on the predictor column
    return fitted.params[x_col]


# Load the data
# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
# The context manager closes the file handle once the payload has been read.
with open('testdata.pickle', "rb") as pickle_file:
    data = pickle.load(pickle_file)

df_momentum = data['momentum']
df_return = data['return']

# Align the columns for the 2 dataframes
df_momentum = df_momentum.loc[:, df_return.columns]

# Run one cross-sectional regression per row label of the momentum table
beta_list = []
for month in df_momentum.index:
    print(f'Monthly Regression for: {month}')
    # rename() returns a new, relabelled Series, so the objects handed back
    # by .loc are never mutated in place.
    x_series = df_momentum.loc[month].rename('Momentum')
    y_series = df_return.loc[month].rename('Return')
    beta_list.append(run_regression_n_get_beta(x_series, y_series))

# Drop the None placeholders (rows with no usable data) and build a float
# array. Filtering with `is not None` avoids an element-wise `!= None`
# comparison and an object-dtype array.
beta_array = np.array([beta for beta in beta_list if beta is not None], dtype=float)

# Calculate mean & std (np.std defaults to ddof=0)
print(f'The mean of beta is {np.mean(beta_array)} and the standard deviation of beta is {np.std(beta_array)}')


# Pandas apply method

In [None]:
def run_regression_n_get_beta(x_series: pd.Series, y_series: pd.Series):
    """
    Regress y_series on x_series (plus a constant) and return the slope.

    The inputs are used as given; no NaN filtering happens here. The model
    summary is printed, and the coefficient stored under ``x_series.name`` is
    returned.
    """
    # Predictor with a constant column added
    design = sm.add_constant(x_series)

    # Ordinary least squares fit of the response on the design matrix
    fitted = sm.OLS(y_series, design).fit()

    # Show the full regression report for this fit
    print(fitted.summary())

    # The beta is the coefficient on the predictor column
    return fitted.params[x_series.name]

# Load the data
# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
# The context manager closes the file handle once the payload has been read.
with open('testdata.pickle', "rb") as pickle_file:
    data = pickle.load(pickle_file)

df_momentum = data['momentum']
df_return = data['return']

# Transform each wide table into long format with columns Date / RIC / value.
# Naming the index 'Date' before reset_index() makes this work whether or not
# the index already carries a name (a hard-coded 'index' id column only exists
# when the index is unnamed).
df_momentum_melted = pd.melt(
    df_momentum.rename_axis('Date').reset_index(),
    id_vars='Date',
    var_name='RIC',
    value_name='Momentum',
)

df_return_melted = pd.melt(
    df_return.rename_axis('Date').reset_index(),
    id_vars='Date',
    var_name='RIC',
    value_name='Return',
)

# Merge 2 dataframes into 1
df_merged = df_momentum_melted.merge(df_return_melted, on=['Date', 'RIC'], how='inner')

# Run the regression by group.
# Rows with any NaN are dropped first because the regression helper does no
# filtering itself. Only the two columns the helper needs are passed to apply,
# so the grouping column is not part of each group's frame.
beta_array = (
    df_merged.dropna()
    .groupby('Date')[['Momentum', 'Return']]
    .apply(lambda group: run_regression_n_get_beta(group['Momentum'], group['Return']))
)

# Calculate mean & std
print(f'The mean of beta is {np.mean(beta_array)} and the standard deviation of beta is {np.std(beta_array)}')
