In [8]:
import pandas as pd
import numpy as np

import warnings
warnings.filterwarnings("ignore")

# Functions

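**Function** \
get_factor: for each date, selects the top-ranked equities according to the chosen factor \
**Parameters** \
df: original dataset (with `date` and `equity` columns) \
factor_name: name of the factor column used for ranking (highest values first) \
number_of_equities: number of equities to select for each date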

In [10]:
def get_factor(df, factor_name, number_of_equities):
    """Select, for every date, the equities ranked highest on a factor.

    Parameters
    ----------
    df : pandas.DataFrame
        Data with 'date' and 'equity' columns plus the factor column.
    factor_name : str
        Column to rank by; rows are ordered from highest to lowest value.
    number_of_equities : int
        How many equities to keep for each date.

    Returns
    -------
    pandas.DataFrame
        Indexed by date (sorted ascending), with one positional column per
        rank holding the selected 'equity' values.
    """
    # One entry per distinct date: the leading `number_of_equities` names after
    # ordering that date's rows by the factor, highest first.
    top_by_date = {
        day: (
            df.loc[df['date'] == day]
            .sort_values(by=['date', factor_name], ascending=False)['equity']
            .head(number_of_equities)
            .values
        )
        for day in set(df['date'])
    }

    # Dates become the row labels; sort them because a set has no defined order.
    return pd.DataFrame.from_dict(top_by_date, orient='index').sort_index()

df_returns: builds a dataframe of prices (`PX_LAST`) indexed by date, with one column per equity

In [11]:
def df_returns(df):
    """Build a date x equity table of prices.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format data containing 'equity', 'date' and 'PX_LAST' columns.
        The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        One row per date and one column per equity, filled with 'PX_LAST'.

    Raises
    ------
    KeyError
        If one of the three required columns is missing.
    ValueError
        Raised by ``DataFrame.pivot`` when an (equity, date) pair is duplicated.
    """
    # Build the result as a chain of new frames. Sorting a column selection
    # with inplace=True mutates a temporary slice, which can trigger pandas'
    # SettingWithCopyWarning. The local is also named differently from the
    # function so it does not shadow it.
    prices = (
        df[['equity', 'date', 'PX_LAST']]
        .sort_values(by=['equity', 'date'])
        .pivot(index='date', columns='equity', values='PX_LAST')
    )
    return prices

df_log_returns: log returns for each equity, computed as log( price(t) / price(t-1) )

In [12]:
# Log return of every equity column: log(price(t) / price(t-1)).
# NOTE(review): this cell needs `df_returns` to already be the date x equity
# price table (a DataFrame), not the function of the same name - confirm the
# intended execution order.
df_log_returns = df_returns[df_returns.columns].apply(
    lambda prices: np.log(prices.div(prices.shift(1)))
)

# Benchmark: for each date that has at least one return, the mean return
# across all equities (missing values ignored).
benchmark_returns = (
    df_log_returns
    .dropna(how='all')
    .mean(axis='columns', skipna=True)
)

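**Function** \
returns_without_fees / returns_with_fees: add a `returns` column to the df with the summed log returns of the selected equities for each row (date) \

**Parameters** \
df_factor_chosen: df with the best n equities for each date, selected by a previously chosen factor \
df_log_returns: df of log returns, indexed by date with one column per equity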

In [13]:
def returns_without_fees(df_factor_chosen, df_log_returns):
    """Attach to each date the summed log return of its selected equities.

    Parameters
    ----------
    df_factor_chosen : pandas.DataFrame
        Indexed by date; every cell holds the name of a selected equity.
        The frame is not modified.
    df_log_returns : pandas.DataFrame
        Log returns indexed by date with one column per equity.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df_factor_chosen`` with an extra 'returns' column (sum, not
        average, of the selected equities' log returns for that date, rounded
        to 2 decimals). The first row is dropped.

    Raises
    ------
    KeyError
        If a date or an equity name is absent from ``df_log_returns``.
    """
    row_totals = []
    for date, equities in df_factor_chosen.iterrows():
        returns_on_date = df_log_returns.loc[date]
        total = 0
        for equity in equities:
            # Missing cells (e.g. a date with fewer equities than requested)
            # are not column labels; skip them instead of raising KeyError.
            if pd.isna(equity):
                continue
            # nansum treats a missing return as 0.
            total += np.nansum(returns_on_date[equity])
        row_totals.append(round(total, 2))

    # Work on a copy: the result name was previously used before it was ever
    # assigned (UnboundLocalError), and writing into the input would add a
    # 'returns' column to the caller's frame.
    df_factor_chosen_returns = df_factor_chosen.copy()
    df_factor_chosen_returns['returns'] = row_totals

    # Drop the first row, then copy so that callers can add columns to the
    # result without writing into a slice.
    return df_factor_chosen_returns.iloc[1:, :].copy()

In [None]:
def returns_with_fees(df_factor_chosen, df_log_returns, commission_fee):
    """Attach to each date the summed, commission-adjusted log return.

    Parameters
    ----------
    df_factor_chosen : pandas.DataFrame
        Indexed by date; every cell holds the name of a selected equity.
        The frame is not modified.
    df_log_returns : pandas.DataFrame
        Log returns indexed by date with one column per equity.
    commission_fee : float
        Commission rate applied to every selected equity on every date.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df_factor_chosen`` with an extra 'returns' column (sum of
        the per-equity returns after commission, rounded to 2 decimals).
        The first row is dropped.

    Raises
    ------
    KeyError
        If a date or an equity name is absent from ``df_log_returns``.
    """
    row_totals = []
    for date, equities in df_factor_chosen.iterrows():
        returns_on_date = df_log_returns.loc[date]
        total = 0
        for equity in equities:
            # Missing cells (e.g. a date with fewer equities than requested)
            # are not column labels; skip them instead of raising KeyError.
            if pd.isna(equity):
                continue
            # nansum treats a missing return as 0.
            rtn_before_commission = np.nansum(returns_on_date[equity])
            # The fee is charged on the position value after the move.
            rtn_after_commission = (
                rtn_before_commission - commission_fee * (1 + rtn_before_commission)
            )
            total += rtn_after_commission
        row_totals.append(round(total, 2))

    # Work on a copy: the result name was previously used before it was ever
    # assigned (UnboundLocalError), and writing into the input would add a
    # 'returns' column to the caller's frame.
    df_factor_chosen_returns = df_factor_chosen.copy()
    df_factor_chosen_returns['returns'] = row_totals

    # Drop the first row, then copy so that callers can add columns to the
    # result without writing into a slice.
    return df_factor_chosen_returns.iloc[1:, :].copy()

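**Function** \
get_information_ratio: returns the information ratio of the chosen factor (mean of alpha divided by its standard deviation) \
**Parameters** \
df_factor_chosen_returns: df returned by returns_without_fees or returns_with_fees (must contain a `returns` column) \
benchmark_returns: benchmark return for each date, subtracted from `returns` to obtain alpha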

In [14]:
def get_information_ratio(df_factor_chosen_returns, benchmark_returns):
    """Compute the information ratio of a factor's returns against a benchmark.

    Parameters
    ----------
    df_factor_chosen_returns : pandas.DataFrame
        Frame with a 'returns' column. An 'alpha' column is added to (or
        overwritten on) this frame as a side effect.
    benchmark_returns : pandas.Series
        Benchmark returns, subtracted from 'returns' to obtain alpha.

    Returns
    -------
    float
        Mean of alpha divided by the standard deviation of alpha.
    """
    # Excess return over the benchmark, stored on the frame itself.
    df_factor_chosen_returns['alpha'] = df_factor_chosen_returns['returns'].sub(benchmark_returns)

    # Read the column back so the statistics use the values as stored on the frame.
    alpha = df_factor_chosen_returns['alpha']
    return alpha.mean() / alpha.std()

0.1583179104925697

# MAIN PREVIEW

## General (one time execution)

In [None]:
# Read the data.
# 'equity' and 'date' are kept as regular columns: get_factor and df_returns
# read them with df['date'] / df[['equity', 'date', 'PX_LAST']], which raises
# KeyError once they have been moved into the index with set_index.
df = pd.read_csv('data/data.csv')

# Get the returns (date x equity price table).
# NOTE: this rebinds the name `df_returns` from the function to its result, so
# re-running this cell requires re-running the cell that defines df_returns.
df_returns = df_returns(df)

# Get the log returns: log(price(t) / price(t-1)) for every equity column
df_log_returns = df_returns[df_returns.columns].apply(lambda x: np.log(x / x.shift(1)))

# Get the benchmark of returns: mean across equities for each date with data
benchmark_returns = df_log_returns.dropna(how='all').mean(skipna=True, axis='columns')

## Factors

In [None]:
# Factor to rank on, and how many top-ranked equities to keep for each date
factor_name, number_of_equities = 'EBITDA_MARGIN', 10

# Table of the equities selected on each date for that factor
df_factor = get_factor(
    df=df,
    factor_name=factor_name,
    number_of_equities=number_of_equities,
)

### No fees

In [None]:
# Per-date returns of the selected equities, without commission
df_factor_returns = returns_without_fees(
    df_factor_chosen=df_factor,
    df_log_returns=df_log_returns,
)

# Information ratio of those returns against the benchmark
information_ratio = get_information_ratio(
    df_factor_chosen_returns=df_factor_returns,
    benchmark_returns=benchmark_returns,
)

### Fees 

In [None]:
# Commission rate charged on every selected equity
commission_fee = 0.002

# Per-date returns of the selected equities, net of commission
df_factor_returns = returns_with_fees(
    df_factor_chosen=df_factor,
    df_log_returns=df_log_returns,
    commission_fee=commission_fee,
)

# Information ratio of the net returns against the benchmark
information_ratio = get_information_ratio(
    df_factor_chosen_returns=df_factor_returns,
    benchmark_returns=benchmark_returns,
)