In [8]:
import pandas as pd
import numpy as np
from pandas import DataFrame
import math

In [9]:
# Load the prepared input tables from the UsableData directory.
# The four weight-input tables are addressed as .loc[ric, year] later in this
# notebook, i.e. one row per stock (RIC) and one column per year.
# NOTE(review): read_pickle can execute arbitrary code - only load trusted files.
df_Market_Cap, df_Employees, df_Book_Value, df_Variance = (
    pd.read_pickle(pickle_path)
    for pickle_path in (
        "UsableData/MarketCap.pkl",
        "UsableData/Employees.pkl",
        "UsableData/BookValue.pkl",
        "UsableData/Variance.pkl",
    )
)
# Returns are transposed after loading so that each stock (RIC) is a column
# and the row index can be compared against date strings.
df_Returns = pd.read_pickle("UsableData/Returns.pkl").T

In [10]:
# Build the eligibility table: same rows (stocks) and columns (years) as the
# market-cap table, with every cell initialised to False and dtype bool.
# Constructing the frame directly replaces the former per-column loop, whose
# `.astype("bool")` call discarded its result and therefore had no effect.
df_eligible = pd.DataFrame(
    False,
    index=df_Market_Cap.index,
    columns=df_Market_Cap.columns,
    dtype=bool,
)

In [11]:
def unusableReturns(ric, year):
    """Report whether a stock's return data for one calendar year is unusable.

    Reads the module-level ``df_Returns`` frame (one column per stock, rows
    labelled by date).

    Args:
        ric: Column label in ``df_Returns`` identifying the stock.
        year: Calendar year to inspect; rows labelled from ``{year}-01-01``
            through ``{year}-12-31`` (inclusive) are selected.

    Returns:
        bool: True if the selected returns contain at least one NaN, or if
        there are no rows at all for that year; False otherwise.

    Raises:
        KeyError: If ``ric`` is not a column of ``df_Returns``.
    """
    dates = df_Returns.index
    in_year = (dates >= f"{year}-01-01") & (dates <= f"{year}-12-31")
    selection = df_Returns.loc[in_year, ric]
    # A year without any return observations is not usable either. Without
    # this guard the NaN check below is vacuously False on an empty slice,
    # which would wrongly report the stock as usable.
    if selection.empty:
        return True
    return bool(selection.isna().to_numpy().any())

In [12]:
# Decide, for every stock and year, whether the stock is eligible.
# A stock is eligible in year t+1 when all four weight inputs (market cap,
# employees, book value, variance) are present for year t AND its returns in
# year t+1 contain no NaN.
# NOTE(review): assumes the year columns are consecutive integers so that
# `year + 1` is the label of the next column - TODO confirm.
years = df_Market_Cap.columns.to_numpy()[:-1]  # Exclude the last year: there is no following year to mark as eligible
for ric in df_Market_Cap.index:  # Go through every stock
    for year in years:  # In every year that has a successor
        weight_inputs = (
            df_Market_Cap.loc[ric, year],
            df_Employees.loc[ric, year],
            df_Book_Value.loc[ric, year],
            df_Variance.loc[ric, year],
        )
        # pd.isna also recognises None / pd.NA, on which math.isnan would raise
        inputs_complete = not any(pd.isna(value) for value in weight_inputs)
        # Short-circuit: the returns are only inspected when the weight inputs are complete
        df_eligible.at[ric, year + 1] = inputs_complete and not unusableReturns(ric, year + 1)
# Drop the first year only: it has no previous year to derive weights from, so
# it can never be marked eligible. Rebinding with errors="ignore" (instead of
# an in-place drop) keeps this cell re-runnable without a KeyError.
df_eligible = df_eligible.drop(columns=[years[0]], errors="ignore")
df_eligible.head(5)

Year,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
RIC,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
05P.D,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
0955.HK^D12,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
0FIBG.DE^F10,False,False,False,False,False,False,False,False,False,True,...,False,False,False,False,False,False,False,False,False,False
0RN6.L^G19,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
123F.DE,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [13]:
# Count how many stocks are eligible in each year.
# The cells hold True/False flags, so summing a column counts its True
# entries directly - no filtered copy of the whole frame is built per year.
for year in df_eligible.columns:
    print(f"{year}: {int(df_eligible[year].sum())}")

1996: 2
1997: 21
1998: 33
1999: 98
2000: 193
2001: 283
2002: 344
2003: 370
2004: 431
2005: 447
2006: 487
2007: 547
2008: 577
2009: 586
2010: 555
2011: 570
2012: 581
2013: 550
2014: 532
2015: 470
2016: 449
2017: 442
2018: 448
2019: 456
2020: 487
2021: 522


In [14]:
# Save the eligibility table to disk as a pickle file
eligible_stocks_path = "UsableData/EligibleStocks.pkl"
df_eligible.to_pickle(eligible_stocks_path)