In [1]:
# Mount Google Drive at /content/gdrive so that the CSV files under
# '/content/gdrive/My Drive/...' read by the following cells are reachable.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

In [3]:
# read in 13F results
# `df` is referenced later in this notebook through its LGCYINVESTORID and
# FULLNAME columns (note the all-caps column names in this file).
# NOTE(review): the file name suggests the extract covers 2016-06-30 to
# 2020-06-30 -- confirm against the data source.
file_name = '/content/gdrive/My Drive/capstone/full database/Results_20160630_20200630.csv'
df = pd.read_csv(file_name)

In [4]:
# Load the IDs of the investors of interest and report how many were read.
# The ID column in this file is spelled 'LgcyInvestorID' (capital "ID"),
# unlike 'LgcyInvestorId' in the fund table and 'LGCYINVESTORID' in the 13F results.
ls_investors = (
    pd.read_csv('/content/gdrive/My Drive/capstone/full database/ListOfInvestor.csv')
    .loc[:, 'LgcyInvestorID']
    .tolist()
)
print('There are {} investors which are we interested in.'.format(len(ls_investors)))

There are 229 investors which are we interested in.


In [5]:
# read in fund info
# Later cells use the LgcyInvestorId, InvOrientation and FundId columns of this table.
df_fund = pd.read_csv('/content/gdrive/My Drive/capstone/full database/vwOwn2Funds.csv')

In [6]:
# Split the investors of interest by whether their ID occurs in the fund table:
#   id_in_list     -> listed investors with at least one row in df_fund
#   id_not_in_list -> listed investors with no row in df_fund
id_in_list = set(ls_investors).intersection(set(df_fund['LgcyInvestorId']))
id_not_in_list = set(ls_investors).difference(set(df_fund['LgcyInvestorId']))

In [7]:
# 28 of the listed investors have no active/passive info (their IDs do not
# occur in the fund table); export their ID / full name pairs for follow-up.
# NOTE(review): unlike the 'active_investor_list.csv' export, this call keeps
# the DataFrame index as the first CSV column -- confirm that is intended.
(
    df.loc[df['LGCYINVESTORID'].isin(id_not_in_list), ['LGCYINVESTORID', 'FULLNAME']]
    .drop_duplicates()
    .to_csv('no_orientation.csv')
)

In [8]:
# Count the missing InvOrientation values in the fund table (expected: 0).
int(df_fund['InvOrientation'].isna().sum())

0

In [9]:
# Keep only the fund rows that belong to investors of interest with fund info.
df_fund_listed = df_fund.loc[df_fund['LgcyInvestorId'].isin(id_in_list)]

# calculate the proportion of active funds
def active_pct(df):
  """Return the fraction of entries in ``df`` that equal the string 'Active'.

  Despite the parameter name, ``df`` is the per-group ``InvOrientation``
  Series handed over by ``groupby(...).agg`` in this notebook, not a
  DataFrame.

  Parameters
  ----------
  df : pandas.Series
      Orientation labels for one group. Entries that differ from 'Active'
      count as not active.

  Returns
  -------
  float
      Share of 'Active' entries, between 0 and 1. NaN is returned for an
      empty input instead of raising ZeroDivisionError.
  """
  if len(df) == 0:
    return float('nan')
  # The mean of the boolean mask equals count('Active') / len(df), computed
  # vectorized instead of element by element with the builtin sum().
  return float((df == 'Active').mean())

# One row per investor: share of active fund records (ActivePct) and the
# number of non-null FundId entries (RecordCount).
df_orientation = (
    df_fund_listed
    .groupby(['LgcyInvestorId'])
    .agg({'InvOrientation': active_pct, 'FundId': 'count'})
    .rename(columns={'InvOrientation': 'ActivePct', 'FundId': 'RecordCount'})
    .reset_index()
)

# Flag an investor as active when at least 50% of its fund records are active.
# The threshold is inclusive: ActivePct == 0.5 counts as active.
df_orientation['isActive'] = df_orientation['ActivePct'].ge(0.5)

# Attach each investor's full name taken from the 13F results, then discard
# the duplicated all-caps key column brought in by the join.
# NOTE(review): this is an inner join, so investors absent from `df` are
# dropped, and an investor with several distinct FULLNAME values would
# produce several rows -- confirm neither occurs in the data.
df_orientation = (
    df_orientation
    .merge(
        df.loc[:, ['LGCYINVESTORID', 'FULLNAME']].drop_duplicates(),
        left_on='LgcyInvestorId',
        right_on='LGCYINVESTORID',
    )
    .drop(columns='LGCYINVESTORID')
)

In [38]:
# Write the per-investor orientation table to the working directory without
# the index column. Despite the file name, the export holds every investor in
# df_orientation, active and passive alike, distinguished by the isActive flag.
df_orientation.to_csv('active_investor_list.csv',index=False)

In [None]:
# Tally of the isActive flag: 17 investors are passive, 184 investors are active.
df_orientation['isActive'].value_counts()

True     184
False     17
Name: isActive, dtype: int64

In [None]:
# Show the investors classified as passive (isActive is False).
df_orientation.loc[~df_orientation['isActive']]

Unnamed: 0,LgcyInvestorId,InvOrientation,FundId,isActive,FULLNAME
22,2002506,0.492647,136,False,BNY Mellon Asset Management
56,2003245,0.35167,508,False,State Street Global Advisors (US)
64,2003356,0.160976,1230,False,"BlackRock Institutional Trust Company, N.A."
68,2003671,0.279534,599,False,BlackRock Advisors (UK) Limited
81,2004260,0.398664,440,False,"The Vanguard Group, Inc."
85,2004351,0.444444,134,False,"Charles Schwab Investment Management, Inc."
95,2005078,0.377049,53,False,BlackRock Investment Management (Australia) Ltd.
102,2005964,0.336,500,False,Guggenheim Investments
129,2401343,0.05102,98,False,BlackRock Asset Management Deutschland AG
141,2463029,0.161074,149,False,BlackRock Asset Management Canada Limited
