In [19]:
import os
import numpy as np
import pandas as pd
import random

In [20]:
class AssetOverlap():
    def __init__(self):
        self.readFunds()
        self.readSP500()

        
    def readFunds(self):
        fundDir = os.listdir("../input/funds")
        funds = pd.DataFrame(columns=["Ticker", "ID"])
        for name in fundDir:
            fund = pd.read_csv("../input/funds/"+name, index_col=0)
            name = name.split(".csv")[0]
            
            fund = fund.rename(columns={fund.columns[-1]:"Ticker", "ID.WEIGHTS":name})
            fund = fund[["ID","Ticker",name]]
            fund = fund[(fund.Ticker.str[:4]!="#N/A") & (fund[name]>0)] #no puts so neg percent and #N/A Unclassified: Unable to parse request at ...  and # N/A Invalid Security
            fund = fund.dropna(axis=0, how='all')
            
            fund[name] = fund[name]/(fund[name].sum())*100 # some have over 100 percent
            
            fund.Ticker = fund.Ticker.fillna("NULL TICKER"+fund.ID)
            
            fund = fund.groupby(["Ticker","ID"]).agg({name:"sum"})# you have to groupby id and ticker or else it may merge multipletimes
            
            funds = funds.merge(fund, how='outer', on=["Ticker","ID"])
        
        
        params = {fundName:"sum" for fundName in self.getFundNames(funds)}
        params.update({"ID":lambda IDS: IDS.iloc[np.argmax([np.all([not char.isdigit() for char in ID]) for ID in IDS])] }) #first id that is all characters or first id
        funds = funds.groupby("Ticker").agg(params)
        funds = funds.reset_index()

        #print(funds.Ticker.duplicated().sum())
        
        funds = funds.fillna(0)
        self.funds = funds

    def getFundMatrix(self):
        return self.funds.loc[:,~self.funds.columns.isin(["ID","Ticker"])]
    
    def readSP500(self):
        self.sp500 = pd.read_csv("../input/s&p500.csv")
        self.sp500 = self.sp500.iloc[:,:3]
        self.sp500 = self.sp500.rename(columns={"Symbol":"Ticker", "Weight":"S&P500"})
        
    def getFundNames(self,funds):
        return [x for x in funds.columns if x not in ["ID", "Ticker"]]
    
    def makePortfolio(self,weights):
        #self.weights = self.weights.loc[self.getFundNames(self.funds)]
        columns = ["Ticker","ID"] + list(weights.index)
        self.funds = self.funds[columns]

        Portfolio = self.getFundMatrix().dot(weights)
        Portfolio = Portfolio.rename({Portfolio.columns[0]:"Portfolio"},axis=1)
        Portfolio[["Ticker","ID"]] = self.funds[["Ticker","ID"]]
        
        Portfolio = pd.merge(Portfolio, self.sp500, how='outer', on="Ticker")
        Portfolio.ID = Portfolio.ID.fillna(Portfolio.Company)
        Portfolio = Portfolio.drop("Company", axis=1)
        return Portfolio
    
    def makeAddStocksPortfolio(self,Portfolio):
        dif = Portfolio["S&P500"]-Portfolio["Portfolio"] # determine which sp500 assets have more than in the portfolio
        self.extra = Portfolio
        self.extra["Dif"] = dif
        self.extra = self.extra[self.extra.Dif>0][["Ticker","ID","Dif"]]
        self.extra = self.extra.rename(columns={"Dif":"Portfolio"})
        AddStocksPortfolio = pd.concat([Portfolio[["Ticker","ID","Portfolio"]],self.extra],axis=0)
        return AddStocksPortfolio
    
    def simpleOverlap(self,weights):
        Portfolio = self.makePortfolio(weights)
        Portfolio[["Portfolio","S&P500"]] = Portfolio[["Portfolio","S&P500"]].fillna(0)
        overlap = np.minimum(np.array(Portfolio["Portfolio"]),np.array(Portfolio["S&P500"]))
        overlap = overlap.sum()
        total = Portfolio.Portfolio.sum()
        return overlap/total*100
    
    def addStocksOverlap(self,weights):
        Portfolio = self.makePortfolio(weights)
        AddStocksPortfolio = self.makeAddStocksPortfolio(Portfolio)
        return 100/AddStocksPortfolio.Portfolio.sum()*100
        
    def evaluateAssetOverlap(self,weights):
        #self.weights = pd.DataFrame(np.ones((49,1))*-1000,index=self.getFundNames(self.funds))
        return self.simpleOverlap(weights)
        
        
        
        

In [21]:
class Discount:
    def __init__(self):
        self.readDiscount()
    def readDiscount(self):
        self.discount = pd.read_csv("../input/Discounts.csv")
        self.discount[["Discount", "52W Discount","Effective","Distribution"]] = self.discount.apply(lambda x: [x["Discount"].strip("%"),x["52W Discount"].strip("%"), x["Effective"].strip("%"), x["Distribution"].strip("%")], axis=1, result_type='expand')
        self.discount = self.discount.replace("--",0)
        self.discount.iloc[:,1:] = self.discount.iloc[:,1:].astype("float")
        
        
        longterm = self.discount["52W Discount"]-self.discount["Discount"] #max: a pos 52 week is pos, a neg discount is more pos
        longterm = longterm/np.max(longterm) *100# between -inf and 1
        self.discount["longterm"] = longterm
        current = -self.discount["Discount"] # max: a neg discount is pos
        current = current/np.max(-self.discount["Discount"]) * 100 #between -inf and 1
        self.discount["value"] = np.sum([longterm,2*current],axis=0)/3
        self.discount = self.discount.set_index("Ticker")
    
        
    def getWeightedDiscount(self,weights):
        self.discount = self.discount.loc[weights.index]
        weights = np.array(weights).reshape(1,len(weights))
        values = np.array(self.discount.Discount).reshape(len(self.discount),1)
        return np.matmul(weights,values)[0][0]
    
    def evaluateDiscount(self,weights):
        self.discount = self.discount.loc[weights.index]
        weights = np.array(weights).reshape(1,len(weights))
        values = np.array(self.discount.value).reshape(len(self.discount),1)
        
        return np.dot(weights,values)[0][0]
        
          

In [28]:
class runModel(AssetOverlap, Discount):
    def __init__(self):
        self.readFunds()
        self.readSP500()
        self.readDiscount()
        self.fundlist = []
        N = len(self.getFundNames(self.funds))
        weights = np.random.uniform(0,1.0,(N,1))
        weights = np.divide(weights,np.sum(weights))

        self.weights = pd.DataFrame(weights,columns=["weights"],index=self.getFundNames(self.funds))
        
        self.run_model()
        
    def Evaluate(self,weights):
        asset = .7*(self.evaluateAssetOverlap(weights))
        discount = .3*self.evaluateDiscount(weights)
        return asset+discount
        
    def randomFunds(self):
        fund2 = random.choice(self.weights[self.weights["weights"]!=0].index)
        fund1 = random.choice(self.weights.index)
        self.fundlist.append([fund1, fund2])
        return fund1, fund2
    
    def run_model(self):
        fund1, fund2 = self.randomFunds()
        
        for x in range(1000):
            newWeights = self.weights.copy()
            
            change = .01
            if newWeights.loc[fund2][0] < change:
                change = newWeights.loc[fund2][0]
            if change == 0 or fund1==fund2: 
                fund1, fund2 = self.randomFunds()
                continue
                
            newWeights.loc[fund1] = [newWeights.loc[fund1]+change]
            newWeights.loc[fund2] = [newWeights.loc[fund2]-change]

            if self.Evaluate(newWeights) > self.Evaluate(self.weights):
                self.weights=newWeights.copy()
            else:
                print(fund1, self.weights.loc[fund1][0],fund2, self.weights.loc[fund2][0])
                fund1, fund2 = self.randomFunds()
        
        print(self.evaluateAssetOverlap(self.weights))
        print(self.addStocksOverlap(self.weights))
        print(self.getWeightedDiscount(self.weights))


    

In [29]:
obj = runModel()

BUI 0.024541988484716414 BDJ 0.018117524126924855
BGY 0.03044451770479396 ETJ 0.029249184209464842
BMEZ 0.014525342345777119 BOE 0.022893353729791063
BGY 0.0 IAF 0.018738017068681563
BOE 0.022893353729791063 BIF 0.009048099646032343
CEM 0.024332865762016254 CRF 0.07740971281284248
IAF 0.018738017068681563 BME 0.02949345788027417
BSTZ 0.03920676575684921 CET 0.013201034564297554
CEN 0.0 BIF 0.009048099646032343
BSTZ 0.03920676575684921 EOI 0.03156502356812536
AGD 0.036294042498548146 CPZ 0.02945338805251857
AWP 0.0 PEO 0.04100747417873041
BCX 0.035794497399686924 BGR 0.039314562480509116
AGD 0.036294042498548146 EOI 0.03156502356812536
CRF 0.12068971836624044 BIF 0.009048099646032343
EMO 0.0 RNP 0.017464123692887072
UTF 0.009782106065742167 EXD 0.027287498465078316
EMO 0.03 BCX 0.00579449739968692
CII 0.03616690810581928 GRF 0.024472859863820817
BMEZ 0.0 BDJ 0.04687989164240376
CTR 0.022785852806413355 AIO 0.03360117799194339
DPG 0.0 GRF 0.024472859863820817
AWP 0.03 BGR 0.0093145624805

EOS 0.1336034283294338 BOE 0.019096668546251278
EXD 0.0 DSE 0.01
CET 0.21425419187447067 ADX 0.09104469705619915
DPG 0.0 CET 0.21425419187447067
RQI 0.0 CEN 0.04408547515438262
BGY 0.0 SZC 0.029206765756849204
RNP 0.0 AWP 0.01
IGR 0.0 STK 0.01509192841367934
MIE 0.0 BOE 0.019096668546251278
BME 0.0 CET 0.21425419187447067
RNP 0.0 CII 0.07332110139018201
EMO 0.0 PEO 0.05100747417873041
EOI 0.1287482515315498 CET 0.21425419187447067
DNP 0.0 GRF 0.009999999999999997
BME 0.0 AWP 0.01
PEO 0.05100747417873041 CEN 0.04408547515438262
AIO 0.0 PEO 0.05100747417873041
CTR 0.0 ADX 0.09104469705619915
AEF 0.0 BDJ 0.0647852227232267
ASA 0.0 DSE 0.01
CEM 0.0 PEO 0.05100747417873041
BGR 0.0 CET 0.21425419187447067
CEN 0.04408547515438262 BIF 0.019048099646032338
CEN 0.04408547515438262 CII 0.07332110139018201
JEQ 0.0 CII 0.07332110139018201
IGR 0.0 BOE 0.019096668546251278
CLM 0.0 AWP 0.01
JEQ 0.0 SZC 0.029206765756849204
AIO 0.0 ADX 0.09104469705619915
AWP 0.01 ADX 0.09104469705619915
SRV 0.0 EOS 0.

CLM 0.0 SZC 0.029206765756849204
BUI 0.0 GRF 0.060000000000000005
UTF 0.0 BDJ 0.0447852227232267
JEQ 0.0 BDJ 0.0447852227232267
ADX 0.3567994921012443 EOS 0.03360342832943384
ADX 0.42679949210124435 CII 0.013321101390182008
EOI 0.07874825153154984 GRF 0.05
UTF 0.0 GRF 0.05
CLM 0.0 EOI 0.07874825153154984
BOE 0.019096668546251278 GRF 0.05
CEM 0.0 GRF 0.05
CTR 0.0 CEN 0.0040854751543826125
SRV 0.0 BOE 0.019096668546251278
GRF 0.05 CII 0.013321101390182008
BMEZ 0.0 BIF 0.02
BOE 0.02909666854625128 STK 0.00509192841367934
PEO 0.04100747417873041 ADX 0.41679949210124434
ASA 0.0 BIF 0.02
AWP 0.0 CII 0.013321101390182008
UTF 0.0 ADX 0.41679949210124434
CHN 0.0 CET 0.2342541918744707
MIE 0.0 SZC 0.029206765756849204
CEM 0.0 PEO 0.04100747417873041
CHN 0.0 BDJ 0.0447852227232267
CEN 0.0040854751543826125 EOS 0.03360342832943384
MIE 0.0 EOI 0.07874825153154984
CTR 0.0 ADX 0.41679949210124434
RNP 0.0 BOE 0.02909666854625128
ETJ 0.0 EOS 0.03360342832943384
PEO 0.05100747417873041 BIF 0.01
CRF 0.0 

In [9]:
obj = runModel()

63.268865125635585
77.59191712492408
-12.897512635273241


In [11]:
obj = runModel()

65.31939501017541
78.80769565806463
-11.9176216875426


In [30]:
obj.fundlist

[['AWP', 'JEQ'],
 ['CRF', 'MIE'],
 ['BUI', 'BDJ'],
 ['BGY', 'ETJ'],
 ['BDJ', 'SRV'],
 ['CET', 'AIO'],
 ['BGR', 'AEF'],
 ['STK', 'BGY'],
 ['BMEZ', 'BOE'],
 ['BGY', 'IAF'],
 ['CRF', 'AWP'],
 ['BOE', 'BIF'],
 ['CEM', 'CRF'],
 ['BOE', 'CEN'],
 ['AGD', 'EMO'],
 ['MIE', 'BMEZ'],
 ['IAF', 'BME'],
 ['CRF', 'BUI'],
 ['CRF', 'IAF'],
 ['AIO', 'CLM'],
 ['BSTZ', 'CET'],
 ['CEN', 'BIF'],
 ['AIO', 'DNP'],
 ['BSTZ', 'EOI'],
 ['PEO', 'RQI'],
 ['EOS', 'BOE'],
 ['AGD', 'CPZ'],
 ['AWP', 'PEO'],
 ['CPZ', 'RFI'],
 ['BCX', 'BGR'],
 ['AGD', 'EOI'],
 ['CRF', 'BIF'],
 ['EMO', 'RNP'],
 ['EOS', 'DPG'],
 ['UTF', 'EXD'],
 ['CII', 'IGR'],
 ['EMO', 'BCX'],
 ['AIO', 'GLQ'],
 ['CII', 'GRF'],
 ['BMEZ', 'BDJ'],
 ['CTR', 'AIO'],
 ['DPG', 'GRF'],
 ['BDJ', 'DSE'],
 ['GLQ', 'CEM'],
 ['ADX', 'CHN'],
 ['CTR', 'ASA'],
 ['AWP', 'BGR'],
 ['STK', 'NFJ'],
 ['AEF', 'AGD'],
 ['CET', 'BDJ'],
 ['EOI', 'STK'],
 ['CHN', 'PEO'],
 ['CRF', 'GRF'],
 ['JEQ', 'SZC'],
 ['BDJ', 'BME'],
 ['AIO', 'GLQ'],
 ['NFJ', 'BDJ'],
 ['EXD', 'NFJ'],
 ['EXD', 

In [199]:
FundAssets().addStocksOverlap()

61.39535179302859

In [198]:
FundAssets().simpleOverlap()

33.12524075359048

In [89]:
a = FundAssets().Portfolio.groupby("Company").agg({"S&P500":'count'})

STK
0
ADX
1
CHN
1
MIE
3
SZC
14


In [82]:
a.groupby("Company").sum()

Unnamed: 0_level_0,S&P500
Company,Unnamed: 1_level_1
3M Company,1
A. O. Smith Corporation,1
ABIOMED Inc.,1
AES Corporation,1
AMETEK Inc.,1
...,...
Zimmer Biomet Holdings Inc.,1
Zions Bancorporation N.A.,1
Zoetis Inc. Class A,1
eBay Inc.,1


In [115]:
dict = {x:"sum" for x in range(2)}
dict.update({"h":'h'})

In [2]:
dict(zip([1,2],["sum","sum"]))

{1: 'sum', 2: 'sum'}

In [1]:
class runModel(AssetOverlap, Discount):
    def __init__(self):
        self.readFunds()
        self.readSP500()
        self.readDiscount()
        
        N = len(self.getFundNames(self.funds))
        weights = np.random.uniform(0,1.0,(N,1))
        weights = np.divide(weights,np.sum(weights))

        self.weights = pd.DataFrame(weights,columns=["weights"],index=self.getFundNames(self.funds))
        
        self.n = .1
        self.run_model()
        
    def Evaluate(self,weights):
        asset = .7*(self.evaluateAssetOverlap(weights))
        discount = .3*-self.evaluateDiscount(weights)
        return asset+discount
        
    def randomFunds(self):
        fund2 = random.choice(self.weights[self.weights["weights"]!=0].index)
        fund1 = random.choice(self.weights.index)
        return fund1, fund2
    
    def run_model(self):
        fund1, fund2 = self.randomFunds()

        for x in range(100):
            newWeights = self.weights.copy()
            
            change = .01
            if newWeights.loc[fund2][0] < change:
                change = newWeights.loc[fund2][0]
            if change == 0 or fund1==fund2: 
                fund1, fund2 = self.randomFunds()
                continue
                
            newWeights.loc[fund1] = [newWeights.loc[fund1]+change]
            newWeights.loc[fund2] = [newWeights.loc[fund2]-change]

            if self.Evaluate(newWeights) > self.Evaluate(self.weights):
                self.weights=newWeights.copy()
            else:
                fund1, fund2 = self.randomFunds()
        
        print(self.evaluateAssetOverlap(self.weights))
        print(self.addStocksOverlap(self.weights))
        print(self.getWeightedDiscount(self.weights))


    

{'a': 1, 'b': 2, 'c': 3}


In [None]:
# class runModel(AssetOverlap, Discount):
#     def __init__(self):
#         self.readFunds()
#         self.readSP500()
#         self.readDiscount()
        
#         N = len(self.getFundNames(self.funds))
#         weights = np.random.uniform(0,1.0,(N,1))
#         weights = np.divide(weights,np.sum(weights))

#         self.weights = pd.DataFrame(weights,index=self.getFundNames(self.funds))
        
#         self.n = .1
#         self.run_gradient_descent()
    
#     def run_gradient_descent(self):
#         for x in range(100):
#             gradient = -self.n *self.gradient()
#             self.weights = self.weights + gradient 
#             print("k",self.evaluateAssetOverlap())
#             print(self.evaluateDiscount())
            
#     def gradient(self):
#         asset = .5*(100-self.evaluateAssetOverlap())
#         discount = .5*self.evaluateDiscount()
#         print(asset,discount)
#         return asset+discount