In [2]:
import os
import numpy as np
import pandas as pd
import random

In [3]:
class AssetOverlap():
    def __init__(self):
        self.readFunds()
        self.readSP500()

        
    def readFunds(self):
        fundDir = os.listdir("../input/funds")
        funds = pd.DataFrame(columns=["Ticker", "ID"])
        for name in fundDir:
            fund = pd.read_csv("../input/funds/"+name, index_col=0)
            name = name.split(".csv")[0]
            
            fund = fund.rename(columns={fund.columns[-1]:"Ticker", "ID.WEIGHTS":name})
            fund = fund[["ID","Ticker",name]]
            fund = fund[(fund.Ticker.str[:4]!="#N/A") & (fund[name]>0)] #no puts so neg percent and #N/A Unclassified: Unable to parse request at ...  and # N/A Invalid Security
            fund = fund.dropna(axis=0, how='all')
            
            fund[name] = fund[name]/(fund[name].sum())*100 # some have over 100 percent
            
            fund.Ticker = fund.Ticker.fillna("NULL TICKER"+fund.ID)
            
            fund = fund.groupby(["Ticker","ID"]).agg({name:"sum"})# you have to groupby id and ticker or else it may merge multipletimes
            
            funds = funds.merge(fund, how='outer', on=["Ticker","ID"])
        
        
        params = {fundName:"sum" for fundName in self.getFundNames(funds)}
        params.update({"ID":lambda IDS: IDS.iloc[np.argmax([np.all([not char.isdigit() for char in ID]) for ID in IDS])] }) #first id that is all characters or first id
        funds = funds.groupby("Ticker").agg(params)
        funds = funds.reset_index()

        #print(funds.Ticker.duplicated().sum())
        
        funds = funds.fillna(0)
        self.funds = funds

    def getFundMatrix(self):
        return self.funds.loc[:,~self.funds.columns.isin(["ID","Ticker"])]
    
    def readSP500(self):
        self.sp500 = pd.read_csv("../input/s&p500.csv")
        self.sp500 = self.sp500.iloc[:,:3]
        self.sp500 = self.sp500.rename(columns={"Symbol":"Ticker", "Weight":"S&P500"})
        
    def getFundNames(self,funds):
        return [x for x in funds.columns if x not in ["ID", "Ticker"]]
    
    def makePortfolio(self,weights):
        #self.weights = self.weights.loc[self.getFundNames(self.funds)]
        columns = ["Ticker","ID"] + list(weights.index)
        self.funds = self.funds[columns]

        Portfolio = self.getFundMatrix().dot(weights)
        Portfolio = Portfolio.rename({Portfolio.columns[0]:"Portfolio"},axis=1)
        Portfolio[["Ticker","ID"]] = self.funds[["Ticker","ID"]]
        
        Portfolio = pd.merge(Portfolio, self.sp500, how='outer', on="Ticker")
        Portfolio.ID = Portfolio.ID.fillna(Portfolio.Company)
        Portfolio = Portfolio.drop("Company", axis=1)
        return Portfolio
    
    def makeAddStocksPortfolio(self,Portfolio):
        dif = Portfolio["S&P500"]-Portfolio["Portfolio"] # determine which sp500 assets have more than in the portfolio
        self.extra = Portfolio
        self.extra["Dif"] = dif
        self.extra = self.extra[self.extra.Dif>0][["Ticker","ID","Dif"]]
        self.extra = self.extra.rename(columns={"Dif":"Portfolio"})
        AddStocksPortfolio = pd.concat([Portfolio[["Ticker","ID","Portfolio"]],self.extra],axis=0)
        return AddStocksPortfolio
    
    def simpleOverlap(self,weights):
        Portfolio = self.makePortfolio(weights)
        Portfolio[["Portfolio","S&P500"]] = Portfolio[["Portfolio","S&P500"]].fillna(0)
        overlap = np.minimum(np.array(Portfolio["Portfolio"]),np.array(Portfolio["S&P500"]))
        overlap = overlap.sum()
        total = Portfolio.Portfolio.sum()
        return overlap/total*100
    
    def addStocksOverlap(self,weights):
        Portfolio = self.makePortfolio(weights)
        AddStocksPortfolio = self.makeAddStocksPortfolio(Portfolio)
        return 100/AddStocksPortfolio.Portfolio.sum()*100
        
    def evaluateAssetOverlap(self,weights):
        #self.weights = pd.DataFrame(np.ones((49,1))*-1000,index=self.getFundNames(self.funds))
        return self.simpleOverlap(weights)
        
        
        
        

In [4]:
class Discount:
    def __init__(self):
        self.readDiscount()
    def readDiscount(self):
        self.discount = pd.read_csv("../input/Discounts.csv")
        self.discount[["Discount", "52W Discount","Effective","Distribution"]] = self.discount.apply(lambda x: [x["Discount"].strip("%"),x["52W Discount"].strip("%"), x["Effective"].strip("%"), x["Distribution"].strip("%")], axis=1, result_type='expand')
        self.discount = self.discount.replace("--",0)
        self.discount.iloc[:,1:] = self.discount.iloc[:,1:].astype("float")
        
        
        longterm = self.discount["52W Discount"]-self.discount["Discount"] #max: a pos 52 week is pos, a neg discount is more pos
        longterm = longterm/np.max(longterm) *100# between -inf and 1
        self.discount["longterm"] = longterm
        current = -self.discount["Discount"] # max: a neg discount is pos
        current = current/np.max(-self.discount["Discount"]) * 100 #between -inf and 1
        self.discount["value"] = np.sum([longterm,2*current],axis=0)/3
        self.discount = self.discount.set_index("Ticker")
    
        
    def getWeightedDiscount(self,weights):
        self.discount = self.discount.loc[weights.index]
        weights = np.array(weights).reshape(1,len(weights))
        values = np.array(self.discount.Discount).reshape(len(self.discount),1)
        return np.matmul(weights,values)[0][0]
    
    def evaluateDiscount(self,weights):
        self.discount = self.discount.loc[weights.index]
        weights = np.array(weights).reshape(1,len(weights))
        values = np.array(self.discount.value).reshape(len(self.discount),1)
        
        return np.dot(weights,values)[0][0]
        
          

In [5]:
class runModel(AssetOverlap, Discount):
    def __init__(self):
        self.readFunds()
        self.readSP500()
        self.readDiscount()
        self.fundlist = []
        N = len(self.getFundNames(self.funds))
        weights = np.random.uniform(0,1.0,(N,1))
        weights = np.divide(weights,np.sum(weights))

        self.weights = pd.DataFrame(weights,columns=["weights"],index=self.getFundNames(self.funds))
        
        self.run_model()
        
    def Evaluate(self,weights):
        asset = .7*(self.evaluateAssetOverlap(weights))
        discount = .3*self.evaluateDiscount(weights)
        return asset+discount
        
    def randomFunds(self):
        fund2 = random.choice(self.weights[self.weights["weights"]!=0].index)
        fund1 = random.choice(self.weights.index)
        self.fundlist.append([fund1, fund2])
        return fund1, fund2
    
    def run_model(self):
        fund1, fund2 = self.randomFunds()
        
        for x in range(1000):
            newWeights = self.weights.copy()
            
            change = .01
            if newWeights.loc[fund2][0] < change:
                change = newWeights.loc[fund2][0]
            if change == 0 or fund1==fund2: 
                fund1, fund2 = self.randomFunds()
                continue
                
            newWeights.loc[fund1] = [newWeights.loc[fund1]+change]
            newWeights.loc[fund2] = [newWeights.loc[fund2]-change]

            if self.Evaluate(newWeights) > self.Evaluate(self.weights):
                self.weights=newWeights.copy()
            else:
                print(fund1, self.weights.loc[fund1][0],fund2, self.weights.loc[fund2][0])
                fund1, fund2 = self.randomFunds()
        
        print(self.evaluateAssetOverlap(self.weights))
        print(self.addStocksOverlap(self.weights))
        print(self.getWeightedDiscount(self.weights))


    

In [6]:
obj = runModel()

RFI 0.03114983716192709 DSE 0.013013186044979069
BCX 0.0009799933509503595 CPZ 0.0011381877987538646
EOS 0.02633769010541584 CET 0.013677132192276755
AEF 0.03126554064889673 CET 0.013677132192276755
BCX 0.0 CPZ 0.0011381877987538646
EXD 0.0021592154017754613 BME 0.017558534347751314
CHN 0.02401040237049198 AEF 0.03126554064889673
BUI 0.005184496657620095 UTF 0.014242915326287768
SRV 0.029364129423688354 CPZ 0.0011381877987538646
BME 0.017558534347751314 EOS 0.0273176834563662
DPG 0.08215518534977183 CTR 0.013566616257662548
AEF 0.0 BDJ 0.03630586048293212
BGY 0.030877872869896468 EMO 0.04524383421274822
AEF 0.0 SRV 0.029364129423688354
MIE 0.04063633775224503 AOD 0.048515339544214844
AEF 0.0 EOI 0.0020238663493447476
CLM 0.00874914405225918 NFJ 0.005249637404269589
AWP 0.037720984891691206 CET 0.013677132192276755
UTF 0.014242915326287768 RNP 0.0019137039069995366
CEN 0.025100566394666488 CII 0.01901333484332141
SZC 0.0 CPZ 0.0011381877987538646
NFJ 0.005249637404269589 GRF 0.033568576

AOD 0.0 CEN 0.0398039254076582
UTF 0.0 CEN 0.0398039254076582
MIE 0.0 BOE 0.03254764366674208
PEO 0.07648843910781268 CPZ 0.053051891705753404
CEN 0.0698039254076582 PEO 0.04648843910781268
CHN 0.0 BME 0.0198283950263394
SZC 0.03 CEN 0.0698039254076582
ETJ 0.019364129423688356 EOI 0.07689603483679373
CET 0.16127259851381937 CPZ 0.023051891705753398
STK 0.01016993240489117 SZC 0.03
CTR 0.0 BOE 0.03254764366674208
EMO 0.0 AIO 0.01
JEQ 0.0 CII 0.08516939049403005
AGD 0.0 CEN 0.0698039254076582
DSE 0.0 NFJ 0.05218406199736515
GLQ 0.0 CEN 0.0698039254076582
RFI 0.0 SZC 0.03
CET 0.16127259851381937 ADX 0.12249541972929201
EOS 0.15493531126857532 CET 0.16127259851381937
UTF 0.0 PEO 0.04648843910781268
EMO 0.0 CII 0.08516939049403005
AWP 0.0 CEN 0.0698039254076582
DNP 0.0 EOS 0.15493531126857532
CPZ 0.023051891705753398 EOI 0.09626016426048209
CHN 0.0 IGR 0.008515339544214838
BIF 0.0 ADX 0.12249541972929201
RQI 0.0 CEN 0.0698039254076582
AWP 0.0 PEO 0.04648843910781268
RQI 0.0 STK 0.0101699324

BDJ 0.06370891009745579 EOS 0.11493531126857531
NFJ 0.022184061997365147 CEN 0.0698039254076582
BMEZ 0.0 STK 0.01999832743123057
BGY 0.0 EOI 0.1262601642604821
SZC 0.028515339544214834 STK 0.01999832743123057
BME 0.0 ADX 0.12249541972929201
EOI 0.1362601642604821 CEN 0.0598039254076582
GRF 0.06356857677556835 BDJ 0.04370891009745578
DSE 0.01 NFJ 0.012184061997365147
BUI 0.0 AOD 0.012184061997365147
CEN 0.0598039254076582 SZC 0.028515339544214834
IAF 0.0 BIF 0.02
IGR 0.0 CEN 0.0598039254076582
RFI 0.0 CII 0.06516939049403006
SRV 0.0 BDJ 0.04370891009745578
BSTZ 0.0 BOE 0.022547643666742077
IAF 0.0 PEO 0.04648843910781268
CEN 0.0598039254076582 EOS 0.11493531126857531
CEM 0.0 CEN 0.0598039254076582
BIF 0.02 GRF 0.06356857677556835
AWP 0.0 STK 0.01999832743123057
DPG 0.0 CET 0.2012725985138194
JEQ 0.0 BDJ 0.04370891009745578
CEN 0.0698039254076582 GRF 0.053568576775568345
CHN 0.0 CET 0.2012725985138194
IGR 0.0 AOD 0.012184061997365147
SRV 0.0 BOE 0.022547643666742077
PEO 0.046488439107812

In [9]:
obj = runModel()

63.268865125635585
77.59191712492408
-12.897512635273241


In [11]:
obj = runModel()

65.31939501017541
78.80769565806463
-11.9176216875426


In [49]:
w = obj.weights.loc[(obj.weights.index.isin(['ADX', 'SZC', 'CII', 'GRF', 'EOI', 'BDJ', 'EOS', 'CET', 'BOE', 'PEO'])) & (obj.weights.weights!=0)]
w = w/w.sum()
Discount().getWeightedDiscount(w)
AssetOverlap().addStocksOverlap(w)

78.1111325641178

In [44]:
w

Unnamed: 0,weights
ADX,0.336267
SZC,0.044094
CII,0.07184
GRF,0.059052
EOI,0.150208
BDJ,0.048183
EOS,0.071582
CET,0.154096
BOE,0.022047
PEO,0.042631


In [48]:
Discount().discount.loc[w.index]

Unnamed: 0,Effective,Distribution,Discount,52W Discount,longterm,value
ADX,0.0,20.55,-13.27,-13.74,-4.304029,36.46878
SZC,3.02,6.89,-23.32,-21.58,15.934066,71.920895
CII,0.1,5.99,-7.0,-8.3,-11.904762,16.026033
GRF,0.0,7.54,-22.63,-18.83,34.798535,76.238186
EOI,0.0,6.67,-3.52,-4.23,-6.501832,7.886993
BDJ,0.27,7.19,-9.53,-9.34,1.739927,27.80077
EOS,0.0,5.5,2.57,-0.67,-29.67033,-17.23087
CET,0.32,9.43,-18.61,-17.49,10.25641,56.575044
BOE,0.0,7.09,-12.33,-12.3,0.274725,35.310084
PEO,0.7,14.84,-17.04,-13.88,28.937729,58.317718


In [8]:
obj.weights.sort_values(by="weights",ascending=False)

Unnamed: 0,weights
ADX,0.305043
CET,0.139788
EOI,0.13626
CII,0.065169
EOS,0.064935
GRF,0.053569
BDJ,0.043709
SZC,0.04
CEN,0.039804
PEO,0.038673


In [199]:
FundAssets().addStocksOverlap()

61.39535179302859

In [35]:
Discount().discount

Unnamed: 0_level_0,Effective,Distribution,Discount,52W Discount,longterm,value
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ADX,0.0,20.55,-13.27,-13.74,-4.304029,36.46878
AEF,7.9,2.64,-12.89,-14.09,-10.989011,33.155048
AGD,6.94,7.51,-12.7,-13.11,-3.754579,35.023824
AIO,2.95,5.05,-6.89,-10.25,-30.769231,9.423681
AOD,0.09,7.84,-13.81,-13.28,4.85348,41.063699
ASA,0.0,0.09,-15.96,-15.45,4.67033,47.143752
AWP,7.68,9.28,-12.22,-13.72,-13.736264,30.325558
BCX,1.2,6.63,-14.22,-15.29,-9.798535,37.350788
BDJ,0.27,7.19,-9.53,-9.34,1.739927,27.80077
BGR,0.22,7.88,-13.84,-9.02,44.139194,54.244627


In [89]:
a = FundAssets().Portfolio.groupby("Company").agg({"S&P500":'count'})

STK
0
ADX
1
CHN
1
MIE
3
SZC
14


In [82]:
a.groupby("Company").sum()

Unnamed: 0_level_0,S&P500
Company,Unnamed: 1_level_1
3M Company,1
A. O. Smith Corporation,1
ABIOMED Inc.,1
AES Corporation,1
AMETEK Inc.,1
...,...
Zimmer Biomet Holdings Inc.,1
Zions Bancorporation N.A.,1
Zoetis Inc. Class A,1
eBay Inc.,1


In [115]:
dict = {x:"sum" for x in range(2)}
dict.update({"h":'h'})

In [2]:
dict(zip([1,2],["sum","sum"]))

{1: 'sum', 2: 'sum'}

In [1]:
class runModel(AssetOverlap, Discount):
    def __init__(self):
        self.readFunds()
        self.readSP500()
        self.readDiscount()
        
        N = len(self.getFundNames(self.funds))
        weights = np.random.uniform(0,1.0,(N,1))
        weights = np.divide(weights,np.sum(weights))

        self.weights = pd.DataFrame(weights,columns=["weights"],index=self.getFundNames(self.funds))
        
        self.n = .1
        self.run_model()
        
    def Evaluate(self,weights):
        asset = .7*(self.evaluateAssetOverlap(weights))
        discount = .3*-self.evaluateDiscount(weights)
        return asset+discount
        
    def randomFunds(self):
        fund2 = random.choice(self.weights[self.weights["weights"]!=0].index)
        fund1 = random.choice(self.weights.index)
        return fund1, fund2
    
    def run_model(self):
        fund1, fund2 = self.randomFunds()

        for x in range(100):
            newWeights = self.weights.copy()
            
            change = .01
            if newWeights.loc[fund2][0] < change:
                change = newWeights.loc[fund2][0]
            if change == 0 or fund1==fund2: 
                fund1, fund2 = self.randomFunds()
                continue
                
            newWeights.loc[fund1] = [newWeights.loc[fund1]+change]
            newWeights.loc[fund2] = [newWeights.loc[fund2]-change]

            if self.Evaluate(newWeights) > self.Evaluate(self.weights):
                self.weights=newWeights.copy()
            else:
                fund1, fund2 = self.randomFunds()
        
        print(self.evaluateAssetOverlap(self.weights))
        print(self.addStocksOverlap(self.weights))
        print(self.getWeightedDiscount(self.weights))


    

{'a': 1, 'b': 2, 'c': 3}


In [None]:
# class runModel(AssetOverlap, Discount):
#     def __init__(self):
#         self.readFunds()
#         self.readSP500()
#         self.readDiscount()
        
#         N = len(self.getFundNames(self.funds))
#         weights = np.random.uniform(0,1.0,(N,1))
#         weights = np.divide(weights,np.sum(weights))

#         self.weights = pd.DataFrame(weights,index=self.getFundNames(self.funds))
        
#         self.n = .1
#         self.run_gradient_descent()
    
#     def run_gradient_descent(self):
#         for x in range(100):
#             gradient = -self.n *self.gradient()
#             self.weights = self.weights + gradient 
#             print("k",self.evaluateAssetOverlap())
#             print(self.evaluateDiscount())
            
#     def gradient(self):
#         asset = .5*(100-self.evaluateAssetOverlap())
#         discount = .5*self.evaluateDiscount()
#         print(asset,discount)
#         return asset+discount