In [2]:
import os
import numpy as np
import pandas as pd
import random

In [329]:
class AssetOverlap():
    """Compare a weighted blend of funds against the S&P 500 constituents.

    Attributes populated by the readers:
        funds: one row per ticker with "Ticker", "ID" and one percentage
            column per fund file (0 where a fund does not hold the ticker).
        sp500: the first three columns of the S&P 500 file, with "Symbol"
            renamed to "Ticker" and "Weight" renamed to "S&P500".
    """

    def __init__(self):
        self.readFunds()
        self.readSP500()

    @staticmethod
    def _pickID(IDS):
        """Return the first ID containing no digits, else the first ID."""
        for ID in IDS:
            if not any(char.isdigit() for char in ID):
                return ID
        return IDS.iloc[0]

    def readFunds(self):
        """Load every fund CSV under ../input/funds into ``self.funds``.

        Each file contributes one column named after the file (without
        ".csv") holding that fund's holdings re-normalised to sum to 100.
        """
        fundRoot = "../input/funds"
        # Only CSV files are holdings files (the directory may also contain
        # e.g. notebook checkpoints); sorting makes the column order
        # reproducible across machines.
        fileNames = sorted(
            fileName for fileName in os.listdir(fundRoot)
            if fileName.lower().endswith(".csv")
        )

        funds = pd.DataFrame(columns=["Ticker", "ID"])
        for fileName in fileNames:
            fund = pd.read_csv(os.path.join(fundRoot, fileName), index_col=0)
            name = fileName.split(".csv")[0]

            # The last column holds the ticker; "ID.WEIGHTS" holds the weight.
            fund = fund.rename(columns={fund.columns[-1]: "Ticker", "ID.WEIGHTS": name})
            fund = fund[["ID", "Ticker", name]]
            # Drop unparseable tickers ("#N/A ...") and non-positive weights
            # (no puts, so negative percentages are excluded).
            fund = fund[(fund.Ticker.str[:4] != "#N/A") & (fund[name] > 0)]
            fund = fund.dropna(axis=0, how='all')

            # Some funds report more than 100 percent in total; rescale.
            fund[name] = fund[name] / (fund[name].sum()) * 100

            # Holdings without a ticker get a synthetic one built from the ID.
            fund["Ticker"] = fund.Ticker.fillna("NULL TICKER" + fund.ID)

            # Group by both keys so the outer merge below cannot match the
            # same holding multiple times.
            fund = fund.groupby(["Ticker", "ID"]).agg({name: "sum"})

            funds = funds.merge(fund, how='outer', on=["Ticker", "ID"])

        # Collapse to one row per ticker: sum the weights, keep a single ID.
        params = {fundName: "sum" for fundName in self.getFundNames(funds)}
        params["ID"] = self._pickID
        funds = funds.groupby("Ticker").agg(params)
        funds = funds.reset_index()

        funds = funds.fillna(0)
        self.funds = funds

    def getFundMatrix(self):
        """Return ``self.funds`` without the "ID" and "Ticker" columns."""
        return self.funds.loc[:, ~self.funds.columns.isin(["ID", "Ticker"])]

    def readSP500(self):
        """Load the S&P 500 constituents into ``self.sp500``."""
        self.sp500 = pd.read_csv("../input/s&p500.csv")
        self.sp500 = self.sp500.iloc[:, :3]
        self.sp500 = self.sp500.rename(columns={"Symbol": "Ticker", "Weight": "S&P500"})

    def getFundNames(self, funds):
        """Return the columns of ``funds`` other than "ID" and "Ticker"."""
        return [x for x in funds.columns if x not in ["ID", "Ticker"]]

    def makePortfolio(self, weights):
        """Blend the funds by ``weights`` and join the S&P 500 weights.

        ``weights`` is a single-column DataFrame indexed by fund name.
        Returns a frame with "Portfolio", "Ticker", "ID" and "S&P500";
        tickers present on only one side have NaN on the other.
        ``self.funds`` is left untouched, so later calls may use a different
        set of funds.
        """
        fundNames = list(weights.index)
        holdings = self.funds[fundNames]

        Portfolio = holdings.dot(weights)
        Portfolio = Portfolio.rename({Portfolio.columns[0]: "Portfolio"}, axis=1)
        Portfolio["Ticker"] = self.funds["Ticker"]
        Portfolio["ID"] = self.funds["ID"]

        Portfolio = pd.merge(Portfolio, self.sp500, how='outer', on="Ticker")
        # S&P-only rows have no fund ID; fall back to the "Company" column
        # (assumed to be among the three columns kept from the S&P 500 file).
        Portfolio["ID"] = Portfolio.ID.fillna(Portfolio.Company)
        Portfolio = Portfolio.drop("Company", axis=1)
        return Portfolio

    def makeAddStocksPortfolio(self, Portfolio):
        """Append the stock purchases needed to reach S&P 500 weights.

        For every ticker whose S&P 500 weight exceeds its portfolio weight
        the shortfall is appended as an extra row. A missing weight on either
        side counts as 0, so index members absent from the portfolio are
        included. The input frame is not modified; the extra rows are also
        stored on ``self.extra``.
        """
        shortfall = Portfolio["S&P500"].fillna(0) - Portfolio["Portfolio"].fillna(0)
        extra = Portfolio[["Ticker", "ID"]].copy()
        extra["Portfolio"] = shortfall
        self.extra = extra[extra["Portfolio"] > 0]
        AddStocksPortfolio = pd.concat(
            [Portfolio[["Ticker", "ID", "Portfolio"]], self.extra], axis=0
        )
        return AddStocksPortfolio

    def simpleOverlap(self, weights):
        """Return the share (in percent) of the portfolio overlapping the index.

        Overlap per ticker is the smaller of the portfolio and S&P 500
        weights; the sum is divided by the total portfolio weight.
        """
        Portfolio = self.makePortfolio(weights)
        Portfolio[["Portfolio", "S&P500"]] = Portfolio[["Portfolio", "S&P500"]].fillna(0)
        overlap = np.minimum(np.array(Portfolio["Portfolio"]), np.array(Portfolio["S&P500"]))
        overlap = overlap.sum()
        total = Portfolio.Portfolio.sum()
        return overlap / total * 100

    def addStocksOverlap(self, weights):
        """Return 100 / (portfolio total incl. added stocks), in percent."""
        Portfolio = self.makePortfolio(weights)
        AddStocksPortfolio = self.makeAddStocksPortfolio(Portfolio)
        return 100 / AddStocksPortfolio.Portfolio.sum() * 100

    def evaluateAssetOverlap(self, weights):
        """Score ``weights`` by their simple overlap with the S&P 500."""
        return self.simpleOverlap(weights)
        
        
        
        

In [156]:
class Discount:
    """Load per-fund discount figures and score weighted fund blends by them."""

    def __init__(self):
        self.readDiscount()

    def readDiscount(self, path="../input/Discounts.csv"):
        """Read the discount table into ``self.discount`` (indexed by Ticker).

        ``path`` is handed to ``pandas.read_csv``. The first column is taken
        to be the ticker; every other column is converted to float after
        stripping "%" from the four percentage columns and treating "--" as 0.
        Two derived columns are added:

        * ``longterm``: 52-week discount minus current discount, scaled so
          the largest value is 100.
        * ``value``: (longterm + 2 * current) / 3, where ``current`` is the
          negated discount scaled so the largest value is 100.
        """
        self.discount = pd.read_csv(path)

        percentCols = ["Discount", "52W Discount", "Effective", "Distribution"]
        for col in percentCols:
            self.discount[col] = self.discount[col].str.strip("%")
        # Convert column by column so each one really ends up with a float
        # dtype (a block assignment through .iloc can keep object dtype).
        for col in self.discount.columns[1:]:
            self.discount[col] = self.discount[col].replace("--", "0").astype("float")

        # A 52-week discount above the current one scores positive; a more
        # negative current discount pushes the score further up.
        longterm = self.discount["52W Discount"] - self.discount["Discount"]
        longterm = longterm / np.max(longterm) * 100  # at most 100, unbounded below
        self.discount["longterm"] = longterm

        # A negative (deeper) discount is better, so negate before scaling.
        current = -self.discount["Discount"]
        current = current / np.max(current) * 100  # at most 100, unbounded below

        self.discount["value"] = (longterm + 2 * current) / 3
        self.discount = self.discount.set_index("Ticker")

    def _weightedColumn(self, weights, column):
        """Return the dot product of ``weights`` with ``column`` of the table.

        Rows are selected by ``weights.index`` on a local view, so
        ``self.discount`` keeps all of its rows.
        """
        selected = self.discount.loc[weights.index, column]
        weightRow = np.array(weights).reshape(1, len(weights))
        valueCol = np.array(selected).reshape(len(selected), 1)
        return np.matmul(weightRow, valueCol)[0][0]

    def getWeightedDiscount(self, weights):
        """Return the weighted sum of the "Discount" column for ``weights``."""
        return self._weightedColumn(weights, "Discount")

    def evaluateDiscount(self, weights):
        """Return the weighted sum of the derived "value" column for ``weights``."""
        return self._weightedColumn(weights, "value")
        
          

In [None]:
class TabuSet:
    """Keep a list of already-applied adjustments and test pairs against it.

    The list lives on ``self.TabuSet``; each entry must provide
    ``getFunds()`` returning its two fund names.
    """

    def __init__(self):
        self.TabuSet = []

    def inTabuSet(self, option):
        """Return True if ``option``'s fund pair, in either order, is tabu.

        ``option`` must expose ``fund1`` and ``fund2`` attributes.
        """
        # Subclasses that do not call this __init__ would otherwise hit an
        # AttributeError on first use, so create the list lazily.
        if not hasattr(self, "TabuSet"):
            self.TabuSet = []
        tabuList = [adjustment.getFunds() for adjustment in self.TabuSet]
        forward = [option.fund1, option.fund2]
        backward = [option.fund2, option.fund1]
        return forward in tabuList or backward in tabuList

In [None]:
class Adjustment():
    """Record of one weight shift between two funds and its outcome.

    Attributes:
        fund1, fund2: the two fund names involved in the shift.
        value: the objective value reached after the shift.
        weights: the weights resulting from the shift.
    """

    def __init__(self, fund1, fund2, value, weights):
        # `self` was missing from the signature, which made every
        # construction raise TypeError.
        self.fund1 = fund1
        self.fund2 = fund2
        self.value = value
        self.weights = weights

    def __repr__(self):
        return f"Adjustment(fund1={self.fund1!r}, fund2={self.fund2!r}, value={self.value!r})"

    def getValue(self):
        """Return the objective value stored for this adjustment."""
        return self.value

    def getWeights(self):
        """Return the weights stored for this adjustment."""
        return self.weights

    def getFunds(self):
        """Return ``[fund1, fund2]``."""
        return [self.fund1, self.fund2]

In [336]:
class runModel(AssetOverlap, Discount,TabuSet):
    """Local search over fund weights that remembers applied adjustments.

    Starts from random weights that sum to 1 and repeatedly shifts weight
    between pairs of funds, keeping the best non-tabu adjustment found by
    ``getOptions``.
    """

    def __init__(self):
        self.readFunds()
        self.readSP500()
        self.readDiscount()
        # The parent constructors are not called, so the tabu list has to be
        # created here before getOptions uses it.
        self.TabuSet = []

        fundNames = self.getFundNames(self.funds)
        weights = np.random.uniform(0, 1.0, (len(fundNames), 1))
        weights = np.divide(weights, np.sum(weights))

        self.weights = pd.DataFrame(weights, columns=["weights"], index=fundNames)

    def Evaluate(self, weights):
        """Return the objective for ``weights`` (higher is better).

        The discount term currently has coefficient 0, so the score equals
        the asset overlap.
        """
        asset = 1*(self.evaluateAssetOverlap(weights))
        discount = 0*-self.evaluateDiscount(weights)
        return asset+discount

    def changeWeights(self, fund1, fund2, weights, change):
        """Return a copy of ``weights`` with ``change`` moved from fund2 to fund1.

        The amount moved is capped at fund2's current weight so no weight
        goes negative. The input frame is not modified.
        """
        change = min(weights.loc[fund2, "weights"], change)
        adjusted = weights.copy()
        adjusted.loc[fund1, "weights"] += change
        adjusted.loc[fund2, "weights"] -= change
        return adjusted

    def EvaluateChange(self, fund1, fund2, weights, change):
        """Return the objective after moving ``change`` from fund2 to fund1."""
        return self.Evaluate(self.changeWeights(fund1, fund2, weights, change))

    def findIdealChange(self, fund1, fund2, weights, change):
        """Keep moving ``change`` from fund2 to fund1 while the objective improves.

        Returns ``(value, weights)`` for the last improving step, or for the
        input weights if the first step does not improve.
        """
        value = self.Evaluate(weights)
        while weights.loc[fund2, "weights"] > 0:
            candidate = self.changeWeights(fund1, fund2, weights, change)
            candidateValue = self.Evaluate(candidate)
            if candidateValue <= value:
                break
            weights, value = candidate, candidateValue
        return value, weights

    def makeAdjustment(self, fund1, fund2, weights, change):
        """Shift weight between the two funds in whichever direction helps.

        Returns ``(value, weights)``; when moving weight into fund1 scores
        below the current weights (and the reverse move does not win), the
        inputs are returned unchanged.
        """
        control = self.Evaluate(weights)
        up = self.EvaluateChange(fund1, fund2, weights, change)
        down = self.EvaluateChange(fund2, fund1, weights, change)

        if down > control and down > up:
            # Moving weight the other way is the better direction.
            fund1, fund2 = fund2, fund1
        elif control > up:
            return control, weights

        weights = self.changeWeights(fund1, fund2, weights, change)
        return self.findIdealChange(fund1, fund2, weights, change)

    def randomFunds(self):
        """Return a random ``(fund1, fund2)`` pair of fund names."""
        fund2 = random.choice(self.weights.index)
        fund1 = random.choice(self.weights.index)
        return fund1, fund2

    def getOption(self, fund1, fund2):
        """Evaluate shifting weight between two funds as an ``Adjustment``."""
        change = .1
        value, weights = self.makeAdjustment(fund1, fund2, self.weights, change)
        return Adjustment(fund1=fund1, fund2=fund2, value=value, weights=weights)

    def getOptions(self):
        """Try 10 random fund pairs and apply the best one that is not tabu.

        The chosen adjustment's weights become ``self.weights`` and the
        adjustment is appended to the tabu list. If every candidate is tabu,
        nothing changes.
        """
        n = 10
        allowed = []
        for fund1, fund2 in (self.randomFunds() for _ in range(n)):
            option = self.getOption(fund1, fund2)
            if not self.inTabuSet(option):
                allowed.append(option)
        if not allowed:
            return

        best = max(allowed, key=lambda option: option.getValue())
        self.weights = best.getWeights()
        self.TabuSet.append(best)
        

In [350]:
# Scratch check: index a 1-D array with two scalar booleans; the recorded
# output below is an empty array of shape (0, 2).
np.array([False,"hey"])[False,False]

array([], shape=(0, 2), dtype='<U5')

In [352]:
# Scratch check: pair each language with its version, then view the values.
languages = ['Java', 'Python', 'JavaScript']
versions = [14, 3, 6]

result = {language: version for language, version in zip(languages, versions)}
result.values()

dict_values([14, 3, 6])

In [341]:
# Build the model (the constructor reads the input files) and run a trial.
# NOTE(review): no `trial` method is defined on the runModel classes visible
# in this file — confirm which version of the class produced the output below.
obj = runModel()
obj.trial()

35.66692317114405
0.008299854271928309 0.0002510616573965859
35.67296045982042
35.67296045982042
0.008550915929324894 0.0


In [110]:
# Sanity check: the recorded output below shows the weights summing to 1.0.
obj.weights.sum()

weights    1.0
dtype: float64

In [480]:
# Split the holdings into rows whose ticker starts with "NULL" and the rest,
# then look up each NULL-ticker row's ID among the rows that have a ticker.
# NOTE(review): `funds` is not defined at module level in the visible code —
# presumably a holdings frame with "Ticker" and "ID" columns; confirm.
null = funds[funds["Ticker"].str[:4]=="NULL"]
notnull = funds[funds["Ticker"].str[:4]!="NULL"].groupby("ID")["Ticker"].first()
null.merge(notnull, how="left", left_on="ID", right_index=True)
# Finding: no overlap between the NULL-ticker rows and the rows that have a ticker.



# Tickers that map to more than one ID.
# NOTE(review): "BUI" looks like one fund's column and `hey` is a helper that
# is not defined in the visible code — confirm both.
k = funds.groupby(["Ticker"])["ID"].count().to_frame()
t = funds.groupby(["Ticker","ID"])["BUI"].count()
t[t.index.get_level_values("Ticker").isin(k[k.ID>1].index)].reset_index().groupby("Ticker")["ID"].apply(hey)

Unnamed: 0,weights
STK,0.029072
ADX,0.223149
CHN,0.0
MIE,0.0
SZC,0.0
CII,0.049814
GRF,0.017187
BCX,0.0
CRF,0.15304
CEN,0.0


In [403]:
# Load the discount table and display it (indexed by Ticker, see output below).
Discount().discount

Unnamed: 0_level_0,Effective,Distribution,Discount,52W Discount,value
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ADX,0.0,20.55,-13.27,-13.74,-26.275577
AEF,7.9,2.64,-12.89,-14.09,-22.119033
AGD,6.94,7.51,-12.7,-13.11,-25.329223
AIO,2.95,5.05,-6.89,-10.25,0.624547
AOD,0.09,7.84,-13.81,-13.28,-32.011144
ASA,0.0,0.09,-15.96,-15.45,-36.525396
AWP,7.68,9.28,-12.22,-13.72,-19.310103
BCX,1.2,6.63,-14.22,-15.29,-25.563458
BDJ,0.27,7.19,-9.53,-9.34,-21.285559
BGR,0.22,7.88,-13.84,-9.02,-51.718269


In [199]:
# NOTE(review): `FundAssets` is not defined in the visible code — presumably an
# earlier name of AssetOverlap whose methods took no weights; confirm.
FundAssets().addStocksOverlap()

61.39535179302859

In [198]:
# NOTE(review): `FundAssets` is not defined in the visible code — presumably an
# earlier name of AssetOverlap whose methods took no weights; confirm.
FundAssets().simpleOverlap()

33.12524075359048

In [89]:
# Count the non-null "S&P500" entries per "Company".
# NOTE(review): `FundAssets` and its `Portfolio` attribute are not defined in
# the visible code — confirm which class version this cell ran against.
a = FundAssets().Portfolio.groupby("Company").agg({"S&P500":'count'})

STK
0
ADX
1
CHN
1
MIE
3
SZC
14


In [115]:
# Scratch check of building an aggregation spec with a comprehension and then
# extending it. The variable is not called `dict`: that name would shadow the
# builtin and make later `dict(...)` calls in this session fail.
params = {x:"sum" for x in range(2)}
params.update({"h":'h'})

In [2]:
# Scratch check: build a {key: "sum"} mapping from two parallel lists.
dict(zip([1,2],["sum","sum"]))

{1: 'sum', 2: 'sum'}

In [1]:
class runModel(AssetOverlap, Discount):
    """Hill-climb fund weights towards high S&P 500 overlap and deep discounts.

    Construction loads the input data, draws random weights that sum to 1 and
    immediately runs the search, printing the final metrics.
    """

    def __init__(self):
        self.readFunds()
        self.readSP500()
        self.readDiscount()

        fundNames = self.getFundNames(self.funds)
        weights = np.random.uniform(0, 1.0, (len(fundNames), 1))
        weights = np.divide(weights, np.sum(weights))

        self.weights = pd.DataFrame(weights, columns=["weights"], index=fundNames)

        self.n = .1
        self.run_model()

    def Evaluate(self, weights):
        """Return the objective for ``weights`` (higher is better).

        70% asset overlap plus 30% of the negated discount score.
        """
        asset = .7*(self.evaluateAssetOverlap(weights))
        discount = .3*-self.evaluateDiscount(weights)
        return asset+discount

    def randomFunds(self):
        """Return a random ``(fund1, fund2)`` pair.

        fund2 (the donor) is drawn only from funds with non-zero weight;
        fund1 (the receiver) is drawn from all funds.
        """
        fund2 = random.choice(self.weights[self.weights["weights"]!=0].index)
        fund1 = random.choice(self.weights.index)
        return fund1, fund2

    def run_model(self, iterations=100, step=.01):
        """Move weight between fund pairs, keeping only improving moves.

        Each iteration moves up to ``step`` from fund2 to fund1. An improving
        move is kept and the same pair is tried again; otherwise a new random
        pair is drawn. Prints the final asset overlap, add-stocks overlap and
        weighted discount.
        """
        fund1, fund2 = self.randomFunds()
        # The incumbent's score only changes when a move is accepted, so it is
        # evaluated once here rather than on every iteration.
        bestValue = self.Evaluate(self.weights)

        for _ in range(iterations):
            # Never move more than the donor fund still holds.
            change = min(step, self.weights.loc[fund2, "weights"])
            if change == 0 or fund1 == fund2:
                fund1, fund2 = self.randomFunds()
                continue

            newWeights = self.weights.copy()
            newWeights.loc[fund1, "weights"] += change
            newWeights.loc[fund2, "weights"] -= change

            newValue = self.Evaluate(newWeights)
            if newValue > bestValue:
                self.weights = newWeights
                bestValue = newValue
            else:
                fund1, fund2 = self.randomFunds()

        print(self.evaluateAssetOverlap(self.weights))
        print(self.addStocksOverlap(self.weights))
        print(self.getWeightedDiscount(self.weights))


    

{'a': 1, 'b': 2, 'c': 3}


In [None]:
# class runModel(AssetOverlap, Discount):
#     def __init__(self):
#         self.readFunds()
#         self.readSP500()
#         self.readDiscount()
        
#         N = len(self.getFundNames(self.funds))
#         weights = np.random.uniform(0,1.0,(N,1))
#         weights = np.divide(weights,np.sum(weights))

#         self.weights = pd.DataFrame(weights,index=self.getFundNames(self.funds))
        
#         self.n = .1
#         self.run_gradient_descent()
    
#     def run_gradient_descent(self):
#         for x in range(100):
#             gradient = -self.n *self.gradient()
#             self.weights = self.weights + gradient 
#             print("k",self.evaluateAssetOverlap())
#             print(self.evaluateDiscount())
            
#     def gradient(self):
#         asset = .5*(100-self.evaluateAssetOverlap())
#         discount = .5*self.evaluateDiscount()
#         print(asset,discount)
#         return asset+discount