In [305]:
import os
import numpy as np
import pandas as pd
import random

In [360]:
class AssetOverlap():
    def __init__(self):
        self.readFunds()
        self.readSP500()
        self.evaluateAssetOverlap()

        
    def readFunds(self):
        fundDir = os.listdir("../input/funds")
        funds = pd.DataFrame(columns=["Ticker", "ID"])
        for name in fundDir:
            fund = pd.read_csv("../input/funds/"+name, index_col=0)
            name = name.split(".csv")[0]
            fund = fund.rename(columns={fund.columns[-1]:"Ticker", "ID.WEIGHTS":name})
            fund = fund[["ID","Ticker",name]]
            fund = fund[(fund.Ticker.str[:4]!="#N/A") & (fund[name]>0)]
            fund = fund.dropna(axis=0, how='all')
            fund = fund.groupby(["Ticker","ID"]).agg({name:"sum"})
            funds = funds.merge(fund, how='outer', on=["Ticker","ID"])
            
            params = {fundName:"sum" for fundName in self.getFundNames(funds)}
            params.update({"ID":"first"})
            funds = funds.groupby("Ticker").agg(params)
            funds = funds.reset_index()

            #print(funds.Ticker.duplicated().sum())
        
        funds = funds.fillna(0)
        self.funds = funds

    def getFundMatrix(self):
        return self.funds.loc[:,~self.funds.columns.isin(["ID","Ticker"])]
    
    def readSP500(self):
        self.sp500 = pd.read_csv("../input/s&p500.csv")
        self.sp500 = self.sp500.iloc[:,:3]
        self.sp500 = self.sp500.rename(columns={"Symbol":"Ticker", "Weight":"S&P500"})
        
    def getFundNames(self,funds):
        return [x for x in funds.columns if x not in ["ID", "Ticker"]]
    
    def makePortfolio(self,weights):
        #self.weights = self.weights.loc[self.getFundNames(self.funds)]
        columns = ["Ticker","ID"] + list(weights.index)
        self.funds = self.funds[columns]

        Portfolio = self.getFundMatrix().dot(weights)
        Portfolio = Portfolio.rename({Portfolio.columns[0]:"Portfolio"},axis=1)
        Portfolio[["Ticker","ID"]] = self.funds[["Ticker","ID"]]
        
        Portfolio = pd.merge(Portfolio, self.sp500, how='outer', on="Ticker")
        Portfolio.ID = Portfolio.ID.fillna(Portfolio.Company)
        Portfolio = Portfolio.drop("Company", axis=1)
        return Portfolio
    
    def makeAddStocksPortfolio(self,Portfolio):
        dif = Portfolio["S&P500"]-Portfolio["Portfolio"] # determine which sp500 assets have more than in the portfolio
        self.extra = Portfolio
        self.extra["Dif"] = dif
        self.extra = self.extra[self.extra.Dif>0][["Ticker","ID","Dif"]]
        self.extra = self.extra.rename(columns={"Dif":"Portfolio"})
        AddStocksPortfolio = pd.concat([Portfolio[["Ticker","ID","Portfolio"]],self.extra],axis=0)
        return AddStocksPortfolio
    
    def simpleOverlap(self,weights):
        Portfolio = self.makePortfolio(weights)
        Portfolio[["Portfolio","S&P500"]] = Portfolio[["Portfolio","S&P500"]].fillna(0)
        overlap = np.minimum(np.array(Portfolio["Portfolio"]),np.array(Portfolio["S&P500"]))
        overlap = overlap.sum()
        total = Portfolio.Portfolio.sum()
        return overlap/total*100
    
    def addStocksOverlap(self,weights):
        Portfolio = self.makePortfolio(weights)
        AddStocksPortfolio = self.makeAddStocksPortfolio(Portfolio)
        return 100/AddStocksPortfolio.Portfolio.sum()*100
        
    def evaluateAssetOverlap(self,weights):
        #self.weights = pd.DataFrame(np.ones((49,1))*-1000,index=self.getFundNames(self.funds))
        return self.simpleOverlap(weights)
        
        
        
        

In [438]:
class Discount:
    def __init__(self):
        self.readDiscount()
    def readDiscount(self):
        self.discount = pd.read_csv("../input/Discounts.csv")
        self.discount[["Discount", "52W Discount","Effective","Distribution"]] = self.discount.apply(lambda x: [x["Discount"].strip("%"),x["52W Discount"].strip("%"), x["Effective"].strip("%"), x["Distribution"].strip("%")], axis=1, result_type='expand')
        self.discount = self.discount.replace("--",0)
        self.discount.iloc[:,1:] = self.discount.iloc[:,1:].astype("float")
        
        longterm = -self.discount["52W Discount"]-(-self.discount["Discount"])
        longterm = longterm/-np.min(longterm)*100
        current = self.discount["Discount"]/-np.min(self.discount["Discount"])*100
        self.discount["value"] = np.sum([longterm,current],axis=0)/2
        self.discount = self.discount.set_index("Ticker")
    
    def getWeightedDiscount(self,weights):
        self.discount = self.discount.loc[weights.index]
        weights = np.array(weights).reshape(1,len(weights))
        values = np.array(self.discount.Discount).reshape(len(self.discount),1)
        return np.matmul(weights,values)[0][0]
    
    def evaluateDiscount(self,weights):
        self.discount = self.discount.loc[weights.index]
        weights = np.array(weights).reshape(1,len(weights))
        values = np.array(self.discount.value).reshape(len(self.discount),1)
        
        return np.dot(weights,values)[0][0]
        
          

In [456]:
class runModel(AssetOverlap, Discount):
    def __init__(self):
        self.readFunds()
        self.readSP500()
        self.readDiscount()
        
        N = len(self.getFundNames(self.funds))
        weights = np.random.uniform(0,1.0,(N,1))
        weights = np.divide(weights,np.sum(weights))

        self.weights = pd.DataFrame(weights,columns=["weights"],index=self.getFundNames(self.funds))
        
        self.n = .1
        self.run_model()
        
    def Evaluate(self,weights):
        asset = .7*(self.evaluateAssetOverlap(weights))
        discount = .3*-self.evaluateDiscount(weights)
        return asset+discount
        
    def randomFunds(self):
        fund2 = random.choice(self.weights[self.weights["weights"]!=0].index)
        
        fund1 = random.choice(self.weights.index)
        while fund1 != fund2:
            fund1 = random.choice(self.weights.index)
            
        return fund1, fund2
    
    def run_model(self):
        fund1, fund2 = self.randomFunds()
        
        count = 0
        for x in range(10000):
            count += 1
            if count % 100 == 0: print(count)
                
            newWeights = self.weights.copy()
            
            change = .01
            if newWeights.loc[fund2][0] < change:
                change = newWeights.loc[fund2][0]
            if change == 0: 
                fund1, fund2 = self.randomFunds()
                continue
                
            newWeights.loc[fund1] = [newWeights.loc[fund1]+change]
            newWeights.loc[fund2] = [newWeights.loc[fund2]-change]

            if self.Evaluate(newWeights) > self.Evaluate(self.weights):
                self.weights=newWeights.copy()
            else:
                fund1, fund2 = self.randomFunds()
        
        print(self.evaluateAssetOverlap(self.weights))
        print(self.addStocksOverlap(self.weights))
        print(self.getWeightedDiscount(self.weights))


    

In [457]:
obj = runModel()

100
200
300
400
500
600
700
800
900
1000
30.503981490876793
60.16355962476677
-9.430175257093008


In [449]:
obj.weights

Unnamed: 0,weights
STK,0.01
ADX,0.436284
CHN,0.0
MIE,0.0
SZC,0.0
CII,0.035553
GRF,0.039827
BCX,0.0
CRF,0.0
CEN,0.031164


In [403]:
Discount().discount

Unnamed: 0_level_0,Effective,Distribution,Discount,52W Discount,value
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ADX,0.0,20.55,-13.27,-13.74,-26.275577
AEF,7.9,2.64,-12.89,-14.09,-22.119033
AGD,6.94,7.51,-12.7,-13.11,-25.329223
AIO,2.95,5.05,-6.89,-10.25,0.624547
AOD,0.09,7.84,-13.81,-13.28,-32.011144
ASA,0.0,0.09,-15.96,-15.45,-36.525396
AWP,7.68,9.28,-12.22,-13.72,-19.310103
BCX,1.2,6.63,-14.22,-15.29,-25.563458
BDJ,0.27,7.19,-9.53,-9.34,-21.285559
BGR,0.22,7.88,-13.84,-9.02,-51.718269


In [199]:
FundAssets().addStocksOverlap()

61.39535179302859

In [198]:
FundAssets().simpleOverlap()

33.12524075359048

In [89]:
a = FundAssets().Portfolio.groupby("Company").agg({"S&P500":'count'})

STK
0
ADX
1
CHN
1
MIE
3
SZC
14


In [82]:
a.groupby("Company").sum()

Unnamed: 0_level_0,S&P500
Company,Unnamed: 1_level_1
3M Company,1
A. O. Smith Corporation,1
ABIOMED Inc.,1
AES Corporation,1
AMETEK Inc.,1
...,...
Zimmer Biomet Holdings Inc.,1
Zions Bancorporation N.A.,1
Zoetis Inc. Class A,1
eBay Inc.,1


In [115]:
dict = {x:"sum" for x in range(2)}
dict.update({"h":'h'})

In [2]:
dict(zip([1,2],["sum","sum"]))

{1: 'sum', 2: 'sum'}

In [1]:
keys = ['a', 'b', 'c']
values = [1, 2, 3]
dictionary = dict(zip(keys, values))
print(dictionary) # {'a': 1, 'b': 2, 'c': 3}

{'a': 1, 'b': 2, 'c': 3}


In [None]:
# class runModel(AssetOverlap, Discount):
#     def __init__(self):
#         self.readFunds()
#         self.readSP500()
#         self.readDiscount()
        
#         N = len(self.getFundNames(self.funds))
#         weights = np.random.uniform(0,1.0,(N,1))
#         weights = np.divide(weights,np.sum(weights))

#         self.weights = pd.DataFrame(weights,index=self.getFundNames(self.funds))
        
#         self.n = .1
#         self.run_gradient_descent()
    
#     def run_gradient_descent(self):
#         for x in range(100):
#             gradient = -self.n *self.gradient()
#             self.weights = self.weights + gradient 
#             print("k",self.evaluateAssetOverlap())
#             print(self.evaluateDiscount())
            
#     def gradient(self):
#         asset = .5*(100-self.evaluateAssetOverlap())
#         discount = .5*self.evaluateDiscount()
#         print(asset,discount)
#         return asset+discount