In [2]:
import os
import numpy as np
import pandas as pd
import random

In [195]:
class AssetOverlap():
    def __init__(self):
        self.readFunds()
        self.readSP500()

        
    def readFunds(self):
        fundDir = os.listdir("../input/funds")
        funds = pd.DataFrame(columns=["Ticker", "ID"])
        for name in fundDir:
            fund = pd.read_csv("../input/funds/"+name, index_col=0)
            name = name.split(".csv")[0]
            
            fund = fund.rename(columns={fund.columns[-1]:"Ticker", "ID.WEIGHTS":name})
            fund = fund[["ID","Ticker",name]]
            fund = fund[(fund.Ticker.str[:4]!="#N/A") & (fund[name]>0)] #no puts so neg percent and #N/A Unclassified: Unable to parse request at ...  and # N/A Invalid Security
            fund = fund.dropna(axis=0, how='all')
            
            fund[name] = fund[name]/(fund[name].sum())*100 # some have over 100 percent
            
            fund.Ticker = fund.Ticker.fillna("NULL TICKER"+fund.ID)
            
            fund = fund.groupby(["Ticker","ID"]).agg({name:"sum"})# you have to groupby id and ticker or else it may merge multipletimes
            
            funds = funds.merge(fund, how='outer', on=["Ticker","ID"])
            
            params = {fundName:"sum" for fundName in self.getFundNames(funds)}
            params.update({"ID":"first"})
            funds = funds.groupby("Ticker").agg(params)
            funds = funds.reset_index()

            #print(funds.Ticker.duplicated().sum())
        
        funds = funds.fillna(0)
        print(funds.sum())
        self.funds = funds

    def getFundMatrix(self):
        return self.funds.loc[:,~self.funds.columns.isin(["ID","Ticker"])]
    
    def readSP500(self):
        self.sp500 = pd.read_csv("../input/s&p500.csv")
        self.sp500 = self.sp500.iloc[:,:3]
        self.sp500 = self.sp500.rename(columns={"Symbol":"Ticker", "Weight":"S&P500"})
        
    def getFundNames(self,funds):
        return [x for x in funds.columns if x not in ["ID", "Ticker"]]
    
    def makePortfolio(self,weights):
        #self.weights = self.weights.loc[self.getFundNames(self.funds)]
        columns = ["Ticker","ID"] + list(weights.index)
        self.funds = self.funds[columns]

        Portfolio = self.getFundMatrix().dot(weights)
        print(Portfolio.sum())
        Portfolio = Portfolio.rename({Portfolio.columns[0]:"Portfolio"},axis=1)
        Portfolio[["Ticker","ID"]] = self.funds[["Ticker","ID"]]
        
        Portfolio = pd.merge(Portfolio, self.sp500, how='outer', on="Ticker")
        Portfolio.ID = Portfolio.ID.fillna(Portfolio.Company)
        Portfolio = Portfolio.drop("Company", axis=1)
        return Portfolio
    
    def makeAddStocksPortfolio(self,Portfolio):
        dif = Portfolio["S&P500"]-Portfolio["Portfolio"] # determine which sp500 assets have more than in the portfolio
        self.extra = Portfolio
        self.extra["Dif"] = dif
        self.extra = self.extra[self.extra.Dif>0][["Ticker","ID","Dif"]]
        self.extra = self.extra.rename(columns={"Dif":"Portfolio"})
        AddStocksPortfolio = pd.concat([Portfolio[["Ticker","ID","Portfolio"]],self.extra],axis=0)
        return AddStocksPortfolio
    
    def simpleOverlap(self,weights):
        Portfolio = self.makePortfolio(weights)
        Portfolio[["Portfolio","S&P500"]] = Portfolio[["Portfolio","S&P500"]].fillna(0)
        overlap = np.minimum(np.array(Portfolio["Portfolio"]),np.array(Portfolio["S&P500"]))
        overlap = overlap.sum()
        total = Portfolio.Portfolio.sum()
        print(total)
        return overlap/total*100
    
    def addStocksOverlap(self,weights):
        Portfolio = self.makePortfolio(weights)
        AddStocksPortfolio = self.makeAddStocksPortfolio(Portfolio)
        return 100/AddStocksPortfolio.Portfolio.sum()*100
        
    def evaluateAssetOverlap(self,weights):
        #self.weights = pd.DataFrame(np.ones((49,1))*-1000,index=self.getFundNames(self.funds))
        return self.simpleOverlap(weights)
        
        
        
        

In [221]:
fundDir = os.listdir("../input/funds")
funds = pd.DataFrame(columns=["Ticker", "ID"])
for name in fundDir:
    fund = pd.read_csv("../input/funds/"+name, index_col=0)
    name = name.split(".csv")[0]

    fund = fund.rename(columns={fund.columns[-1]:"Ticker", "ID.WEIGHTS":name})
    fund = fund[["ID","Ticker",name]]
    fund = fund[(fund.Ticker.str[:4]!="#N/A") & (fund[name]>0)] #no puts so neg percent and #N/A Unclassified: Unable to parse request at ...  and # N/A Invalid Security
    fund = fund.dropna(axis=0, how='all')

    fund[name] = fund[name]/(fund[name].sum())*100 # some have over 100 percent

    fund.Ticker = fund.Ticker.fillna("NULL TICKER"+fund.ID)

    fund = fund.groupby(["Ticker","ID"]).agg({name:"sum"})# you have to groupby id and ticker or else it may merge multipletimes

    funds = funds.merge(fund, how='outer', on=["Ticker","ID"])

    params = {fundName:"sum" for fundName in getFundNames(funds)}
    params.update({"ID":"first"})
#     funds = funds.groupby("Ticker").agg(params)
#     funds = funds.reset_index()

    #print(funds.Ticker.duplicated().sum())

funds = funds.fillna(0)

k = funds.groupby(["Ticker"])["ID"].count().to_frame()

t = funds.groupby(["Ticker","ID"])["BUI"].count()
t.tail(20)
#t[t.index.get_level_values(0).isin(k[k.ID>1].index)].to_frame()

Ticker  ID               
YNDX    YNDX US Equity       1
YUM     YUM US Equity        1
YUMC    YUMC US Equity       1
YY      YY US Equity         1
Z       AT341057     Corp    1
        BJ456485     Corp    1
ZBH     ZBH US Equity        1
ZBRA    ZBRA US Equity       1
ZEN     BK051856     Corp    1
        ZEN US Equity        1
ZI      ZI US Equity         1
ZLAB    ZLAB US Equity       1
ZNGA    AZ096782     Corp    1
        ZNGA US Equity       1
ZNTL    ZNTL US Equity       1
ZS      BK199428     Corp    1
        ZS US Equity         1
ZTS     ZTS US Equity        1
ZURN    ZURN SW Equity       1
ZYME    ZYME US Equity       1
Name: BUI, dtype: int64

In [269]:
def bestID(ids):
    return ids.iloc[np.argmax([np.all([not char.isdigit() for char in ID]) for ID in ids])]

In [270]:
funds.groupby("Ticker").agg({"ID":bestID})

Unnamed: 0_level_0,ID
Ticker,Unnamed: 1_level_1
000333,000333 CH Equity
000858,000858 CH Equity
002179,002179 CH Equity
002371,002371 CH Equity
002475,002475 CH Equity
...,...
ZNTL,ZNTL US Equity
ZS,ZS US Equity
ZTS,ZTS US Equity
ZURN,ZURN SW Equity


In [283]:
funds

Unnamed: 0,Ticker,ID,STK,ADX,CHN,MIE,SZC,CII,GRF,BCX,...,BMEZ,CET,UTF,BOE,IGR,PEO,DNP,RQI,AEF,BUI
0,42,0758519D US Equity,1.762070,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0
1,6723,6723 JP Equity,1.364854,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0
2,AAPL,AAPL US Equity,5.840233,6.361546,0.0,0.0,0.0,7.510249,0.0,0.0,...,0.0,0.933408,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0
3,ADI,ADI US Equity,0.926434,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0
4,AEIS,AEIS US Equity,1.265053,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2333,TCS,TCS IN Equity,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,1.234798,0.0
2334,TLGF,TLGF TB Equity,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,1.208732,0.0
2335,VIVT3,VIVT3 BZ Equity,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,1.129441,0.0
2336,WUBA,WUBA US Equity,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,1.057430,0.0


In [303]:
def hello(x):
    for y in x:
        if "NULL" not in y:
            if np.any(["NULL" in y for y in x]):
                print("WOO")
            return y
    else: 
        return x.iloc[0]
    

In [304]:
k = funds.groupby("ID")["Ticker"].apply(hello)
k

ID
000333 CH Equity     000333
000858 CH Equity     000858
002179 CH Equity     002179
002371 CH Equity     002371
002475 CH Equity     002475
                      ...  
ZS810219     Corp      MTCH
ZS891212     Corp      GDDY
ZTS US Equity           ZTS
ZURN SW Equity         ZURN
ZYME US Equity         ZYME
Name: Ticker, Length: 2338, dtype: object

Unnamed: 0,Ticker_x,ID,STK,ADX,CHN,MIE,SZC,CII,GRF,BCX,...,CET,UTF,BOE,IGR,PEO,DNP,RQI,AEF,BUI,Ticker_y
1053,NULL TICKERAFFERENT MILESTONE INTEREST COMMON ...,AFFERENT MILESTONE INTEREST COMMON STOCK,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
1054,NULL TICKERBRSZ39GT0 PROJECT RADAR PROJECT RAD...,BRSZ39GT0 PROJECT RADAR PROJECT RADAR COMMON B,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
1055,NULL TICKERBRW1R4Y73 PROJECT BEADS PROJECT BEA...,BRW1R4Y73 PROJECT BEADS PROJECT BEADS SERIES B,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
1056,NULL TICKERBRW5236Q2 PROJECT K 1.18 PROJECT K ...,BRW5236Q2 PROJECT K 1.18 PROJECT K SERIES B,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
1057,NULL TICKERBRW620EA2 PROJECT Q PROJECT Q SERIES D,BRW620EA2 PROJECT Q PROJECT Q SERIES D,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
1058,NULL TICKERBRW73BE07 PROJ BRIAN PROJECT BRAIN ...,BRW73BE07 PROJ BRIAN PROJECT BRAIN SERIES B,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
1059,NULL TICKERBRW7G6UM8 PROJECT TF CON PROJECT TF...,BRW7G6UM8 PROJECT TF CON PROJECT TF CONVERTIBLE,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
1060,NULL TICKERMERRILL LYNCH LIQUIDITY SER LLCMONE...,MERRILL LYNCH LIQUIDITY SER LLCMONEY MARKET SER 1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
1061,NULL TICKEROAK,OAK,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
1540,NULL TICKER1050346D AU Equity,1050346D AU Equity,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,


In [259]:
def hey(t):
    return t.iloc[np.argmax([np.all([not x.isdigit() for x in y]) for y in t])]

In [265]:
#tickers where there is more than one id
k = funds.groupby(["Ticker"])["ID"].count().to_frame()
t = funds.groupby(["Ticker","ID"])["BUI"].count()
t[t.index.get_level_values("Ticker").isin(k[k.ID>1].index)].reset_index().groupby("Ticker")["ID"].apply(hey)

Ticker
42        0758519D US Equity
AAL            AAL LN Equity
ACACN      AH028916     Corp
ACAFP      AX329820     Corp
ACGCAP     AP547793     Corp
                 ...        
WRB         EP050832     Pfd
Z          AT341057     Corp
ZEN        BK051856     Corp
ZNGA       AZ096782     Corp
ZS         BK199428     Corp
Name: ID, Length: 297, dtype: object

In [240]:
t.iloc[-20:]

Ticker  ID               
YNDX    YNDX US Equity       1
YUM     YUM US Equity        1
YUMC    YUMC US Equity       1
YY      YY US Equity         1
Z       AT341057     Corp    1
        BJ456485     Corp    1
ZBH     ZBH US Equity        1
ZBRA    ZBRA US Equity       1
ZEN     BK051856     Corp    1
        ZEN US Equity        1
ZI      ZI US Equity         1
ZLAB    ZLAB US Equity       1
ZNGA    AZ096782     Corp    1
        ZNGA US Equity       1
ZNTL    ZNTL US Equity       1
ZS      BK199428     Corp    1
        ZS US Equity         1
ZTS     ZTS US Equity        1
ZURN    ZURN SW Equity       1
ZYME    ZYME US Equity       1
Name: BUI, dtype: int64

In [219]:
k.tail(20)

Unnamed: 0_level_0,ID
Ticker,Unnamed: 1_level_1
XPER,1
XRIFX,1
XRO,1
XTTPX,1
YNDX,1
YUM,1
YUMC,1
YY,1
Z,2
ZBH,1


In [198]:
AssetOverlap()

Ticker    0003330008580021790023710024750027470059350064...
STK                                                   100.0
ADX                                                   100.0
CHN                                                   100.0
MIE                                                   100.0
SZC                                                   100.0
CII                                                   100.0
GRF                                                   100.0
BCX                                                   100.0
CRF                                                   100.0
CEN                                                   100.0
ASA                                                   100.0
AGD                                                   100.0
AIO                                                   100.0
RFI                                                   100.0
BME                                                   100.0
CPZ                                     

<__main__.AssetOverlap at 0x11589e590>

In [153]:
 def getFundNames(funds):
    return [x for x in funds.columns if x not in ["ID", "Ticker"]]

In [185]:
fundDir = os.listdir("../input/funds")
funds = pd.DataFrame(columns=["Ticker", "ID"])

for name in fundDir[0:40]:
    fund = pd.read_csv("../input/funds/"+name, index_col=0)
    name = name.split(".csv")[0]

    fund = fund.rename(columns={fund.columns[-1]:"Ticker", "ID.WEIGHTS":name})
    fund = fund[["ID","Ticker",name]]
    fund = fund[(fund.Ticker.str[:4]!="#N/A") & (fund[name]>0)] #no shorts so neg percent and #N/A Unclassified: Unable to parse request at ...  and # N/A Invalid Security
    fund = fund.dropna(axis=0, how='all')

    fund[name] = fund[name]/(fund[name].sum())*100
    print(name, fund[name].sum())
    #fund.loc[fund["Ticker"].isnull(),"Ticker"] = "NULL TICKER"
    fund.Ticker = fund.Ticker.fillna("NULL TICKER")
    if name == "BMEZ":
        tickers = fund.groupby(["Ticker"])["Ticker"].count().index
        print(fund.Ticker.isin(tickers))
        print(fund)
    fund = fund.groupby(["Ticker"]).agg({name:"sum"})
    if name == "BMEZ":
        print(fund)
    print(name, fund[name].sum())
#     funds = funds.merge(fund, how='outer', on=["Ticker","ID"])

#     params = {fundName:"sum" for fundName in getFundNames(funds)}
#     params.update({"ID":"first"})
#     funds = funds.groupby("Ticker").agg(params)
#     funds = funds.reset_index()

    #print(funds.Ticker.duplicated().sum())

funds = funds.fillna(0)
print(funds.sum())

STK 100.00000000000001
STK 100.00000000000001
ADX 100.0
ADX 100.0
CHN 100.0
CHN 100.0
MIE 100.0
MIE 100.0
SZC 100.0
SZC 100.0
CII 100.0
CII 100.0
GRF 100.0
GRF 99.99999999999999
BCX 99.99999999999999
BCX 100.0
CRF 100.0
CRF 100.0
CEN 100.0
CEN 99.99999999999999
ASA 100.0
ASA 100.0
AGD 100.0
AGD 100.00000000000001
AIO 100.0
AIO 99.99999999999999
RFI 100.0
RFI 99.99999999999999
BME 100.00000000000001
BME 100.0
CPZ 100.00000000000001
CPZ 100.00000000000003
CEM 100.0
CEM 100.0
BSTZ 100.0
BSTZ 99.99999999999999
NFJ 100.0
NFJ 100.0
RNP 100.00000000000001
RNP 100.0
EXD 100.0
EXD 99.99999999999999
EOI 100.0
EOI 100.0
CTR 100.0
CTR 99.99999999999999
GLQ 100.0
GLQ 99.99999999999999
BGR 100.00000000000003
BGR 100.0
JEQ 100.0
JEQ 99.99999999999999
DSE 100.00000000000001
DSE 100.00000000000001
BDJ 100.00000000000001
BDJ 100.0
SRV 100.0
SRV 99.99999999999999
CLM 100.00000000000001
CLM 100.00000000000001
EMO 100.00000000000001
EMO 100.00000000000001
DPG 99.99999999999999
DPG 99.99999999999996
AWP 100

In [156]:
class Discount:
    def __init__(self):
        self.readDiscount()
    def readDiscount(self):
        self.discount = pd.read_csv("../input/Discounts.csv")
        self.discount[["Discount", "52W Discount","Effective","Distribution"]] = self.discount.apply(lambda x: [x["Discount"].strip("%"),x["52W Discount"].strip("%"), x["Effective"].strip("%"), x["Distribution"].strip("%")], axis=1, result_type='expand')
        self.discount = self.discount.replace("--",0)
        self.discount.iloc[:,1:] = self.discount.iloc[:,1:].astype("float")
        
        longterm = self.discount["52W Discount"]-self.discount["Discount"] #max: a pos 52 week is pos, a neg discount is more pos
        longterm = longterm/np.max(longterm) *100# between -inf and 1
        self.discount["longterm"] = longterm
        current = -self.discount["Discount"] # max: a neg discount is pos
        current = current/np.max(-self.discount["Discount"]) * 100 #between -inf and 1
        self.discount["value"] = np.sum([longterm,2*current],axis=0)/3
        self.discount = self.discount.set_index("Ticker")
    
    def getWeightedDiscount(self,weights):
        self.discount = self.discount.loc[weights.index]
        weights = np.array(weights).reshape(1,len(weights))
        values = np.array(self.discount.Discount).reshape(len(self.discount),1)
        return np.matmul(weights,values)[0][0]
    
    def evaluateDiscount(self,weights):
        self.discount = self.discount.loc[weights.index]
        weights = np.array(weights).reshape(1,len(weights))
        values = np.array(self.discount.value).reshape(len(self.discount),1)
        
        return np.dot(weights,values)[0][0]
        
          

In [109]:
Discount().discount

Unnamed: 0_level_0,Effective,Distribution,Discount,52W Discount,longterm,value
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ADX,0.0,20.55,-13.27,-13.74,-4.304029,36.46878
AEF,7.9,2.64,-12.89,-14.09,-10.989011,33.155048
AGD,6.94,7.51,-12.7,-13.11,-3.754579,35.023824
AIO,2.95,5.05,-6.89,-10.25,-30.769231,9.423681
AOD,0.09,7.84,-13.81,-13.28,4.85348,41.063699
ASA,0.0,0.09,-15.96,-15.45,4.67033,47.143752
AWP,7.68,9.28,-12.22,-13.72,-13.736264,30.325558
BCX,1.2,6.63,-14.22,-15.29,-9.798535,37.350788
BDJ,0.27,7.19,-9.53,-9.34,1.739927,27.80077
BGR,0.22,7.88,-13.84,-9.02,44.139194,54.244627


In [88]:
class runModel(AssetOverlap, Discount):
    def __init__(self):
        self.readFunds()
        self.readSP500()
        self.readDiscount()
        
        N = len(self.getFundNames(self.funds))
        weights = np.random.uniform(0,1.0,(N,1))
        weights = np.divide(weights,np.sum(weights))

        self.weights = pd.DataFrame(weights,columns=["weights"],index=self.getFundNames(self.funds))
        
        #self.findInitialBest()
    
    def Evaluate(self,weights):
        asset = 1*(self.evaluateAssetOverlap(weights))
        discount = 0*-self.evaluateDiscount(weights)
        return asset+discount
    
    def changeWeights(self,fund1,fund2,weights,change):
        change = min(weights.loc[fund2][0],change)
        weights.loc[fund1,"weights"] += change
        weights.loc[fund2,"weights"] -= change
        return weights
    
    def EvaluateChange(self,fund1,fund2,weights,change):
        weights = self.changeWeights(fund1,fund2,weights,change)
        return self.Evaluate(weights)
    
    def findIdealChange(self,fund1,fund2,weights,change):
        control = self.Evaluate(weights)
        while self.EvaluateChange(fund1,fund2,weights,change)>control and self.weights.loc[fund2]!=0:
            weights = self.changeWeights()
        return control, weights
    
    def makeAdjustment(self,fund1,fund2,weights,change):
        control = self.Evaluate(weights)
        up = self.EvaluateChange(fund1,fund2,weights,change)
        down = self.EvaluateChange(fund2,fund1,weights,change)
        
        if down>control and down>up:
            fund1, fund2 = fund2, fund1
        elif control>up:
            return control,weights
        
        weights = self.changeWeights(fund1,fund2,weights,change)
        value, weights = self.findIdealChange(fund1,fund2,weights,change)
        
        return value,weights
    
    def randomFunds(self):
        fund2 = random.choice(self.weights.index)
        fund1 = random.choice(self.weights.index)
        return fund1, fund2
    
    def trial(self):
        fund1, fund2 = self.randomFunds()
        change = .1
        print(obj.weights.sum())
        print(self.Evaluate(self.weights))
        value, weights = self.makeAdjustment(fund1, fund2, self.weights, .1)
        print(obj.weights.sum())
        print(value)
        print(weights.loc[fund1], weights.loc[fund2])
    def findInitialBest(self):
        
        options = [self.addValueToFund(1,fund) for fund in self.weights.index]
        print(options)  
        
        

In [89]:
obj = runModel()
obj.trial()

weights    1.0
dtype: float64
106.41307826303483
29.33530743860453
106.41307826303483
106.70900506382624
105.91123259192975
105.91123259192975
105.91123259192975
weights    1.0
dtype: float64
29.525509909355467
weights    0.0
Name: DSE, dtype: float64 weights    0.055545
Name: MIE, dtype: float64


In [110]:
obj.weights.sum()

weights    1.0
dtype: float64

In [480]:
null = funds[funds["Ticker"].str[:4]=="NULL"]
notnull = funds[funds["Ticker"].str[:4]!="NULL"].groupby("ID")["Ticker"].first()
null.merge(notnull, how="left", left_on="ID", right_index=True)
#no overlap between null and having ticker

Unnamed: 0,weights
STK,0.029072
ADX,0.223149
CHN,0.0
MIE,0.0
SZC,0.0
CII,0.049814
GRF,0.017187
BCX,0.0
CRF,0.15304
CEN,0.0


In [403]:
Discount().discount

Unnamed: 0_level_0,Effective,Distribution,Discount,52W Discount,value
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ADX,0.0,20.55,-13.27,-13.74,-26.275577
AEF,7.9,2.64,-12.89,-14.09,-22.119033
AGD,6.94,7.51,-12.7,-13.11,-25.329223
AIO,2.95,5.05,-6.89,-10.25,0.624547
AOD,0.09,7.84,-13.81,-13.28,-32.011144
ASA,0.0,0.09,-15.96,-15.45,-36.525396
AWP,7.68,9.28,-12.22,-13.72,-19.310103
BCX,1.2,6.63,-14.22,-15.29,-25.563458
BDJ,0.27,7.19,-9.53,-9.34,-21.285559
BGR,0.22,7.88,-13.84,-9.02,-51.718269


In [199]:
FundAssets().addStocksOverlap()

61.39535179302859

In [198]:
FundAssets().simpleOverlap()

33.12524075359048

In [89]:
a = FundAssets().Portfolio.groupby("Company").agg({"S&P500":'count'})

STK
0
ADX
1
CHN
1
MIE
3
SZC
14


In [115]:
dict = {x:"sum" for x in range(2)}
dict.update({"h":'h'})

In [2]:
dict(zip([1,2],["sum","sum"]))

{1: 'sum', 2: 'sum'}

In [1]:
class runModel(AssetOverlap, Discount):
    def __init__(self):
        self.readFunds()
        self.readSP500()
        self.readDiscount()
        
        N = len(self.getFundNames(self.funds))
        weights = np.random.uniform(0,1.0,(N,1))
        weights = np.divide(weights,np.sum(weights))

        self.weights = pd.DataFrame(weights,columns=["weights"],index=self.getFundNames(self.funds))
        
        self.n = .1
        self.run_model()
        
    def Evaluate(self,weights):
        asset = .7*(self.evaluateAssetOverlap(weights))
        discount = .3*-self.evaluateDiscount(weights)
        return asset+discount
        
    def randomFunds(self):
        fund2 = random.choice(self.weights[self.weights["weights"]!=0].index)
        fund1 = random.choice(self.weights.index)
        return fund1, fund2
    
    def run_model(self):
        fund1, fund2 = self.randomFunds()

        for x in range(100):
            newWeights = self.weights.copy()
            
            change = .01
            if newWeights.loc[fund2][0] < change:
                change = newWeights.loc[fund2][0]
            if change == 0 or fund1==fund2: 
                fund1, fund2 = self.randomFunds()
                continue
                
            newWeights.loc[fund1] = [newWeights.loc[fund1]+change]
            newWeights.loc[fund2] = [newWeights.loc[fund2]-change]

            if self.Evaluate(newWeights) > self.Evaluate(self.weights):
                self.weights=newWeights.copy()
            else:
                fund1, fund2 = self.randomFunds()
        
        print(self.evaluateAssetOverlap(self.weights))
        print(self.addStocksOverlap(self.weights))
        print(self.getWeightedDiscount(self.weights))


    

{'a': 1, 'b': 2, 'c': 3}


In [None]:
# class runModel(AssetOverlap, Discount):
#     def __init__(self):
#         self.readFunds()
#         self.readSP500()
#         self.readDiscount()
        
#         N = len(self.getFundNames(self.funds))
#         weights = np.random.uniform(0,1.0,(N,1))
#         weights = np.divide(weights,np.sum(weights))

#         self.weights = pd.DataFrame(weights,index=self.getFundNames(self.funds))
        
#         self.n = .1
#         self.run_gradient_descent()
    
#     def run_gradient_descent(self):
#         for x in range(100):
#             gradient = -self.n *self.gradient()
#             self.weights = self.weights + gradient 
#             print("k",self.evaluateAssetOverlap())
#             print(self.evaluateDiscount())
            
#     def gradient(self):
#         asset = .5*(100-self.evaluateAssetOverlap())
#         discount = .5*self.evaluateDiscount()
#         print(asset,discount)
#         return asset+discount