<a href="https://colab.research.google.com/github/Narusi/My-Projects/blob/master/RevolutPortfolioNewAssets.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Necessary libraries

In [13]:
!pip install yfinance



In [0]:
import pandas as pd
import numpy as np
import itertools
from scipy.stats import pearsonr, spearmanr

import requests
from bs4 import BeautifulSoup

import yfinance as yf
import matplotlib.pyplot as plt
import seaborn as sns

np.set_printoptions(precision=3)

# Simple Momentum

In [0]:
def momentumBuy(close, asset='Asset', step=50, rper=1, output='current', chart=False, printOut=False):
    shortBuy = close > close.rolling(step).mean()
    mediumBuy = close > close.rolling(step * 2).mean()
    longBuy = close > close.rolling(step * 4).mean()

    medMBuy = close.rolling(step).mean() > close.rolling(step * 2).mean()
    longMBuy = close.rolling(step).mean() > close.rolling(step * 4).mean()

    veryLongBuy = close.rolling(step * 2).mean() > close.rolling(200).mean()

    buySignals = pd.DataFrame(index=close.index)

    buySignals['BuySS'] = shortBuy
    buySignals['BuySM'] = mediumBuy
    buySignals['BuySL'] = longBuy
    buySignals['BuyMM'] = medMBuy
    buySignals['BuyML'] = longMBuy
    buySignals['BuyLL'] = veryLongBuy

    ts = pd.DataFrame({asset:close.div(close.iloc[0]),
                    'Returns':np.log(close/close.shift(rper)).shift(rper),
                    'BuySignal':buySignals.sum(axis=1)},
                    index=close.index)

    momSum = ts.groupby('BuySignal')['Returns'].sum()
    momMed = ts.groupby('BuySignal')['Returns'].median()
    buySigns = []
    holdSignal = []
    sellSign = []
    losingSignals = momSum[momSum <= 0.0].index

    for bl in range(len(momSum.index)-1,0,-1):        
        if np.logical_and(momSum[bl] > 0.0, bl > max(losingSignals)):
            buySigns.append(bl)
        elif momSum[bl] > 0.0:
            holdSignal.append(bl)
        elif momMed[bl] <= 0.0:
            sellSign.append(bl)

    ts['Signal'] = ''
    ts.loc[ts[ts['BuySignal'] == 0].index, 'Signal'] = 'SELL'
    
    if len(buySigns) > 1:
        for buy in buySigns:
            ts.loc[ts[ts['BuySignal'] == buy].index, 'Signal'] = 'BUY'
    elif len(buySigns) == 1:
        ts.loc[ts[ts['BuySignal'] == buySigns[0]].index, 'Signal'] = 'BUY'
        
    if len(holdSignal) > 1:
        for hold in holdSignal:
            ts.loc[ts[ts['BuySignal'] == hold].index, 'Signal'] = 'HOLD'
    elif len(holdSignal) == 1:
        ts.loc[ts[ts['BuySignal'] == holdSignal[0]].index, 'Signal'] = 'HOLD'

    if len(sellSign) > 1:
        for sell in sellSign:
            ts.loc[ts[ts['BuySignal'] == sell].index, 'Signal'] = 'SELL'
    elif len(sellSign) == 1:
        ts.loc[ts[ts['BuySignal'] == sellSign[0]].index, 'Signal'] = 'SELL'

    for b in buySignals.columns:    
        rets = ts['Returns'][buySignals[b]]
        prob = rets[rets > 0.0].count()/rets.count()    
        if printOut: print(b, prob, rets.mean());

        ts[b] = 0
        ts.loc[buySignals[buySignals[b]].index, b] = 1

    if chart:
        ts['2018':][[asset,'BuySignal']].plot(figsize=(16,8))
        plt.show();

    if printOut: print(ts.groupby('BuySignal')['Returns'].mean());
    
    for i in range(min(6, len(momSum.index)-1)):
        minpick = ts.loc[ts[ts['BuySignal'] >= i].index]
        probs = minpick.groupby('BuySignal')['Returns'].count()/minpick['Returns'].count()    
        rets = minpick.groupby('BuySignal')['Returns'].mean()
        std = minpick.groupby('BuySignal')['Returns'].std()

        dprobs = minpick[minpick['Returns'] < 0.0].groupby('BuySignal')['Returns'].count()/minpick.groupby('BuySignal')['Returns'].count()
        #print((probs * rets).sum())
        #print(dprobs)
    
    ts['LAG.14'] = ts['Returns'].shift(14).rolling(14).sum()
    ts['LAG.28'] = ts['Returns'].shift(28).rolling(28).sum()
    ts['LAG.60'] = ts['Returns'].shift(60).rolling(60).sum()
    ts['LAG.90'] = ts['Returns'].shift(90).rolling(90).sum()
    lags = ['LAG.14','LAG.28','LAG.60','LAG.90']
    
    signalProbs = pd.DataFrame(index=list(range(len(list(itertools.product([1, 0], 
                                                                           repeat = len(buySignals.columns)))))),
                               columns=list(buySignals.columns) + lags)
    signalProbs[buySignals.columns] = 0
    signalProbs[lags] = 0.0

    signalReturns = pd.DataFrame(index=list(range(len(list(itertools.product([1, 0], 
                                                                           repeat = len(buySignals.columns)))))),
                               columns=list(buySignals.columns) + lags)
    signalReturns[buySignals.columns] = 0
    signalReturns[lags] = 0.0

    i = 0
    for comb in itertools.product([1, 0], repeat = len(buySignals.columns)):
        comb = list(comb)
        signalProbs.loc[i, buySignals.columns] = comb

        signInd = buySignals[buySignals == comb].dropna().index
        signRets = ts[lags].reindex(signInd)
        theProbs = signRets[signRets >= 0.05/step].count()/signRets.count()
        
        signalProbs.loc[i, lags] = theProbs
        signalReturns.loc[i, lags] = signRets.mean()
        i += 1
    
    for ss in buySignals.columns:
        rets = ts[ts[ss] == 1][lags].dropna()
        prob = rets[rets > 0.01].count()/rets.count()
        hrets = rets[rets > 0.01].mean()

        if printOut:
            print('\n',ss)
            print(rets.mean(), '\n',rets.median())
            print('\nProbabilities:')
            print(prob.mean())
            print((prob * hrets).mean())
    
    if printOut: print(ts.groupby('Signal')['Returns'].mean());
    
    if output == 'current' or output == 'single':
        currentState = (np.array(signalProbs[buySignals.columns].values) == np.array(ts[buySignals.columns].iloc[-1].values)).sum(axis=1) == len(buySignals.columns)
        lagOutput = []
        for lagProb in lags:
            action = ''
            if signalProbs[currentState][lagProb].values[0] > 0.75: action = 'BUY'
            elif signalProbs[currentState][lagProb].values[0] > 0.50: action = 'HOLD'
            elif signalProbs[currentState][lagProb].values[0] <= 0.50: action = 'SELL'

            lagOutput.append([lagProb, action, 
                            signalProbs[currentState][lagProb].values[0], 
                            signalReturns[currentState][lagProb].values[0]])

            if printOut:
                print(lagProb, action, 
                    round(100*signalProbs[currentState][lagProb].values[0],1),
                    round(100*signalReturns[currentState][lagProb].values[0],3))                
        
        if output == 'single':
            ttemp = pd.DataFrame(lagOutput)        
            return ttemp.loc[ttemp[2].idxmin(), 1]
        else:
            return lagOutput

    elif output == 'ts':
        return ts

In [0]:
def simpleMomentum(close, step=50, per=22, output='double', verbose=False):
    #Standard Momentum signals:
    shortBuy = close > close.rolling(step).mean()
    mediumBuy = close > close.rolling(step * 2).mean()
    longBuy = close > close.rolling(step * 4).mean()
    medMBuy = close.rolling(step).mean() > close.rolling(step * 2).mean()
    longMBuy = close.rolling(step).mean() > close.rolling(step * 4).mean()
    veryLongBuy = close.rolling(step * 2).mean() > close.rolling(200).mean()
    signal = ''

    buySignals = pd.DataFrame(index=close.index)
    buySignals['BuySS'] = shortBuy
    buySignals['BuySM'] = mediumBuy
    buySignals['BuySL'] = longBuy
    buySignals['BuyMM'] = medMBuy
    buySignals['BuyML'] = longMBuy
    buySignals['BuyLL'] = veryLongBuy
    buySignals['FRET'] = close.shift(-per)/close - 1

    current = buySignals.iloc[-1]
    hret = buySignals
    momSignals = ['BuySS','BuySM','BuySL','BuyMM','BuyML','BuyLL']
    strongBuy = pd.DataFrame()
    
    if hret.groupby(momSignals)['FRET'].min().max() > 0.0:
        hr = hret.groupby(momSignals)['FRET'].min()
        strongBuy = hr[hr>0.0]

    for s in momSignals:
        hret = buySignals[buySignals[s]==current[s]]
    
    if len(strongBuy.index) > 0:
        for i in strongBuy.index:
            if current[momSignals].to_list() == list(i): signal = 'STRONG BUY'
        

    profProb = hret['FRET'][hret['FRET'] > 0.01].count()/hret['FRET'].count()    
    avgRet = hret['FRET'].mean()

    if signal != 'STRONG BUY':
        if profProb/(1.-profProb) > 3. and avgRet > 0.0:
            signal = 'STRONG BUY'
        elif profProb/(1.-profProb) >= 2. and avgRet > 0.0:
            signal = 'BUY'
        elif profProb/(1.-profProb) <= 1.:
            signal = 'SELL'
        else:
            signal = 'HOLD'

    if output == 'single':
        return signal
    elif output == 'double':
        return signal, avgRet
    elif output == 'triple':
        return signal, avgRet, profProb
    elif output == 'current':
        return signal, profProb, lossProb, current    
    elif output == 'hret':
        return hret


In [17]:
a = 'HMY'
pps = yf.download(a)['Adj Close']

timeSeries = momentumBuy(pps, asset=a, rper=22, output='ts')
print(a, ' current state {}\n\n'.format(timeSeries['BuySignal'].iloc[-1]), 
      timeSeries.groupby(['Signal','BuySignal'])['Returns'].mean())
print(timeSeries.index[-1], '\n')

momentumBuy(pps, asset=a, rper=22, output='single'), simpleMomentum(pps, per=22, output='single')

[*********************100%***********************]  1 of 1 completed
HMY  current state 6

 Signal  BuySignal
BUY     3            0.016688
        4            0.027600
        5            0.047119
        6            0.109710
SELL    0           -0.091595
        1           -0.052737
        2           -0.001070
Name: Returns, dtype: float64
2020-01-17 00:00:00 



('HOLD', 'SELL')

# Machine Learning section

In [0]:
def featurize(x, per=22, tRet='auto', verbose=False):
    import numpy as np
    import pandas as pd
    from scipy.stats import pearsonr, spearmanr
    
    """
    x -        Pandas series with prices of an assset
    per -      Investment period
    tRet -     Target return. [tuple, 'auto', float]
               If tuple both targets are expected -> (lower, upper)
               If "Auto" then arithmetic mean return will be used for upper and lower bound. 
               Otherwise a float.
    """
    if tRet == 'auto':
        ret = np.log(x/x.shift(per)).fillna(0.0)
        lRet = -ret.mean()
        uRet = ret.mean()
    elif type(tRet) == float:
        lRet = -tRet
        uRet = tRet        
    elif type(tRet) == tuple:
        lRet, uRet = tRet
    
    ubnd = x * (1 + uRet)
    lbnd = x * (1 + lRet)
    
    y = np.zeros(shape=(len(x),1))
    y[x >= ubnd.shift(-per)] = 1
    y[x <= lbnd.shift(-per)] = -1
    
    if verbose:
        for i in range(-1,2,1):
            print(i, len(y[y==i])/len(y))
    
    X = pd.DataFrame(index=x.index)
    X['P'] = x
    if per >= 5:
        X['P'+str(int(per/2))] = np.log(x/x.shift(int(per/2)))
    X['P'+str(per)] = np.log(x/x.shift(per)).fillna(0.0)
    X['P'+str(2*per)] = np.log(x/x.shift(2*per)).fillna(0.0)
    
    X['P50'] = x.pct_change(50).fillna(0.0)
    X['P100'] = x.pct_change(100).fillna(0.0)
    X['P200'] = x.pct_change(200).fillna(0.0)

    X['50'] = x.rolling(50).mean()
    X['100'] = x.rolling(100).mean()
    X['200'] = x.rolling(200).mean()
    
    X['P/50'] = x/X['50']
    X['P/100'] = x/X['100']
    X['P/200'] = x/X['200']
    X['50/100'] = X['50']/X['100']
    X['100/200'] = X['100']/X['200']
    X['50/200'] = X['50']/X['200']
    
    X['Pc'] = 0
    X['P/50c'] = 0
    X['P/100c'] = 0
    X['P/200c'] = 0
    X['50/100c'] = 0
    X['100/200c'] = 0
    X['50/200c'] = 0

    X['Ps'] = 0
    X['P/50s'] = 0
    X['P/100s'] = 0
    X['P/200s'] = 0
    X['50/100s'] = 0
    X['50/200s'] = 0
    X['100/200s'] = 0
    
    #X['Ps']
    for i in X.index:
        for c in ['P/50','P/100','P/200','50/100','100/200','50/200']:
            if X.loc[i, c] > 1.0:
                if X.shift(1).loc[i, c+'c'] > 0:
                    X.loc[i, c+'c'] = X.shift(1).loc[i, c+'c'] + 1
                else:
                    X.loc[i, c+'c'] = 1
    
    for c in ['P/50c','P/100c','P/200c','50/100c','100/200c','50/200c']:
        X[c][np.logical_and(X[c]>0,X[c]<=5)] = 1
        X[c][np.logical_and(X[c]>5,X[c]<=10)] = 2
        X[c][np.logical_and(X[c]>10,X[c]<=20)] = 3
        X[c][np.logical_and(X[c]>20,X[c]<=50)] = 4
        X[c][X[c]>50] = 0

        cc = str(c)[:-1]
        s = cc+'s'
        
        long = X[np.logical_and(X[cc]>=1., X[cc].shift(1)<1.)].index
        if len(long)>0: X.loc[long, s] = 1;
        short = X[np.logical_and(X[cc]<=1., X[cc].shift(1)>1.)].index
        if len(short) > 0: X.loc[short, s] = -1;
    
    #X = X[['P'+str(per),'P/50','P/100','P/200','50/100','100/200','50/200',
    #       'P/50c','P/100c','P/200c','50/100c','100/200c','50/200c']]
    X = X.fillna(0.0)

#    for c in X.columns:
#        xc = (X[c]-X[c].mean())/X[c].std()
#        pears = pearsonr(xc.values, y[:,0])
#        spear = spearmanr(xc.values, y[:,0])
#        if pears[1] < spear[1]:
            #print(c,'Pearson',pears)
#            if pears[0] < 0.099: 
#                if verbose:print('Droping ',c);
                #X = X.drop(c,axis=1)
#        else:
            #print(c,spear)
#            if spear[0] < 0.1: 
#                if verbose: print('Droping ',c);
                #X = X.drop(c,axis=1)
    return X, y.ravel()


In [19]:
def momentum(prices, per=22, output='return/risk'):
    X,y = featurize(prices,per=per)
    X['ret'] = prices.shift(-per)/prices - 1

    cols = []
    for c in X.columns:
        if str(c)[-1] == 'c' or str(c)[-1] == 's': cols.append(c);
    
    mu = X.groupby(cols)['ret'].median()
    sigma = X.groupby(cols)['ret'].std()
    curr = tuple(X.iloc[-1][cols])

    if output == 'return/risk':
        return mu[curr], sigma[curr]
    elif output == 'return':
        return mu[curr]
    elif output == 'risk':
        return sigma[curr]
    elif output == 'return table':
        return mu
    elif output == 'risk rable':
        return sigma

momentum(yf.download('NVDA')['Adj Close'], output='return')

[*********************100%***********************]  1 of 1 completed


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

0.027142585787447215

In [0]:
def svmMom(X, y, per=22,opt=False,backTest=False,showRet=False):
    from sklearn.svm import SVC
    import numpy as np
    import pandas as pd
    
    clf = SVC(gamma='auto')
    X = (X - X.mean())/X.std()
    if backTest:           
        clf.fit(X.iloc[:-per], y[:-per])        
        return clf
    else:
        clf.fit(X.iloc[:-252], y[:-252])
        predScore = clf.score(X.iloc[-252:-per],y[-252:-per])

        clf.fit(X.iloc[:-per],y[:-per])
        pred = int(clf.predict(X.iloc[-1:])[0])
        
        return pred, predScore

def rfcMom(X,y, per=22,opt=False,backTest=False,showRet=False):
    from sklearn.ensemble import RandomForestClassifier
    import numpy as np
    import pandas as pd
    
    if opt:
        max_n = np.shape(X)[1] * 2
        k = 1
        maxScore = 0.0
        ret = X.iloc[-252:-per, 1]
        X = (X - X.mean())/X.std()
        
        for i in range(1, max_n):
            clf = RandomForestClassifier(n_estimators=k*25)
            clf.fit(X.iloc[:-252],y[:-252])

            pred = clf.predict(X.iloc[-252:-per])        
            profit = np.mean(np.multiply(pred[pred==1], ret[pred==1]))
            profit -= np.mean(np.multiply(pred[pred==-1], ret[pred==-1]))
            #print('Profit: ', profit)
            if  profit > maxScore:
                maxScore = profit
                k = i
    else:
        k=2
    
    clf = RandomForestClassifier(n_estimators=k*50)
    clf.fit(X.iloc[:-per],y[:-per])
    if backTest:
        return clf
    else:
        clf.fit(X.iloc[:-252], y[:-252])
        predScore = clf.score(X.iloc[-252:-per],y[-252:-per])

        clf.fit(X.iloc[:-per],y[:-per])
        pred = int(clf.predict(X.iloc[-1:])[0])
        
        if showRet:
            return pred, predScore, maxScore
        else:
            return pred, predScore

def kncMom(X,y,per=22,opt=False,backTest=False,showRet=False):
    from sklearn.neighbors import KNeighborsClassifier
    import numpy as np
    import pandas as pd
    
    if opt:
        max_n = np.shape(X)[1]
        k = 1
        maxScore = 0.0
        ret = X.iloc[-252:-per, 1]
        X = (X - X.mean())/X.std()
        for i in range(1, max_n-1):
            clf = KNeighborsClassifier(n_neighbors=i)
            clf.fit(X.iloc[:-252],y[:-252])

            pred = clf.predict(X.iloc[-252:-per])        
            profit = np.mean(np.multiply(pred[pred==1], ret[pred==1]))
            profit -= np.mean(np.multiply(pred[pred==-1], ret[pred==-1]))
            #print('Profit: ', profit)
            if  profit > maxScore:
                maxScore = profit
                k = i
    else:
        k = 6

    clf = KNeighborsClassifier(n_neighbors=k)
    if backTest:         
        clf.fit(X.iloc[:-per],y[:-per])
        return clf
    else:
        clf.fit(X.iloc[:-252], y[:-252])
        predScore = clf.score(X.iloc[-252:-per],y[-252:-per])

        clf.fit(X.iloc[:-per],y[:-per])
        pred = int(clf.predict(X.iloc[-1:])[0])
        
        if showRet:
            return pred, predScore, maxScore
        else:
            return pred, predScore

def gbcMom(X,y,per=22,opt=False,backTest=False,showRet=False):
    from sklearn.ensemble import GradientBoostingClassifier
    import numpy as np
    import pandas as pd
    
    ret = X.iloc[-252:-per, 1]
    if opt:
        max_n = np.shape(X)[1] * 2
        k = 1
        maxScore = 0.0

        for i in range(1, max_n):
            clf = GradientBoostingClassifier(n_estimators = k * 25)
            clf.fit(X.iloc[:-252],y[:-252])
            pred = clf.predict(X.iloc[-252:-per])
            
            profit = np.mean(np.multiply(pred[pred==1], ret[pred==1]))
            profit -= np.mean(np.multiply(pred[pred==-1], ret[pred==-1]))
            #print('Profit: ', profit)
            if  profit > maxScore:
                maxScore = profit
                k = i
    else:
        k = 4

    clf = GradientBoostingClassifier(n_estimators = k * 25)
    clf.fit(X.iloc[:-per],y[:-per])
    
    if backTest:
        return clf
    else:        
        pred = int(clf.predict(X.iloc[-1:])[0])
        clf.fit(X.iloc[:-252], y[:-252])
        predScore = clf.score(X.iloc[-252:-per],y[-252:-per])
        
        if showRet:
            pred = clf.predict(X.loc[ret.index])
            stratRet = np.mean(np.multiply(pred[pred==1], ret[pred==1]))
            stratRet -= np.mean(np.multiply(pred[pred==-1], ret[pred==-1]))
            return pred, predScore, stratRet
        else:
            return pred, predScore

a = 'SWED-A.ST'
p = 22
#pr = yf.download(a, period='10y')['Adj Close']
#r = (pr.shift(p)/pr - 1).fillna(0.0)

#X,y = featurize(apr, per=p)
#print('Obtained X and y for {}\n'.format(a))

#print('SVM', svmMom(X,y,per=p,showRet=True))
#print('RFC', rfcMom(X,y,per=p,showRet=True,opt=True))
#print('RFC', rfcMom(X,y,per=p,showRet=True))
#print('GBC', gbcMom(X,y,per=p,showRet=True,opt=True))
#print('GBC', gbcMom(X,y,per=p,showRet=True))
#print('KNN', kncMom(X,y,per=p,showRet=True))

# Backtesting

In [0]:
def backtestMLmomentum(bt, tPeriod=252, hPeriod=30, 
                       verbose=False, useAccuracy=False, 
                       getPrediction=False, opt=False):
    testPeriod = tPeriod # Testing period in days
    h = hPeriod #Investment horizon in days
    models = ['SVM','GBC','RFC','KNN']
    
    if bt.count().min() <= 2*tPeriod + 201:        
        print('Backtesting requires at least {} records of prices.'.format(2*tPeriod + 201))
        print(bt.count().min(), ' records passed.')
        return 0

    results = pd.DataFrame(index=bt.columns)
    if useAccuracy: accuracy = pd.DataFrame(index=bt.columns)
    for m in models:
        results[m+' Long Ret'] = .0
        results[m+' Max Loss'] = .0
        results[m+' AVG LR'] = .0
        
        if useAccuracy:
            accuracy[m+' Hit Rate'] = .0
            accuracy[m+' Miss Rate'] = .0
            accuracy[m+' Score'] = .0

    if getPrediction: preDF = pd.Series(index=bt.columns);
    for a in bt.columns:
        if verbose: print(a);

        #Define past day and future investment forizon returns
        pp = bt[a][-testPeriod:]
        ret = np.log(pp/pp.shift(1))
        fret = np.log(pp.shift(-h)/pp).fillna(0.0)

        #Creating training and testing datasets
        low = -(0.01/22)*h
        high = (0.03/22)*h
        X, y = featurize(bt[a].dropna(), per=h, tRet=(low,high))
        X_train, y_train = X[:-testPeriod], y[:-testPeriod]
        X_test, y_test = X[-testPeriod:], y[-testPeriod:]

        #Training the trading models
        if opt:
            svm = svmMom(X_train, y_train, per=h, backTest=True)
            rfc = rfcMom(X_train, y_train, per=h, opt=True, backTest=True)
            knc = kncMom(X_train, y_train, per=h, opt=True, backTest=True)
            gbc = gbcMom(X_train, y_train, per=h, opt=True, backTest=True)
        else:
            svm = svmMom(X_train, y_train, per=h, backTest=True)
            rfc = rfcMom(X_train, y_train, per=h, backTest=True)
            knc = kncMom(X_train, y_train, per=h, backTest=True)
            gbc = gbcMom(X_train, y_train, per=h, backTest=True)

        #Calculating model scores
        scores = [
            round(svm.score(X_test, y_test),4),
            round(gbc.score(X_test, y_test),4),
            round(rfc.score(X_test, y_test),4),
            round(knc.score(X_test, y_test),4)
        ]

        #Storing model predictions for test set
        predictions = [
            np.array(svm.predict(X_test)),
            np.array(gbc.predict(X_test)),
            np.array(rfc.predict(X_test)),
            np.array(knc.predict(X_test))
        ]

        #Savign he results in dataframe and printing out few key points
        if verbose: print('Historical profitability:');
        if getPrediction:
            modelTable = pd.DataFrame(index=models, columns=list(['RET','HIT','HITL',
                                                                  'MISS','SC','WINR',
                                                                  'COUNT','AVGR','AVGL']),
                                      dtype=float)
            
        for m in range(len(models)):
            hit = ret[np.logical_and(predictions[m] == 1, fret > high)].count()
            miss = ret[np.logical_and(predictions[m] == 1, fret < low)].count()
            predicted = len(predictions[m][predictions[m] == 1])
            predicLoss = ret[np.logical_and(predictions[m] == -1, fret < low)].count()
            actual = len(predictions[m][fret > .03])
            
            results.loc[a, models[m]+' Long Ret'] = round(ret.shift(-1)[predictions[m] == 1].sum(),4)
            results.loc[a, models[m]+' Max Loss'] = round(fret[predictions[m] == 1].min(),4)
            results.loc[a, models[m]+' AVG LR'] = round(fret[predictions[m] == 1].mean(),4)
            
            if getPrediction:
                modelTable.loc[models[m],'RET'] = results.loc[a, models[m]+' Long Ret']/predicted
                modelTable.loc[models[m],'HIT'] = hit
                modelTable.loc[models[m],'HITL'] = predicLoss/len(predictions[m][predictions[m] == -1])
                modelTable.loc[models[m],'MISS'] = miss
                modelTable.loc[models[m],'SC'] = scores[m]
                modelTable.loc[models[m],'WINR'] = np.dot(predictions[m], fret)
                modelTable.loc[models[m],'COUNT'] = predicted
                modelTable.loc[models[m],'AVGR'] = round(fret[predictions[m] == 1].mean(),4)
                modelTable.loc[models[m],'AVGL'] = round(fret[predictions[m] == -1].mean(),4)
        if verbose: print(a,'\n', modelTable);

        if getPrediction:
            #modelTable['RATE'] = ((modelTable-modelTable.min())/(modelTable.max()-modelTable.min())).sum(axis=1)
            #modelTable['RATE'] = modelTable['RATE']*(1+modelTable['AVGR'])**h
            model = modelTable['WINR'].idxmax()

            if model == 'SVM':
                preDF[a] = svmMom(X,y)[0]
            elif model == 'RFC':
                preDF[a] = rfcMom(X,y)[0]
            elif model == 'GBC':
                preDF[a] = gbcMom(X,y)[0]
            elif model == 'KNN':
                preDF[a] = kncMom(X,y)[0]

            if useAccuracy:
                accuracy.loc[a, models[m]+' Hit Rate'] = hit/actual
                accuracy.loc[a, models[m]+' Miss Rate'] = miss/predicted
                accuracy.loc[a, models[m]+' Score'] = scores[m]
            
            if verbose:
                if predicted == 0:
                    print(models[m], '({}/{})'.format(predicted,actual))
                else:
                    print(models[m], '({}/{})'.format(predicted,actual),'\n',
                          'Hit Rate:    ',accuracy.loc[a, models[m]+' Hit Rate'],'\n',
                          'Miss Rate:   ',accuracy.loc[a, models[m]+' Miss Rate'],'\n',
                          'Max. loss:  ',fret[predictions[m] == 1].min(),'\n',
                          'Mean profit: ',fret[predictions[m] == 1].mean())
        if verbose: print('=====================================\n');
        
    if useAccuracy:
        return results, accuracy
    elif getPrediction:
        return preDF
    else:        
        return results

#backtestMLmomentum(yf.download(['TSLA','AAPL'])['Adj Close'], 
#                   tPeriod=252, hPeriod=22, getPrediction=True)

In [0]:
def modelPick(instruments, btPrices=pd.DataFrame(), predict=False):
    models = ['SVM','GBC','RFC','KNN']
    modelPicks = pd.Series(index = instruments)

    if len(btPrices.index) <= 3*252: btPrices = yf.download(instruments)['Adj Close'];
    results, accuracy = backtestMLmomentum(btPrices, tPeriod=252, 
                                           hPeriod=30, useAccuracy=True)

    for i in instruments:
        res = results.loc[i]
        acc = accuracy.loc[i]
        
        table = pd.DataFrame(index=models)
        table['Long Ret'] = .0
        table['Hit Rate'] = .0
        table['Miss Rate'] = .0
        table['AVG LR'] = .0
        table['Score'] = .0
            
        for m in models:
            table.loc[m, 'Long Ret'] = res[m+' Long Ret']
            table.loc[m, 'Hit Rate'] = acc[m+' Hit Rate']
            table.loc[m, 'Miss Rate']= acc[m+' Miss Rate']
            table.loc[m, 'AVG LR'] = res[m+' AVG LR']
            table.loc[m, 'Score'] = acc[m+' Score']
            
        print(table)
        table = table[table['Hit Rate'] >= table['Hit Rate'].mean()]
        if len(table[table['Miss Rate'] < table['Miss Rate'].median()].index) > 2:
            table = table[table['Miss Rate'] < table['Miss Rate'].median()]
        model = table['AVG LR'].idxmax()
            
        print('Best model for ', i,' is ', model)
        modelPicks[i] = model

    return modelPicks

# Data Gathering

## Scraping Yahoo Finance watchlists

In [23]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

watchlists = {
    'Bullet-Proof Balance Sheets':'https://finance.yahoo.com/u/motif/watchlists/bullet-proof-balance-sheets',
    'Cloud Computing':'https://finance.yahoo.com/u/motif/watchlists/cloud-computing1',
    'Robotic Revolution':'https://finance.yahoo.com/u/motif/watchlists/robotic-revolution',
    'Healthy Living':'https://finance.yahoo.com/u/yahoo-finance/watchlists/healthy-living',
    'Golden Crosses':'https://finance.yahoo.com/u/yahoo-finance/watchlists/sma-bullish-cross',
    'Recent MACD Bullish Crosses':'https://finance.yahoo.com/u/yahoo-finance/watchlists/macd-bullish-cross',
    'Biggest Earnings Beats':'https://finance.yahoo.com/u/yahoo-finance/watchlists/earnings-beat',
    'High-Yield Dividends':'https://finance.yahoo.com/u/motif/watchlists/high-yield-dividends',
    'Most Environmentally Friendly Companies':'https://finance.yahoo.com/u/yahoo-finance/watchlists/environmental-high',
    'Oversold Stocks':'https://finance.yahoo.com/u/yahoo-finance/watchlists/rsi-bearish-cross',
    'The Berkshire Hathaway Portfolio':'https://finance.yahoo.com/u/yahoo-finance/watchlists/the-berkshire-hathaway-portfolio',
    'Top Crypto Bets':'https://finance.yahoo.com/u/yahoo-finance/watchlists/top-crypto-bets',
    'Most Added to Watchlists':'https://finance.yahoo.com/u/yahoo-finance/watchlists/most-added',
    'Top Cryptos by Tokens Outstanding':'https://finance.yahoo.com/u/yahoo-finance/watchlists/crypto-top-tokens-outstanding'    
}

def scrapeWatchlist(wPath = ''):
    page = requests.get(wPath)
    soup = BeautifulSoup(page.content, 'html.parser')

    assets = []
    for a in soup.find_all('a'):
        qq = str(a.get('href')).split('/')
        if qq[1] == 'quote':
            if not '?' in qq[2]:
                assets.append(qq[2])
    return assets

wlAssets = []
for key in watchlists:
    newAssets = scrapeWatchlist(watchlists[key])
    wlAssets += newAssets
    print('{} ({})'.format(key, len(newAssets)))
wlAssets = pd.Series(wlAssets).drop_duplicates().sort_values()

print('\n{} assets in total.\n'.format(len(wlAssets.values)))
wlAssets.values

Bullet-Proof Balance Sheets (18)
Cloud Computing (16)
Robotic Revolution (20)
Healthy Living (10)
Golden Crosses (19)
Recent MACD Bullish Crosses (30)
Biggest Earnings Beats (6)
High-Yield Dividends (16)
Most Environmentally Friendly Companies (30)
Oversold Stocks (30)
The Berkshire Hathaway Portfolio (20)
Top Crypto Bets (14)
Most Added to Watchlists (30)
Top Cryptos by Tokens Outstanding (24)

258 assets in total.



array(['AAPL', 'ABB', 'ACB', 'ACGLO', 'ACN', 'ADA-USD', 'ADBE', 'ADI',
       'ADP', 'AEL', 'AIG', 'ALXN', 'AMD', 'AME', 'AMTD', 'AMZN', 'ARAY',
       'ARDR-USD', 'AVAV', 'BA', 'BABA', 'BAC', 'BAC-PL', 'BAT-USD',
       'BBY', 'BCE', 'BCN-USD', 'BCS', 'BDN', 'BHVN', 'BIDU', 'BILI',
       'BKR', 'BLK', 'BMO', 'BRK-A', 'BRKS', 'BTI', 'BTS-USD', 'BYND',
       'CAT', 'CBB-PB', 'CBOE', 'CDNS', 'CGNX', 'CHKP', 'CI', 'CIB',
       'CMCSA', 'CME', 'CMPR', 'CNC', 'CNP', 'COP', 'COST', 'CPRT',
       'CSCO', 'CTA-PA', 'CTAS', 'CTSH', 'CVX', 'D', 'DAL', 'DCN-USD',
       'DG', 'DGB-USD', 'DIS', 'DKS', 'DLR-PJ', 'DOGE-USD', 'DRH', 'DVA',
       'DXCM', 'EC', 'EMR', 'EOS-USD', 'ESLT', 'ET', 'EXPE', 'FB', 'FDX',
       'FHN', 'FIT', 'FL', 'FMS', 'FOXA', 'FRC-PH', 'FTV', 'FUN-USD',
       'GBTC', 'GD', 'GE', 'GEF', 'GIS', 'GLW', 'GM', 'GNC', 'GNT-USD',
       'GOOG', 'GOOGL', 'GRMN', 'GRPN', 'GS', 'GS-PA', 'GS-PK', 'HCM',
       'HLF', 'HOLI', 'HON', 'HP', 'HSBC-PA', 'HSY', 'HUM', 'IAC', 'IBKR',
 

## Predefined assets

In [24]:
portfolio = ['T', 'NVDA','AMTD','SO','ABBV','HMY']

baltics = ['AMG1L.VS','APG1L.VS','ARC1T.TL','AUG1L.VS','BAL1R.RG','BLT1T.TL',
           'BTE1R.RG','CPA1T.TL','DPK1R.RG','EEG1T.TL','EFT1T.TL','ESO1L.VS',
           'EWA1L.VS','GRD1R.RG','GRG1L.VS','GZE1R.RG','HAE1T.TL','HMX1R.RG',
           'INC1L.VS','INL1L.VS','INR1L.VS','IVL1L.VS','K2LT.VS','KA11R.RG',
           'KNF1L.VS','KNR1L.VS','LGD1L.VS','LHV1T.TL','LINDA.RG','LJM1R.RG',
           'LNA1L.VS','LNR1L.VS','LNS1L.VS','MDARA.RG','MRK1T.TL','NCN1T.TL',
           'NEOFI.RG','NTU1L.VS','OLF1R.RG','PKG1T.TL','PRF1T.TL','PTR1L.VS',
           'PZV1L.VS','RAR1R.RG','RER1R.RG','RJR1R.RG','RKB1R.RG','RRR1R.RG',
           'RSU1L.VS','SAB1L.VS','SAF1R.RG','SCM1R.RG','SFG1T.TL','SKN1T.TL',
           'SMA1R.RG','SNG1L.VS','TAL1T.TL','TEL1L.VS','TKM1T.TL','TPD1T.TL',
           'TSM1T.TL','TVEAT.TL','UTR1L.VS','VBL1L.VS','VLP1L.VS','VSS1R.RG',
           'ZMP1L.VS']

dividendStocks = ['AAPL','ABBV','ADBE','AEP','AGNC','AMTD','AMZN','APD','ARR',
                  'BABA','BAH','BEN','C','CAT','CL','CSCO','CVS','CVX','DUK',
                  'FAST','FB','FDX','FITB','GOOG','GOLD','HMY','IBM','INTC',
                  'IRBT','JNJ','JPM','K','KO','MA','MAIN','MCD','MED','MMM',
                  'MSFT','MSI','NFLX','NKE','NVDA','O','OKE','PEP','PG','PYPL',
                  'RGLD','SBUX','SO','TGT','TSLA','TWO','UNH','V','VGR','VZ',
                  'WELL','WM','WMT','XOM','YORW']

assets = portfolio + baltics + dividendStocks + wlAssets.to_list()
assets = pd.Series(assets).drop_duplicates().to_list()
print('{} assets in total.\n'.format(len(assets)))
np.sort(assets)

360 assets in total.



array(['AAPL', 'ABB', 'ABBV', 'ACB', 'ACGLO', 'ACN', 'ADA-USD', 'ADBE',
       'ADI', 'ADP', 'AEL', 'AEP', 'AGNC', 'AIG', 'ALXN', 'AMD', 'AME',
       'AMG1L.VS', 'AMTD', 'AMZN', 'APD', 'APG1L.VS', 'ARAY', 'ARC1T.TL',
       'ARDR-USD', 'ARR', 'AUG1L.VS', 'AVAV', 'BA', 'BABA', 'BAC',
       'BAC-PL', 'BAH', 'BAL1R.RG', 'BAT-USD', 'BBY', 'BCE', 'BCN-USD',
       'BCS', 'BDN', 'BEN', 'BHVN', 'BIDU', 'BILI', 'BKR', 'BLK',
       'BLT1T.TL', 'BMO', 'BRK-A', 'BRKS', 'BTE1R.RG', 'BTI', 'BTS-USD',
       'BYND', 'C', 'CAT', 'CBB-PB', 'CBOE', 'CDNS', 'CGNX', 'CHKP', 'CI',
       'CIB', 'CL', 'CMCSA', 'CME', 'CMPR', 'CNC', 'CNP', 'COP', 'COST',
       'CPA1T.TL', 'CPRT', 'CSCO', 'CTA-PA', 'CTAS', 'CTSH', 'CVS', 'CVX',
       'D', 'DAL', 'DCN-USD', 'DG', 'DGB-USD', 'DIS', 'DKS', 'DLR-PJ',
       'DOGE-USD', 'DPK1R.RG', 'DRH', 'DUK', 'DVA', 'DXCM', 'EC',
       'EEG1T.TL', 'EFT1T.TL', 'EMR', 'EOS-USD', 'ESLT', 'ESO1L.VS', 'ET',
       'EWA1L.VS', 'EXPE', 'FAST', 'FB', 'FDX', 'FHN', 'FIT', 'FITB',

In [25]:
data = yf.download(assets, period='10y').sort_index().resample('D').last()

prices = data['Adj Close'].fillna(method="ffill")
close = data['Close'].fillna(method="ffill")

(prices.count()[prices.count() < prices.count().quantile(0.05)]/365).sort_values()

[*********************100%***********************]  360 of 360 completed

2 Failed downloads:
- SFLY: No data found, symbol may be delisted
- PCLN: No data found for this date range, symbol may be delisted


SFLY        0.000000
PCLN        0.000000
NEOFI.RG    0.008219
LINDA.RG    0.013699
CPA1T.TL    0.109589
BTE1R.RG    0.591781
CTA-PA      0.627397
LJM1R.RG    0.652055
LK          0.679452
BYND        0.720548
PINS        0.758904
FOXA        0.860274
SPCE        0.863014
NIO         1.356164
PDD         1.487671
TSM1T.TL    1.602740
BILI        1.816438
NTU1L.VS    1.835616
dtype: float64

## Data cleaning

In [29]:
for a in prices.count()[prices.isna().sum() > 5*365].index:
    if a not in portfolio:
        prices = prices.drop(a, axis=1)
        print('Droping ' + a + ' for lack of historical prices.')

print('\n', len(prices.columns), ' assets left.')


 297  assets left.


# Signal testing

## ML function testing

In [0]:
#modelPick(portfolio)

## Signal testing

In [31]:
period = 30
for a in portfolio:
    print(a)
    print(pd.DataFrame(momentumBuy(prices[a], a, rper=period, output='current'), 
                       columns=list(['LAG','Signal','Profit Prob.','Return'])))
    ts = momentumBuy(prices[a], a, rper=period, output='ts')
    c = ts.iloc[-1]['BuySignal']
    print('\nBuy signal count:', c, '(',
          round(100*ts.groupby(['BuySignal'])['Returns'].mean()[c],2), '%)')
    print(simpleMomentum(prices[a], per=period, output='triple'))

    X,y = featurize(prices[a], per=period)
    signal = rfcMom(X,y,per=period,opt=True)
    
    s = ''
    if signal[0] == 1.: s='Buy'
    elif signal[0] == -1.: s='Sell'
    else: s='Hold';

    print('\nMachine Learning signal: {} ({}%)'.format(s, 
                                                       round(100*signal[1],2)))
    print('===========================\n')

backtestMLmomentum(prices[portfolio], hPeriod=period, 
                   getPrediction=True, opt=False)

T
      LAG Signal  Profit Prob.    Return
0  LAG.14    BUY      0.848569  0.405844
1  LAG.28    BUY      0.856879  0.744494
2  LAG.60    BUY      0.777470  0.950214
3  LAG.90   HOLD      0.642245  0.664013

Buy signal count: 6 ( 2.92 %)
('SELL', 0.005311778699884057, 0.45900857959961866)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

ValueError: ignored

# Profile building

In [0]:
profiles = pd.DataFrame(columns=list(['Name', 'Industry', 'Sub-Industry']), 
                        index=prices.columns, dtype=str)
profiles['Action'] = ''
profiles['Profit Prob.'] = 0.0
profiles['Beta'] = 0.0
profiles['RET'] = prices.pct_change(22)[profiles.index].mean().mul(12)
profiles['STD'] = prices.pct_change(22)[profiles.index].std().mul(np.sqrt(12))

pproc = .0
i = 1
for a in prices.columns:
    profile_page = requests.get('https://finance.yahoo.com/quote/' + str(a) + '/profile?p=' + str(a))
    soup = BeautifulSoup(profile_page.content, 'html.parser')

    try:
        profiles.loc[a, 'Name'] = soup.find('h3', {'class':"Fz(m) Mb(10px)"}).text
    
        for span in soup.find_all('span', {'class':'Fw(600)'}):
            if str(span).find('data-reactid="21"') > 0: 
                if len(span.text) > 3: profiles.loc[a, 'Industry'] = span.text
            elif str(span).find('data-reactid="22"') > 0:
                if len(span.text) > 3: profiles.loc[a, 'Industry'] = span.text
            elif str(span).find('data-reactid="23"') > 0:
                if len(span.text) > 3: profiles.loc[a, 'Industry'] = span.text
        
            if str(span).find('data-reactid="25"') > 0:
                if len(span.text)>3:profiles.loc[a, 'Sub-Industry'] = span.text
            elif str(span).find('data-reactid="26"') > 0:
                if len(span.text)>3:profiles.loc[a, 'Sub-Industry'] = span.text
            elif str(span).find('data-reactid="27"') > 0:
                if len(span.text)>3:profiles.loc[a, 'Sub-Industry'] = span.text
        
        aact, aRet, curr = simpleMomentum(prices[a], per=22, output='triple')
        
        profiles.loc[a, 'Action'] = aact
        profiles.loc[a, 'Profit Prob.'] = round(100*curr,2)

        mu, sigma = momentum(prices=prices[a])
        if type(mu) != float: 
            profiles.loc[a, 'RET'] = 0.75*aRet + 0.25*profiles.loc[a, 'RET']
        else:
            profiles.loc[a, 'RET'] = mu
        
        if type(sigma) == float: profiles.loc[a, 'STD'] = sigma;
    except:
        print('Failed to read data of ',a)
        prices = prices.drop(a, axis=1)
        profiles = profiles.drop(a, axis=0)
    
    proc = round(i/len(prices.columns),2)
    
    if proc % 0.1 == .0 and proc > pproc:
        pproc = proc
        print('{}% processed.'.format(int(1000 * proc)/10))
    i += 1

profiles.info()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

## Calculating individual and portfolio returns (daily, weekly and monthly)

In [0]:
ret = prices.pct_change().dropna()
ret['Portfolio'] = ret[portfolio].mean(axis=1)
wret = prices.asfreq(freq='W', method='ffill').pct_change().dropna()
wretRoll = prices.pct_change(7).dropna()
mret = prices.pct_change(28).fillna(0.0)
mret['Portfolio'] = mret[portfolio].mean(axis=1)
mretRoll = prices.pct_change(22).dropna()
mretRoll['Portfolio'] = mretRoll[portfolio].mean(axis=1).fillna(0.0)

log_ret = np.log(prices.div(prices.shift(1))).fillna(0.0)
log_ret['Portfolio'] = log_ret[portfolio].mean(axis=1).fillna(0.0)
mlogret = np.log(prices/prices.shift(1)).rolling(28).sum().fillna(0.0)
mlogret['Portfolio'] = mlogret[portfolio].mean(axis=1)

profiles['Beta'] = mlogret.cov()['Portfolio']/mlogret.drop('Portfolio', axis=1).var().loc[profiles.index]

mlogret['Portfolio'].describe()

# Portfolio Analysis

In [0]:
#Need to add Price to Earnings and Price to Book to profiles
profiles.loc[portfolio]

In [0]:
profiles.groupby('Sub-Industry')['Name'].count()

## Sectors invested

In [0]:
portIndCount = profiles.loc[portfolio]['Industry'].drop_duplicates().count()
industryCount = profiles['Industry'].drop_duplicates().count()

profiles.loc[portfolio].groupby('Industry')['Name'].count()

## Plot graphs

In [0]:
portfolioGrowth = pd.Series(np.cumprod(np.exp(log_ret['Portfolio'])))
portfolioGrowth.plot(figsize=(12,6))
plt.show();

mretRoll[portfolio].boxplot()
plt.show();

# Filtering down the investment opportunities

In [0]:
profiles.groupby(['Industry','Sub-Industry'])['Beta'].count()

In [0]:
#names = ['NVDA','TSLA','ADBE','BABA']
#period = 22
#backtestMLmomentum(prices[names], hPeriod=period, getPrediction=True)

## Calculating how much assets add to portfolios Sharpe ratio



In [0]:
profiles['Added Sharpe'] = 0.0

portfolioSharpe = mret[portfolio].mean(axis=1).mean()/mret[portfolio].mean(axis=1).std()

for n in profiles.index:
    t = portfolio[:]
    t.append(n)    
    mu = profiles.loc[t,'RET'].mean()
    sigma = mret[t].mean(axis=1).std()
    profiles.loc[n, 'Added Sharpe'] = mu/sigma - portfolioSharpe

print('Most added value:')
profiles = profiles.sort_values(by='Added Sharpe', ascending=False)
profiles[profiles['Added Sharpe'] > profiles['Added Sharpe'].quantile(0.95)]

## Filtering out industries in which portfolio is already invested in

In [0]:
portfolio_ind = profiles.loc[portfolio].groupby('Industry').count()['Name']
portfolio_subind = profiles.loc[portfolio].groupby('Sub-Industry').count()['Name']

#print(portfolio_subind)
selection = profiles
industryCount = len(profiles.groupby('Industry').count().index)
subIndustryCount = len(profiles.groupby('Sub-Industry').count().index)

port_Ind_fully_invested = len(portfolio_ind.index) == industryCount
port_subInd_fully_invested = len(portfolio_subind.index) == subIndustryCount

for i in portfolio_ind.index:
    if port_Ind_fully_invested:
        if portfolio_ind.loc[i] == portfolio_ind.min():
            selection = selection[selection['Industry'] == i]
        else:
            selection = selection[selection['Industry'] != i]
    else:
        selection = selection[selection['Industry'] != i]
for i in portfolio_subind.index:
    if port_subInd_fully_invested:
        if portfolio_subind.loc[i] == portfolio_subind.min():
            selection = selection[selection['Industry'] == i]
        else:
            selection = selection[selection['Industry'] != i]
    else:
        selection = selection[selection['Sub-Industry'] != i]

selection = selection[selection['Action'] != 'SELL']
selection.sort_values(by=['Beta', 'Profit Prob.'])

## Filtering assets with lowest downside correlation with portfolio

In [0]:
selAss = selection.index.to_list()
ret = ret[selAss+['Portfolio']]
low_corr = ret[ret['Portfolio'] < 0.0].corr()[ret.corr()['Portfolio'] <= ret.corr()['Portfolio'].quantile(0.50)]

sns.heatmap(mretRoll.corr().loc[low_corr.index, low_corr.index])
plt.show();

print('Lowest correlating assets:')
profiles.loc[low_corr.index].sort_values(by='Beta')

## Checking industries with lowest Beta against portfolio

In [0]:
profiles.loc[low_corr.index].groupby(['Industry','Sub-Industry'])['Beta'].mean()

# Selecting assets from the filtered assets

## Checking for BUY signals with simple Momemntum strategies

In [0]:
niceStocks = []
for a in low_corr.index:
    signTable = pd.DataFrame(momentumBuy(prices[a], a, rper=22, output='current'), 
                             columns=list(['LAG','Signal','Profit Prob.','Return']))
    
    ts = momentumBuy(prices[a], a, rper=22, output='ts')        
    stateTable = ts.groupby(['Signal','BuySignal'])['Returns'].mean()
    currSt = ts.iloc[-1][['Signal','BuySignal']]
    expRet = round(stateTable[(currSt['Signal'], currSt['BuySignal'])], 3)
    pProb = signTable['Profit Prob.'].mean()

    smplState, smplRet, smplProb = simpleMomentum(prices[a], per=22, output='triple')

    if (signTable['Profit Prob.'].mean() >= 0.70 and expRet >= 0.05) or (smplState=='BUY' and smplProb >= 60):
        minRet = int(10000*min(expRet,smplRet))/100
        maxRet = int(10000*max(expRet,smplRet))/100
        minProb = int(10000*min(pProb,smplProb))/100
        maxProb = int(10000*max(pProb,smplProb))/100
        print('{} expected return: {}% - {}% ({}% - {}%)'.format(a, minRet,maxRet,
                                                                 minProb,maxProb))
        niceStocks.append(a)

In [0]:
profiles.loc[niceStocks+['C','AAPL','FDX','NKE','TGT','TWO','JPM']]

# Machine Learning asses selection

## Checking out assets that are chosen by simple Momentum strategy first

In [0]:
strongBuy = profiles[profiles['Action'] == 'Strong Buy']
if len(strongBuy.index) > 0:
    strongBuy['STR'] = backtestMLmomentum(prices[strongBuy.index], hPeriod=22, 
                                          getPrediction=True)
strongBuy

In [0]:
buy = profiles[profiles['Action'] == 'Buy']
if len(buy.index) > 0:
    buy['STR'] = backtestMLmomentum(prices[buy.index], hPeriod=22, 
                                    getPrediction=True)
buy

## Checking what ML thinks about the assets in portfolio

In [0]:
backtestMLmomentum(prices[portfolio], hPeriod=22, getPrediction=True, opt=True)

## Getting ML forecasts for filtered assets

In [0]:
investable = profiles.loc[low_corr.index.to_list()]
investable['STR'] = backtestMLmomentum(prices[investable.index], hPeriod=22, 
                                       getPrediction=True, opt=True)

investable.sort_values(by='RET', ascending=False)

In [0]:
investable[investable['STR']==1].sort_values(by='RET', ascending=False)

In [0]:
diverse = profiles.loc[selAss]
diverse = diverse[diverse['Action']!='SELL']

diverse['STR'] = backtestMLmomentum(prices[diverse.index], hPeriod=22, 
                                    getPrediction=True, opt=True)
diverse = diverse[diverse['STR'] == 1].sort_values(by='RET')
diverse

In [0]:
sharpe = diverse[np.logical_and(diverse['Added Sharpe'] > max(0.0,profiles['Added Sharpe'].quantile(0.5)), 
                                diverse['Beta'] < profiles.loc[portfolio,'Beta'].min())]
sharpe[sharpe['Action']!='SELL']