In [74]:
import pandas as pd
import numpy as np
from scipy.stats import t
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt 
from sklearn.metrics import confusion_matrix

def cutWeek(weekNumber, data):
    weekdata = []
    for i in range(weekNumber):
         temp = data[data.Week_Number == i]
         temp = temp.reset_index(drop=True)
         weekdata.append(temp)
    return weekdata

def labelMapping(year, week, label):
    labelMap = {}
    for (y, w, l) in zip(year, week, label):
        key = (y, w)
        value = l
        labelMap[key] = value
    return labelMap

def proficCalculator(data, fund):
    # Week 0 case
    week1Data = data[0]
    week1Label = week1Data.Label[0] # week 0 label

    if week1Label == 1:
        stock = True
        buyPrice = week1Data.Close[0] # week 0 first day price
        sellPrice = week1Data.Close[len(week1Data)-1] # week 0 last day price
    else:
        stock = False
        buyPrice = week1Data.Close[len(week1Data)-1] # week 0 last day price
        sellPrice = week1Data.Close[len(week1Data)-1] # week 0 last day price


    for df in data[1:]:
        nextWeekColor = df.Label[0]
        nextClosePrice = df.Close[len(df)-1]

        # stock + green = no action
        if (stock == True) and (nextWeekColor == 1):
            stock == True # Keep holding the stock
            buyPrice = buyPrice # Buy point stay
            sellPrice = nextClosePrice # Sell point move forward

        # stock + red = sell
        elif (stock == True) and (nextWeekColor == 0):
            r = 1 + (sellPrice - buyPrice) / sellPrice
            fund = fund * r
            buyPrice = nextClosePrice
            sellPrice = nextClosePrice
            stock = False
            
        # money + green = buy stock
        elif (stock == False) and (nextWeekColor == 1):
            buyPrice = buyPrice
            sellPrice = nextClosePrice
            stock = True
        # money + red = no action
        elif (stock == False) and (nextWeekColor == 0):
            buyPrice = nextClosePrice
            sellPrice = nextClosePrice
            stock = False

    # Last withdraw
    r = 1 + (sellPrice - buyPrice) / sellPrice
    fund = fund * r
    return fund

In [75]:
dfLabel = pd.read_csv('./GOOGL_weekly_return_volatility.csv')
year1 = dfLabel[dfLabel.Year == 2019]
year2 = dfLabel[dfLabel.Year == 2020]
year1

Unnamed: 0,Year,Week_Number,mean_return,volatility,label
0,2019,0,0.786333,4.007779,1
1,2019,1,-0.2512,0.782992,0
2,2019,2,0.8026,1.639555,1
3,2019,3,-0.119,1.783558,0
4,2019,4,0.3264,2.111818,1
5,2019,5,-0.2786,1.825005,0
6,2019,6,0.3164,1.17948,1
7,2019,7,-0.064,1.158254,0
8,2019,8,0.5684,0.787367,1
9,2019,9,0.029,0.955047,0


In [76]:
# year1 provide paramieters
yesData = year1[year1.label == 1]
noData = year1[year1.label == 0]

pYes = len(yesData) / (len(year1))
pNo = len(noData) / len(year1)

muF1Yes, sigF1Yes = yesData.mean_return.mean(), yesData.mean_return.std()
muF2Yes, sigF2Yes = yesData.volatility.mean(), yesData.volatility.std()

muF1No, sigF1No = noData.mean_return.mean(), yesData.mean_return.std()
muF2No, sigF2No = noData.volatility.mean(), yesData.volatility.std()

# Predict year 2 label
yTest = year2.label
f1 = year2.mean_return
f2 = year2.volatility



yPredict = []
dfree = 0.5
for (f1Test, f2Test) in zip(f1, f2):


    pF1Yes = t.pdf(x = f1Test, df = dfree, loc = muF1Yes, scale = sigF1Yes) 
    pF2Yes = t.pdf(x = f2Test, df = dfree, loc = muF2Yes, scale = sigF2Yes)

    pF1No = t.pdf(x = f1Test, df = dfree, loc = muF1No, scale = sigF1No)
    pF2No = t.pdf(x = f2Test, df = dfree, loc = muF2No, scale = sigF2No)

    pY = pF1Yes * pF2Yes * pYes
    pN = pF1No * pF2No * pNo

    if pY >= pN:
        yPredict.append(1)
    else:
        yPredict.append(0)

print(accuracy_score(yTest, yPredict))
print(pY, pN)

## Confusion Matrix I choose 
temp = confusion_matrix(yTest, yPredict)
print(temp)

tn = temp[0][0]
fn = temp[1][0]
tp = temp[1][1]
fp = temp[0][1]

tpr = tp / (tp + fn)
tnr = tn / (tn + fp)

print('TPR = {}, TNR = {}'.format(tpr, tnr))




0.7924528301886793
0.020383396835891302 0.030197016380249936
[[19  6]
 [ 5 23]]
TPR = 0.8214285714285714, TNR = 0.76


In [77]:
yPredict = []
dfree = 1

for (f1Test, f2Test) in zip(f1, f2):


    pF1Yes = t.pdf(x = f1Test, df = dfree, loc = muF1Yes, scale = sigF1Yes) 
    pF2Yes = t.pdf(x = f2Test, df = dfree, loc = muF2Yes, scale = sigF2Yes)

    pF1No = t.pdf(x = f1Test, df = dfree, loc = muF1No, scale = sigF1No)
    pF2No = t.pdf(x = f2Test, df = dfree, loc = muF2No, scale = sigF2No)

    pY = pF1Yes * pF2Yes * pYes
    pN = pF1No * pF2No * pNo

    if pY >= pN:
        yPredict.append(1)
    else:
        yPredict.append(0)

print(accuracy_score(yTest, yPredict))
print(pY, pN)

## Confusion Matrix I choose 
temp = confusion_matrix(yTest, yPredict)
print(temp)

tn = temp[0][0]
fn = temp[1][0]
tp = temp[1][1]
fp = temp[0][1]

tpr = tp / (tp + fn)
tnr = tn / (tn + fp)

print('TPR = {}, TNR = {}'.format(tpr, tnr))


0.7924528301886793
0.036566590987857826 0.052524982181426455
[[19  6]
 [ 5 23]]
TPR = 0.8214285714285714, TNR = 0.76


In [78]:

yPredict = []
dfree = 5
for (f1Test, f2Test) in zip(f1, f2):


    pF1Yes = t.pdf(x = f1Test, df = dfree, loc = muF1Yes, scale = sigF1Yes) 
    pF2Yes = t.pdf(x = f2Test, df = dfree, loc = muF2Yes, scale = sigF2Yes)

    pF1No = t.pdf(x = f1Test, df = dfree, loc = muF1No, scale = sigF1No)
    pF2No = t.pdf(x = f2Test, df = dfree, loc = muF2No, scale = sigF2No)

    pY = pF1Yes * pF2Yes * pYes
    pN = pF1No * pF2No * pNo

    if pY >= pN:
        yPredict.append(1)
    else:
        yPredict.append(0)

print(accuracy_score(yTest, yPredict))
print(pY, pN)


## Confusion Matrix I choose 
temp = confusion_matrix(yTest, yPredict)
print(temp)

tn = temp[0][0]
fn = temp[1][0]
tp = temp[1][1]
fp = temp[0][1]

tpr = tp / (tp + fn)
tnr = tn / (tn + fp)

print('TPR = {}, TNR = {}'.format(tpr, tnr))





0.7924528301886793
0.06860962839991763 0.09423087601272048
[[19  6]
 [ 5 23]]
TPR = 0.8214285714285714, TNR = 0.76


In [79]:
# Strategy check
dfDetail = pd.read_csv('./GOOGL_weekly_return_volatility_detailed.csv')
year2Detail = dfDetail[dfDetail.Year == 2020]
year2Detail = year2Detail.reset_index(drop = True)

## Add label to detail
lMap = labelMapping(year2.Year, year2.Week_Number, yPredict)
temp = []
for (y, w) in zip(year2Detail.Year, year2Detail.Week_Number):
    key = (y, w)
    temp.append(lMap[key]) 
year2Detail['Label'] = temp
year2Detail = year2Detail[['Year', 'Week_Number', 'Close', 'Label']]

## Cut goo2020
goo2020Week = cutWeek(53, year2Detail)




## trading 
total = proficCalculator(goo2020Week, 100)
print("Using Label: {}".format(total))

## trding BH
firstWeek = goo2020Week[0]
firstClose = firstWeek.Close[0]

lastWeek = goo2020Week[-1]
lastClose = lastWeek.Close[len(lastWeek)-1]

r = 1 + (lastClose - firstClose) / lastClose
total = 100 * r
print("Buy on first day and Sell on last day: {}".format(total))



Using Label: 239.64550014107087
Buy on first day and Sell on last day: 121.17033527942765
