Stock Correlation Modelling

Notes for when you forget:

Yahoo back-adjusts historical stock prices whenever a company carries out a stock split.

In [55]:
import yfinance as yf
import pandas as pd
import numpy as np
from pylab import *
from math import *
import ast
import matplotlib.pyplot as plt

In [56]:
# Date window used by the downloads in this notebook.
timeframe_start, timeframe_end = "2021-01-01", "2026-01-01"

# Module-level sample datasets for two reference tickers.
AAPL, MSFT = (
    yf.download(symbol, start=timeframe_start, end=timeframe_end, progress=False)
    for symbol in ("AAPL", "MSFT")
)

In [57]:
def RetrieveStockInfo(ticker_a):
    """Download the price history of ``ticker_a`` for the notebook's timeframe.

    The window is bounded by the module-level ``timeframe_start`` and
    ``timeframe_end``; the result is whatever ``yf.download`` returns.
    """
    download_options = {
        "start": timeframe_start,
        "end": timeframe_end,
        "progress": False,
    }
    return yf.download(ticker_a, **download_options)

In [58]:
def TwoStockStats(pandasdataset_a, pandasdataset_b):
    """Interleave each dataset's open/close prices and find the overall peak.

    Returns a tuple ``(max_SPV, series_a, series_b)`` where each series is a
    flat array ordered Open day 1, Close day 1, Open day 2, Close day 2, ...
    and ``max_SPV`` (SPV = Stock Price Value) is the highest price found in
    either series, as a float.
    """
    interleaved = []
    for dataset in (pandasdataset_a, pandasdataset_b):
        # One row per day with columns (open, close); ravel reads it row by row.
        open_and_close = np.column_stack((dataset['Open'], dataset['Close']))
        interleaved.append(np.ravel(open_and_close))

    series_a, series_b = interleaved
    highest_price = float(max(np.max(series_a), np.max(series_b)))
    return (highest_price, series_a, series_b)

#array_1/2_open_close is the opens and closes each day interleaved i.e. Open Day 1, Close Day 1, Open Day 2, Close Day 2, ....

In [59]:
def _first_price_column(price_data):
    """Return the first column of ``price_data`` as a flat float array.

    Selecting a price field from a frame with two-level columns gives a
    one-column frame, while a flat frame gives a series; both end up 1-D here.
    """
    values = np.asarray(price_data, dtype=float)
    if values.ndim == 2:
        values = values[:, 0] if values.shape[1] else values.reshape(0)
    return values


def DifferenceArray(pandasdataset_a):
    """Compute the open-to-close change of every trading day in a dataset.

    The 'Open' and 'Close' columns are looked up by name, so the result does
    not depend on the order in which the columns happen to be stored.

    Returns a tuple ``(raw_diff_array, percentage_diff_array, count)``:
    ``raw_diff_array`` holds ``close - open`` per day, ``percentage_diff_array``
    the same change as a percentage of the open, both as lists of plain
    floats, and ``count`` is the number of entries in those lists.
    """
    opens = _first_price_column(pandasdataset_a['Open'])
    closes = _first_price_column(pandasdataset_a['Close'])

    # Number of non-missing opens, matching the original ``.count()`` call.
    trading_days = int(np.count_nonzero(~np.isnan(opens)))
    # NOTE(review): the last counted day is deliberately left out, as in the
    # original loop bound; presumably because the newest row may be an
    # incomplete session - confirm before changing.
    usable_days = max(trading_days - 1, 0)

    open_prices = opens[:usable_days].tolist()
    close_prices = closes[:usable_days].tolist()

    raw_diff_array = [
        close_price - open_price
        for open_price, close_price in zip(open_prices, close_prices)
    ]
    percentage_diff_array = [
        (raw_diff / open_price) * 100
        for raw_diff, open_price in zip(raw_diff_array, open_prices)
    ]
    return (raw_diff_array, percentage_diff_array, len(percentage_diff_array))

In [60]:
def PercentageChangeCDF(pandasdataset_a, ticker_a):
    """Plot the empirical CDF of a stock's daily open-to-close % changes.

    The sorted daily percentage changes from ``DifferenceArray`` go on the
    x-axis against evenly spaced values from 0 to 1 on the y-axis;
    ``ticker_a`` is only used in the plot title.
    """
    _, daily_percentages, sample_size = DifferenceArray(pandasdataset_a)
    ordered_changes = sorted(daily_percentages)
    cumulative_share = linspace(0, 1, sample_size)

    plt.plot(ordered_changes, cumulative_share)
    # Clamp the x-axis to the smallest and largest observed change.
    plt.xlim(ordered_changes[0], ordered_changes[-1])
    plt.ylim(0, 1)
    plt.title('CDF of Daily Percentage Changes in ' + ticker_a)
    plt.ylabel('Probabilty')
    plt.xlabel('Percentage Change (%)')
    plt.show()

In [61]:
def TwoStockPlot(ticker_a, ticker_b):
    """Download two tickers and plot their interleaved open/close prices.

    Both price series share one chart whose y-axis runs from 0 to 5% above
    the highest price found in either series.
    """
    data_a = RetrieveStockInfo(ticker_a)
    data_b = RetrieveStockInfo(ticker_b)
    peak_price, prices_a, prices_b = TwoStockStats(data_a, data_b)

    for prices, ticker in ((prices_a, ticker_a), (prices_b, ticker_b)):
        plt.plot(prices, label=str(ticker))

    plt.title('Two Stock Plot: ' + ticker_a + ' vs ' + ticker_b)
    plt.xlabel('Number of Data Points')
    plt.ylabel('Stock Price ($)')
    # Leave 5% of headroom above the highest price.
    plt.ylim(0, peak_price + (peak_price * 0.05))
    plt.legend(loc='upper left')
    plt.xlim(0)

    plt.show()

In [62]:
def TwoStockCorrelation(pandadataset_a, pandadataset_b, ticker_a, ticker_b, x, display1):
    """Measure how stock 2 moved ``x`` days after stock 1 moved up or down.

    Day ``i`` of stock 1 is paired with day ``i + x`` of stock 2 and each pair
    is counted as up/up, up/down, down/up or down/down. Pairs where either
    move is not strictly positive or negative are counted as "missed".

    Parameters:
        pandadataset_a, pandadataset_b: price datasets accepted by
            ``DifferenceArray``.
        ticker_a, ticker_b: labels used in the printed report and the result.
        x: lag in trading days applied to stock 2.
        display1: pass 1 to print the four conditional percentages.

    Returns:
        ``[best, ticker_a, ticker_b, x]`` where ``best`` is the entry
        ``[percentage, stock_1_count, joint_count, stock_1_flag, stock_2_flag]``
        that sorts last (highest percentage); flags are 0 for up, 1 for down.
        A percentage whose stock-1 count is zero is reported as 0.0 instead of
        raising ``ZeroDivisionError``.
    """
    set_1_diff = DifferenceArray(pandadataset_a)
    set_1_percentage_list = set_1_diff[1]
    set_1_percentage_count = set_1_diff[2]

    set_2_diff = DifferenceArray(pandadataset_b)
    set_2_percentage_list = set_2_diff[1]
    set_2_percentage_count = set_2_diff[2]

    # Only indices where both day i of set 1 and day i + x of set 2 exist.
    comparable_days = max(min(set_1_percentage_count, set_2_percentage_count) - x, 0)

    def direction(percentage_move):
        # 0 = up, 1 = down, None = flat (or NaN, which fails both comparisons).
        if percentage_move > 0:
            return 0
        if percentage_move < 0:
            return 1
        return None

    joint_counts = {(0, 0): 0, (0, 1): 0, (1, 0): 0, (1, 1): 0}
    error_count = 0
    for i in range(comparable_days):
        direction_1 = direction(set_1_percentage_list[i])
        direction_2 = direction(set_2_percentage_list[i + x])
        if direction_1 is None or direction_2 is None:
            error_count += 1
            continue
        joint_counts[(direction_1, direction_2)] += 1

    # Appended in the fixed order up/up, up/down, down/up, down/down so the
    # report below can index the list before it is sorted.
    correlation_percentage_list = []
    for direction_1 in (0, 1):
        base_count = joint_counts[(direction_1, 0)] + joint_counts[(direction_1, 1)]
        for direction_2 in (0, 1):
            joint_count = joint_counts[(direction_1, direction_2)]
            percentage = (joint_count / base_count) * 100 if base_count else 0.0
            correlation_percentage_list.append(
                [percentage, base_count, joint_count, direction_1, direction_2]
            )

    error_denominator = set_1_percentage_count - 1
    error_rate = (error_count / error_denominator) * 100 if error_denominator > 0 else 0.0
    error_stats = [error_count, error_rate]

    if display1 == 1:
        print("===============================================================================================================")
        print()
        print("When",ticker_a,"went UP one day,",ticker_b,"had a",correlation_percentage_list[0][0],"%  chance of going UP",x,"days later")
        print()
        print("When",ticker_a,"went UP one day,",ticker_b,"had a",correlation_percentage_list[1][0],"%  chance of going DOWN",x,"days later")
        print()
        print("When",ticker_a,"went DOWN one day,",ticker_b,"had a",correlation_percentage_list[2][0],"%  chance of going UP",x,"days later")
        print()
        print("When",ticker_a,"went DOWN one day,",ticker_b,"had a",correlation_percentage_list[3][0],"%  chance of going DOWN",x,"days later")
        print()
        print()
        print("We missed",error_stats[0],"trading days out of",set_1_percentage_count,", a",error_stats[1],"% error rate")
        print("===============================================================================================================")

    # Ascending sort puts the strongest relationship in the last slot.
    correlation_percentage_list.sort()
    return([correlation_percentage_list[3], ticker_a, ticker_b, x])
    

# What is the probability that, if Set_1 went up (or down) x days earlier, Set_2 goes up or down today?

In [63]:
def MassTSC(mindays, maxdays, file_name):
    """Run ``TwoStockCorrelation`` for every ordered ticker pair and lag.

    Every ticker in the built-in list is downloaded once, then each ordered
    pair (including a ticker with itself) is compared for every lag from
    ``mindays`` to ``maxdays`` inclusive. Results are sorted by descending
    correlation percentage and written one per line to ``file_name``; the
    file name is also appended to the "Mass_TSC_file_names" index file.

    Parameters:
        mindays, maxdays: inclusive range of day lags to test.
        file_name: path of the results file to (over)write.
    """
    stock_list = [
        "AAPL", "MSFT", "AMZN", "GOOGL", "TSLA",
        "NVDA", "ADBE", "NFLX", "INTC",
        "CSCO", "ORCL", "IBM", "CRM", "SAP",
        "TXN", "QCOM", "AVGO", "AMD", "MU",
        "AMAT", "NOW", "SNOW", "UBER",
        "LYFT", "SPOT", "PYPL", "MA",
        "V", "AXP", "COF", "BKNG",
        "DIS", "CMCSA", "T", "VZ", "TMUS",
        "SBUX", "MCD", "NKE", "HD", "LOW",
        "WMT", "COST", "TGT", "DG",
        "LULU", "TJX", "ROST", "BBY", "KSS",
        "CVS", "UNH", "CI",
        "HUM", "LLY", "PFE", "MRK", "ABBV",
        "BMY", "AMGN", "GILD", "DHR", "EW",
        "ISRG", "BDX", "ZBH", "RMD", "ABT",
        "GE", "CAT", "DE", "ETN", "EMR",
        "HON", "MMM", "BA", "LMT", "GD",
        "NOC", "SPG", "VNO", "PLD",
        "BLK", "SCHW", "MS", "JPM", "BAC",
        "C", "WFC", "GS", "TD", "BNS",
        "PNC", "USB", "TFC", "FITB", "KEY", "JNJ",
    ]
    # The list previously held "PYPL" and "HD" twice, which duplicated both the
    # downloads and the comparisons; this guard keeps any future repeat out.
    stock_list = list(dict.fromkeys(stock_list))

    stock_raw_data = []
    for ticker in stock_list:
        stock_raw_data.append(RetrieveStockInfo(ticker))
        print("Completed Download: ", ticker)

    lags = range(mindays, maxdays + 1)
    number_simulations = (len(stock_list) ** 2) * len(lags)
    datasets = list(zip(stock_list, stock_raw_data))

    # NOTE(review): a ticker compared with itself at lag 0 always agrees with
    # itself and will rank first when mindays is 0 - confirm whether such
    # pairs should be skipped.
    masterlist_stockcomps = []
    count = 0
    for ticker_1, data_1 in datasets:
        for ticker_2, data_2 in datasets:
            for lag in lags:
                masterlist_stockcomps.append(
                    TwoStockCorrelation(data_1, data_2, ticker_1, ticker_2, lag, 0)
                )
                # Running counter: stays correct whatever mindays is.
                count += 1
                print("Simulation", count, "out of", number_simulations, "completed")
    masterlist_stockcomps.sort(key=lambda entry: entry[0][0], reverse=True)

    with open(file_name, 'w') as f:
        f.writelines(str(entry) + "\n" for entry in masterlist_stockcomps)

    with open("Mass_TSC_file_names", 'a') as f:
        f.write(str(file_name) + "\n")
    

In [64]:
def DetailedTSCDisplay(positive_x,positive_y,negative_x,negative_y,ticker_a,ticker_b,set_1_move,set_2_move,confirmation_percentage,
    positive_mean,positive_std,negative_mean,negative_std,dates_data,investment_value_list,original_stock_plot,all_stock2_change_length,
    x,initial_investment_3,investment_yoy_average_yield,risk_free_rate,investment_daily_volitility,investment_annualised_volitility,
    investment_sharpe_ratio,stock_yoy_average_yield,stock_daily_volitility,stock_annualised_volitility,stock_sharpe_ratio):
    """Plot and print the results of a detailed two-stock correlation.

    Shows a scatter of the moves that affirm or challenge the correlation,
    then a chart of the strategy value (blue) against the original stock
    value (red), followed by printed statistics for both. Every argument is
    a value already computed by the caller; nothing is returned.

    The printed legend text now names the colours that the chart actually
    uses (it previously had blue and red the wrong way round).
    """
    plt.plot(positive_x, positive_y, linestyle='', marker='1', markersize=1, color='red', label='Correlation Affirming')
    plt.plot(negative_x, negative_y, linestyle='', marker='1', markersize=1, color='blue', label='Correlation Challenging')
    plt.xlabel("% change of "+ticker_a+" day 0")
    plt.ylabel("% change of "+ticker_b+" day "+str(x))
    plt.legend(loc='upper left')
    plt.show()

    print("When",ticker_a,"goes",set_1_move,",",ticker_b,"goes",set_2_move,"with a",confirmation_percentage,"% chance",x,"days after")
    print("")
    print("When",ticker_a,"goes",set_1_move,":")
    print(ticker_b,"goes up on average",positive_mean,"with a standard deviation of",positive_std)
    print(ticker_b,"goes down on average",negative_mean,"with a standard deviation of",negative_std)
    print("")
    print("===============================================================================================================")
    print("")

    # Strategy values are plotted at the day indices on which a trade happened.
    plt.plot(dates_data, investment_value_list, linestyle='-', markersize=0, color='blue', label='Investment Strategy')
    plt.plot(original_stock_plot, linestyle='-', markersize=0, color='red', label='Original Stock Value')
    plt.xlabel('Trading Days')
    plt.ylabel('Value')
    plt.xlim(0,all_stock2_change_length)
    plt.legend(loc='upper left')
    plt.show()

    # Colour names below match the two plt.plot calls above.
    print("The Blue Line is if you invested in",ticker_b,x,"days after",ticker_a,"went",set_1_move)
    print("The Red Line is if you just invested $" + str(initial_investment_3)+" at the start of the time frame")
    print("Final investment value with strategy = $" + f"{investment_value_list[-1]:.2f}")
    print("Final investment without strategy = $" + f"{original_stock_plot[-1]:.2f}")
    print("")
    print("===============================================================================================================")
    print("")

    print("Investment Statistics")
    print("The average yield YoY on this strategdy is "+ f"{(investment_yoy_average_yield*100):.2f}" + "%")
    print("YoY yield:", investment_yoy_average_yield)
    print("Risk free Rate:", risk_free_rate)
    print("Daily Volitility:", investment_daily_volitility)
    print("Annualised Volitility:", investment_annualised_volitility)
    print("The Sharpe Ratio is",investment_sharpe_ratio)
    print("")
    print("===============================================================================================================")
    print("")

    print("Stock Statistics")
    print("The average yield YoY without this strategdy is "+ f"{(stock_yoy_average_yield*100):.2f}" + "%")
    print("YoY yield:", stock_yoy_average_yield)
    print("Risk free Rate:", risk_free_rate)
    print("Daily Volitility:", stock_daily_volitility)
    print("Annualised Volitility:", stock_annualised_volitility)
    print("The Sharpe Ratio is",stock_sharpe_ratio)
    print("")
    print("===============================================================================================================")
    print("")

In [65]:
def _tsc_direction(percentage_move):
    """Classify a daily % move: 0 for up, 1 for down, None for flat or NaN."""
    if percentage_move > 0:
        return 0
    if percentage_move < 0:
        return 1
    return None


def _tsc_mean_and_std(values):
    """Return (mean, std) of ``values`` as floats, or (nan, nan) when empty."""
    if not values:
        return float("nan"), float("nan")
    return float(np.mean(values)), float(np.std(values))


def DetailedTSC(pandadataset_a, pandadataset_b, ticker_a, ticker_b, x,initial_investment_3,display1):
    """Analyse the strongest lagged relationship between two stocks in detail.

    Day ``i`` of stock 1 is paired with day ``i + x`` of stock 2. The pairing
    with the highest conditional percentage (stock 1 up/down -> stock 2
    up/down) is selected, and a strategy that holds stock 2 on every day that
    follows the selected stock 1 move is compared with holding stock 2 for the
    whole timeframe.

    Changes from the previous version: the datasets passed in are used instead
    of downloading both tickers again, the buy-and-hold daily return no longer
    subtracts 1 from an already relative change, empty buckets no longer raise
    ``ZeroDivisionError``, and the returned numbers are plain Python values so
    the result survives being written out with ``str()``.

    Parameters:
        pandadataset_a, pandadataset_b: price datasets for stock 1 and stock 2.
        ticker_a, ticker_b: labels used in the display and in the result.
        x: lag in trading days applied to stock 2.
        initial_investment_3: starting value of both investment curves.
        display1: pass 1 to show plots and statistics via ``DetailedTSCDisplay``.

    Returns:
        ``[[sharpe_ratio, ticker_a, ticker_b, x], dates_data,
        investment_value_list, original_stock_plot, best_correlation]``
        where ``dates_data`` are the day indices traded on,
        ``investment_value_list`` the strategy value after each of those days,
        ``original_stock_plot`` the buy-and-hold value per trading day, and
        ``best_correlation`` is
        ``[percentage, stock_1_count, joint_count, stock_1_flag, stock_2_flag]``.
    """
    set_1_diff = DifferenceArray(pandadataset_a)
    set_1_percentage_list = set_1_diff[1]                # % change of stock 1 each trading day
    set_1_percentage_count = set_1_diff[2]               # number of days in that list

    set_2_diff = DifferenceArray(pandadataset_b)
    set_2_percentage_list = set_2_diff[1]
    set_2_percentage_count = set_2_diff[2]

    # Only indices where both day i of set 1 and day i + x of set 2 exist.
    comparable_days = max(min(set_1_percentage_count, set_2_percentage_count) - x, 0)

    # Keys are (stock 1 flag, stock 2 flag) with 0 = up and 1 = down.
    pairs_by_outcome = {(0, 0): [], (0, 1): [], (1, 0): [], (1, 1): []}
    followups_by_move = {0: [], 1: []}      # stock 2 % change after each stock 1 move
    days_by_move = {0: [], 1: []}           # day index of each stock 1 move
    for i in range(comparable_days):
        move_1 = set_1_percentage_list[i]
        move_2 = set_2_percentage_list[i + x]
        direction_1 = _tsc_direction(move_1)
        direction_2 = _tsc_direction(move_2)
        if direction_1 is None or direction_2 is None:
            continue                        # flat or missing day: not classifiable
        pairs_by_outcome[(direction_1, direction_2)].append([move_1, move_2])
        followups_by_move[direction_1].append(move_2)
        days_by_move[direction_1].append(i)

    correlation_percentage_list = []
    for direction_1 in (0, 1):
        base_count = len(followups_by_move[direction_1])
        for direction_2 in (0, 1):
            joint_count = len(pairs_by_outcome[(direction_1, direction_2)])
            percentage = (joint_count / base_count) * 100 if base_count else 0.0
            correlation_percentage_list.append(
                [percentage, base_count, joint_count, direction_1, direction_2]
            )
    # Ascending sort puts the strongest relationship in the last slot.
    correlation_percentage_list.sort()
    best_correlation = correlation_percentage_list[3]
    best_direction_1, best_direction_2 = best_correlation[3], best_correlation[4]

    set_1_move = "up" if best_direction_1 == 0 else "down"
    set_2_move = "up" if best_direction_2 == 0 else "down"
    positive_correlative_data = pairs_by_outcome[(best_direction_1, best_direction_2)]
    negative_correlative_data = pairs_by_outcome[(best_direction_1, 1 - best_direction_2)]
    movement_data = followups_by_move[best_direction_1]
    dates_data = days_by_move[best_direction_1]

    positive_x = [pair[0] for pair in positive_correlative_data]   # % change in set 1
    positive_y = [pair[1] for pair in positive_correlative_data]   # set 2 change in favour of trend
    negative_x = [pair[0] for pair in negative_correlative_data]   # % change in set 1
    negative_y = [pair[1] for pair in negative_correlative_data]   # set 2 change against trend

    observed = len(positive_correlative_data) + len(negative_correlative_data)
    confirmation_percentage = (len(positive_correlative_data) / observed) * 100 if observed else 0.0
    positive_mean, positive_std = _tsc_mean_and_std(positive_y)
    negative_mean, negative_std = _tsc_mean_and_std(negative_y)

    # Strategy: compound stock 2's move on every day that follows the chosen
    # stock 1 move.
    investment_value_list = []
    running_value = initial_investment_3
    for move in movement_data:
        running_value = running_value * ((100 + move) / 100)
        investment_value_list.append(running_value)

    # Buy and hold: stock 2's daily open, scaled so that day 0 equals the
    # initial investment. Uses the dataset already passed in.
    stock_2_open_close = TwoStockStats(pandadataset_a, pandadataset_b)[2]
    all_stock2_change_length = int(len(stock_2_open_close) / 2)
    stock_2_opens = stock_2_open_close[0::2][:all_stock2_change_length]
    stock_2_closes = stock_2_open_close[1::2][:all_stock2_change_length]
    original_stock_multiplyer = initial_investment_3 / stock_2_opens[0]
    original_stock_plot = [float(price * original_stock_multiplyer) for price in stock_2_opens]
    stock_movement_percentage_list = (stock_2_closes - stock_2_opens) / stock_2_opens

    years_in_sample = all_stock2_change_length / 252     # Assuming 252 trading days
    risk_free_rate = 0.027                               # Avg since 2015 = ~2.7%
    annualisation_factor = np.sqrt(252)

    final_investment_value = investment_value_list[-1] if investment_value_list else initial_investment_3
    investment_yoy_average_yield = float(((final_investment_value / initial_investment_3) ** (1 / years_in_sample)) - 1)
    investment_movement_percentage_list = [((100 + move) / 100) - 1 for move in movement_data]
    investment_daily_volitility = np.std(investment_movement_percentage_list)
    investment_annualised_volitility = investment_daily_volitility * annualisation_factor
    investment_sharpe_ratio = float((investment_yoy_average_yield - risk_free_rate) / investment_annualised_volitility)

    stock_yoy_average_yield = float(((original_stock_plot[-1] / initial_investment_3) ** (1 / years_in_sample)) - 1)
    stock_daily_volitility = np.std(stock_movement_percentage_list)
    stock_annualised_volitility = stock_daily_volitility * annualisation_factor
    stock_sharpe_ratio = float((stock_yoy_average_yield - risk_free_rate) / stock_annualised_volitility)

    if display1 == 1:
        DetailedTSCDisplay(positive_x,positive_y,negative_x,negative_y,ticker_a,ticker_b,set_1_move,set_2_move,confirmation_percentage,
        positive_mean,positive_std,negative_mean,negative_std,dates_data,investment_value_list,original_stock_plot,
        all_stock2_change_length,x,initial_investment_3,investment_yoy_average_yield,risk_free_rate,float(investment_daily_volitility),
        float(investment_annualised_volitility),investment_sharpe_ratio,stock_yoy_average_yield,float(stock_daily_volitility),
        float(stock_annualised_volitility),stock_sharpe_ratio)

    return([[investment_sharpe_ratio,ticker_a,ticker_b,x],dates_data,investment_value_list,original_stock_plot,best_correlation])

In [66]:
def MassDetailedTSC(top_x, initial_investment_3, output_file_name, input_file_name):
    """Run ``DetailedTSC`` on the first ``top_x`` rows of a MassTSC results file.

    Each non-blank line of ``input_file_name`` is parsed as a literal whose
    items 1, 2 and 3 are the two tickers and the day lag. The detailed
    results are sorted by descending Sharpe ratio, written one per line to
    ``output_file_name``, and that name is appended to the
    "MassDTSC_file_names" index file.

    If the file holds fewer than ``top_x`` rows, every available row is
    examined instead of raising ``IndexError``. Each ticker is downloaded at
    most once per call.
    """
    with open(str(input_file_name), "r") as file:
        ranked_lines = [line for line in file.read().splitlines() if line.strip()]

    available = len(ranked_lines)
    if top_x > available:
        print("Only", available, "correlations are available; examining all of them")
        top_x = available
    top_x = max(top_x, 0)

    downloaded = {}

    def dataset_for(ticker):
        # Reuse a ticker's data when it appears in several rows.
        if ticker not in downloaded:
            downloaded[ticker] = RetrieveStockInfo(ticker)
        return downloaded[ticker]

    MassDTSC_Array = []
    for position, line in enumerate(ranked_lines[:top_x], start=1):
        clean_MassTSC = ast.literal_eval(line)
        ticker_1 = clean_MassTSC[1]
        ticker_2 = clean_MassTSC[2]
        daysremoved = clean_MassTSC[3]
        ticker_set_1 = dataset_for(ticker_1)
        ticker_set_2 = dataset_for(ticker_2)
        MassDTSC_Array.append(DetailedTSC(ticker_set_1, ticker_set_2, ticker_1, ticker_2, daysremoved,initial_investment_3,0))
        print("DTSC", str(position), "out of", top_x, "successful")
    MassDTSC_Array.sort(key=lambda entry: entry[0][0], reverse=True)

    with open(output_file_name, 'w') as f:
        f.writelines(str(entry) + "\n" for entry in MassDTSC_Array)

    with open("MassDTSC_file_names", 'a') as f:
        f.write(str(output_file_name) + "\n")

In [67]:
def PortfolioConstruction(howmany, initial_investment, input_file_name):
    """Combine the first ``howmany`` MassDTSC results into one portfolio.

    Reads ``input_file_name`` (one result per line), sums the strategy value
    and the buy-and-hold value of the selected entries per trading day, plots
    both totals, and prints the portfolio's yearly yield, volatility and
    Sharpe ratio.

    Changes from the previous version: the file named by ``input_file_name``
    is read (a fixed file name was used before), each strategy starts from
    ``initial_investment`` rather than a fixed 100, the printed Sharpe ratio
    is the computed value (it was divided by 3 before), and asking for more
    entries than the file holds no longer raises ``IndexError``.

    Parameters:
        howmany: number of entries to take from the top of the file.
        initial_investment: amount invested per entry; assumed to match the
            amount used when the file was generated - TODO confirm.
        input_file_name: path of a file written by ``MassDetailedTSC``.
    """
    with open(str(input_file_name), "r") as file:
        MassDTSC_content = [line for line in file.read().splitlines() if line.strip()]

    if howmany > len(MassDTSC_content):
        print("Only", len(MassDTSC_content), "correlations are available; using all of them")
        howmany = len(MassDTSC_content)

    cleaned_MassDTSC = []
    for line in MassDTSC_content[:max(howmany, 0)]:
        # SECURITY NOTE: eval executes whatever the file contains, so only
        # files produced by this notebook should be used. It is kept because
        # the stored lines may contain numpy scalar reprs that
        # ast.literal_eval cannot parse.
        # Entry layout: dates_data = 1, investment_value_list = 2, original_stock_plot = 3
        cleaned_MassDTSC.append(eval(line))

    # Use the days common to every entry so no entry is indexed past its end.
    tradingdays = min((len(entry[3]) for entry in cleaned_MassDTSC), default=0)
    if tradingdays == 0:
        print("Sorry, there is no data to build a portfolio from")
        return

    total_initial_investment = howmany*initial_investment
    total_investment_values = [0] * tradingdays
    stock_investment_values = [0] * tradingdays
    for entry in cleaned_MassDTSC:
        trade_days, strategy_values, stock_values = entry[1], entry[2], entry[3]
        next_trade = 0                                   # position of the next trade day in trade_days
        recent_investment_value = initial_investment     # value held until the first trade
        for j in range(tradingdays):
            stock_investment_values[j] += stock_values[j]
            if next_trade < len(trade_days) and j == int(trade_days[next_trade]):
                recent_investment_value = strategy_values[next_trade]
                next_trade += 1
            # Between trades the strategy keeps its most recent value.
            total_investment_values[j] += recent_investment_value

    plt.plot(total_investment_values, linestyle='-', markersize=0, color='blue', label='Investment Strategy')
    plt.plot(stock_investment_values, linestyle='-', markersize=0, color='red', label='Just Investing Day 0')
    plt.xlim(0,tradingdays)
    plt.ylim(0)
    plt.xlabel('Trading Days')
    plt.ylabel('Portfolio Value (Initial: $'+str(total_initial_investment)+')')
    plt.legend(loc='upper left')
    plt.show()

    # Simple (non-compounded) return scaled to a 252-trading-day year.
    investment_yoy_yield=(((total_investment_values[-1])-total_investment_values[0])/total_investment_values[0])*(1/(len(total_investment_values)/252))
    investment_percentage_list = [
        (total_investment_values[i+1]-total_investment_values[i])/total_investment_values[i]
        for i in range(0, tradingdays-1)
    ]
    risk_free_rate = 0.027
    investment_daily_volitility = np.std(investment_percentage_list)
    investment_annualised_volitility = investment_daily_volitility*(np.sqrt(252))
    investment_sharpe_ratio = (investment_yoy_yield - risk_free_rate)/investment_annualised_volitility

    print("This portfolio returns average of",str(investment_yoy_yield*100) + "% year on year")
    print("The Daily Volitility is ", investment_daily_volitility)
    print("The Annualised Volitility is ", investment_annualised_volitility)
    print("The Sharpe Ratio of this portfolio =", investment_sharpe_ratio)
    print("")






In [68]:
def InputFileNames(file_name):
    """Let the user pick one of the file names listed in an index file.

    ``file_name`` is a text file with one candidate name per line. The
    candidates are printed with their index and the user is asked for a
    number until a listed one is entered.

    Returns the chosen name as a string, or ``None`` (after printing a hint)
    when the index file is missing or lists nothing.
    """
    try:
        with open(str(file_name), "r") as file:
            file_content = [line for line in file.read().splitlines() if line.strip()]
    except FileNotFoundError:
        # The index file only exists once an earlier step has written to it.
        file_content = []

    if not file_content:
        print("Sorry, no files available. Perhaps you skipped a step?")
        return None

    print("\n" + "Here are the options of files you can select:")
    for i, candidate in enumerate(file_content):
        print(str(i) +")", candidate)
    print("\n" + "Which file would you like to chose? ")
    while True:
        answer = input("Make sure to type the number ( #) ) not the name: ")
        try:
            file_choice = int(answer)
        except ValueError:
            file_choice = -1
        # Reject negative numbers too: they would silently index from the end.
        if 0 <= file_choice < len(file_content):
            return(str(file_content[file_choice]))
        print("Sorry, that is not one of the listed numbers, please try again")

In [None]:
def MainRun():
    """Interactive text menu that runs one of the notebook's analyses.

    Prints the workflow, asks which analysis to run, collects the inputs that
    analysis needs and calls it. A non-numeric or unknown menu choice prints
    an apology instead of raising, and the file-based analyses stop early
    when no input file could be selected.
    """
    print("Stock Correlation Tester" + "\n")
    print("Want to build a portfolio?")
    print("Step 1: Carry out a Mass Two Stock Correlation (#3): This checks corralitive nature of 100 widely-traded stocks")
    print("Step 2: Then carry out a Mass Detailed Two Stock Correlation (#4) on the highest correlated stocks")
    print("Shep 3: Then do Portfolio Construction (#6): This constructs portfolio from the MassDTSC based on the highest Sharpe Ratios" + "\n")
    print("Do you want to change any assumed constants?  (y/n)")
    change_choice = input("")
    if change_choice.strip().lower().startswith("y"):
        # The answer was previously read and ignored; say so explicitly.
        print("Changing constants is not supported yet; continuing with the defaults.")

    print("What analysis do you want?")
    print("1) Two Stock Plot")
    print("2) Two Stock Correlation")
    print("3) Mass Two Stock Correlation")
    print("4) Detailed Two Stock Correlation")
    print("5) Mass Detailed TSC")
    print("6) Portfolio Construction")
    try:
        analysis_choice = int(input(""))
    except ValueError:
        analysis_choice = None      # falls through to the apology below

    if analysis_choice == 1:
        ticker_1 = input("Ticker of 1st Stock: ")
        ticker_2 = input("Ticker of 2nd Stock: ")
        # TwoStockPlot downloads both tickers itself.
        TwoStockPlot(ticker_1, ticker_2)
    elif analysis_choice == 2:
        days_removed = int(input("How many days removed do you want the comparison to be? "))
        ticker_1 = input("Ticker of 1st Stock: ")
        ticker_2 = input("Ticker of 2nd Stock: ")
        ticker_set_1 = RetrieveStockInfo(ticker_1)
        ticker_set_2 = RetrieveStockInfo(ticker_2)
        TwoStockCorrelation(ticker_set_1, ticker_set_2, ticker_1, ticker_2, days_removed, 1)
    elif analysis_choice == 3:
        mindaysout = int(input("What is the min days removed you want the comparison to be? "))
        maxdaysout = int(input("What is the max days removed you want the comparison to be? "))
        file_name = input("What would you like the file name to me? ")
        MassTSC(mindaysout,maxdaysout,file_name)
    elif analysis_choice == 4:
        days_removed = int(input("How many days removed do you want the comparison to be? "))
        ticker_1 = input("Ticker of 1st Stock: ")
        ticker_2 = input("Ticker of 2nd Stock: ")
        initial_investment_3 = int(input("Specify initial investment: "))
        ticker_set_1 = RetrieveStockInfo(ticker_1)
        ticker_set_2 = RetrieveStockInfo(ticker_2)
        DetailedTSC(ticker_set_1, ticker_set_2, ticker_1, ticker_2, days_removed,initial_investment_3, 1)
    elif analysis_choice == 5:
        input_file_name = InputFileNames("Mass_TSC_file_names")
        if input_file_name is None:
            return                  # InputFileNames already explained why
        top_x = int(input("How many of the top correlations do you want to examine? "))
        initial_investment_3 = int(input("Specify initial investment: "))
        output_file_name = input("What do you want the file name to be? ")
        MassDetailedTSC(top_x, initial_investment_3, output_file_name, input_file_name)
    elif analysis_choice == 6:
        input_file_name = InputFileNames("MassDTSC_file_names")
        if input_file_name is None:
            return                  # InputFileNames already explained why
        howmany = int(input("How many correlations would you like in your portfolio? "))
        initial_investment = int(input("How much would you like to invest per stock? "))
        PortfolioConstruction(howmany, initial_investment, input_file_name)
    else:
        print("Sorry, did not understand that input, could you try again")

In [None]:
# Start the interactive menu only when this code is executed directly (as a
# script or a notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    MainRun()