In [None]:
# Pycache are evil, don't produce them
import sys
sys.dont_write_bytecode = True

import random

from helper import *
from portfolio_class import *

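## Purpose of Strategies Functions
#### SP500_monthly and SP100_yearly return the portfolio's final attributes followed by a dictionary with the structure: { period : ROI (since the start of the portfolio) }, where period is a "year-month" string for the monthly strategy and a year for the yearly strategy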

In [None]:
# load the data set used by every strategy below
# (get_dataframe comes from one of the star imports above; what it reads is not visible here)
nasdaq_df = get_dataframe()
# bare expression so the notebook displays the dataframe as the cell output
nasdaq_df

In [None]:
# display the dataframe again (same expression as the end of the previous cell)
nasdaq_df

In [None]:
def SP500_monthly(investment_date, divestment_date, dataframe):
    """
    WARNING: VERY TIME-CONSUMING IF A LARGE TIME FRAME IS REQUESTED. USE SP100_yearly INSTEAD

    DESCRIPTION:
        Calculate the monthly ROIs of a portfolio that invests in the 500 companies with the largest market cap.
        The list of these 500 companies is recalculated every month. Each month, every company on the list is
        bought (again) at "Close", and every lot previously bought of a company that is no longer on the list is sold.
        No trading happens on the divestment date itself; the portfolio is only evaluated on it.

    INPUT SIGNATURE:
        1. investment_date (list): [year, month], first month in which the portfolio trades
        2. divestment_date (list): [year, month], month on which the final attributes are evaluated
        3. dataframe (Pandas dataframe): the source of our data, usually nasdaq_df

    OUTPUT SIGNATURE (a tuple, in this order):
        1. final_value (float): element 0 of portfolio.get_portfolio_value() on the divestment date
        2. final_liquid (float): element 1 of the same result
        3. final_invested (float): element 2 of the same result
        4. final_divested (float): element 3 of the same result
        5. final_ROI (float percentage): last element of the same result
        6. monthly_ROIs (dictionary): { "year-month" : ROI (since the start of the portfolio) },
            one entry per traded month plus one for the divestment date (index to this by using [-1])

    RAISES:
        ValueError: if investment_date is later than divestment_date
    """

    # normalise to lists so that a tuple date still compares equal to a list date
    current_date = list(investment_date)
    end_date = list(divestment_date)

    # without this guard the loop below could never reach end_date
    if current_date > end_date:
        raise ValueError("investment_date must not be later than divestment_date")

    # output dictionary
    monthly_ROIs = {}

    # one entry per purchased lot that is still held; a ticker appears once for every month it was bought
    dynamic_holdings = []

    # create a portfolio object
    portfolio_SP500 = portfolio()

    while True:

        # date_incrementor returns True once current_date has reached end_date, otherwise the following month
        next_date = date_incrementor(current_date, end_date)
        if next_date is True:
            break

        # get the monthly most valuable 500 companies
        monthly_largest_500 = theXXX(500, current_date, dataframe)

        # set copy for O(1) membership tests in the selling loop below
        still_listed = set(monthly_largest_500)

        # buy new companies that reach the top 500 or reinvest in companies that stay in the top 500
        for firm in monthly_largest_500:
            portfolio_SP500.transaction(firm, "BOUGHT", current_date, "Close")

        # a firm is recorded once per purchase on purpose: if it later falls off the list,
        # one sale is issued for every lot that was bought
        dynamic_holdings.extend(monthly_largest_500)

        # rebuild the holdings list instead of removing items while iterating over it
        kept_holdings = []

        # sell companies that lose the top 500 position
        for firm in dynamic_holdings:

            if firm in still_listed:
                kept_holdings.append(firm)

            else:
                # sold lots are intentionally not carried over to kept_holdings
                portfolio_SP500.transaction(firm, "SOLD", current_date, "Close")

        dynamic_holdings = kept_holdings

        # get monthly ROI (last element of the attributes returned by the portfolio)
        monthly_attributes = portfolio_SP500.get_portfolio_value(current_date[0], current_date[1])

        # lists cannot be dictionary keys, so the date is stored as a "year-month" string
        current_date_string = str(current_date[0]) + "-" + str(current_date[1])
        monthly_ROIs[current_date_string] = monthly_attributes[-1]

        current_date = next_date

    # when current_date == divestment_date, we only evaluate the portfolio to check its ROI
    last_month_attributes = portfolio_SP500.get_portfolio_value(end_date[0], end_date[1])
    last_month_ROI = last_month_attributes[-1]

    divestment_date_string = str(end_date[0]) + "-" + str(end_date[1])
    monthly_ROIs[divestment_date_string] = last_month_ROI

    # final attributes to return
    final_value = last_month_attributes[0]
    final_liquid = last_month_attributes[1]
    final_invested = last_month_attributes[2]
    final_divested = last_month_attributes[3]
    final_ROI = last_month_ROI

    return final_value, final_liquid, final_invested, final_divested, final_ROI, monthly_ROIs

#------------------------------

def SP100_yearly(investment_year, divestment_year, dataframe):
    """
    DESCRIPTION:
        Calculate the yearly ROIs of a portfolio that invests in the 100 companies with the largest market cap.
        The list of these 100 companies is recalculated once a year from the January data. Every January, each
        company on the list is bought (again) at "Close", and every lot previously bought of a company that is
        no longer on the list is sold. Each year's ROI is evaluated for December of that year.
        No trading happens in the divestment year; the portfolio is only evaluated in it.

    INPUT SIGNATURE:
        1. investment_year (int): first year in which the portfolio trades
        2. divestment_year (int): year in which the final attributes are evaluated
        3. dataframe (Pandas dataframe): the source of our data, usually nasdaq_df

    OUTPUT SIGNATURE (a tuple, in this order):
        1. final_value (float): element 0 of portfolio.get_portfolio_value() for the divestment year
        2. final_liquid (float): element 1 of the same result
        3. final_invested (float): element 2 of the same result
        4. final_divested (float): element 3 of the same result
        5. final_ROI (float percentage): last element of the same result
        6. yearly_ROIs (dictionary): { year : ROI (since the start of the portfolio) },
            one entry per traded year plus one for the divestment year (index to this by using [-1])

    RAISES:
        ValueError: if investment_year is later than divestment_year
    """

    # without this guard the yearly loop could never reach divestment_year
    if investment_year > divestment_year:
        raise ValueError("investment_year must not be later than divestment_year")

    # the final evaluation normally uses December
    latest_month = 12

    # NOTE: hard-coded exception for 2022, presumably because the data set ends in April 2022 (confirm)
    if divestment_year == 2022:
        latest_month = 4

    # output dictionary
    yearly_ROIs = {}

    # one entry per purchased lot that is still held; a ticker appears once for every year it was bought
    dynamic_holdings = []

    # create a portfolio object
    portfolio_SP100 = portfolio()

    for current_year in range(investment_year, divestment_year):

        # all trades of a year are booked in January
        trade_date = [current_year, 1]

        # get the 100 most valuable companies of this January
        yearly_largest_100 = theXXX(100, trade_date, dataframe)

        # set copy for O(1) membership tests in the selling loop below
        still_listed = set(yearly_largest_100)

        # buy new companies that reach the top 100 or reinvest in companies that stay in the top 100
        for firm in yearly_largest_100:
            portfolio_SP100.transaction(firm, "BOUGHT", trade_date, "Close")

        # a firm is recorded once per purchase on purpose: if it later falls off the list,
        # one sale is issued for every lot that was bought
        dynamic_holdings.extend(yearly_largest_100)

        # rebuild the holdings list instead of removing items while iterating over it
        kept_holdings = []

        # sell companies that lose the top 100 position
        for firm in dynamic_holdings:

            if firm in still_listed:
                kept_holdings.append(firm)

            else:
                # sold lots are intentionally not carried over to kept_holdings
                portfolio_SP100.transaction(firm, "SOLD", trade_date, "Close")

        dynamic_holdings = kept_holdings

        # get yearly ROI, evaluated for December (last element of the returned attributes)
        yearly_attributes = portfolio_SP100.get_portfolio_value(current_year, 12)
        yearly_ROIs[current_year] = yearly_attributes[-1]

    # in the divestment year we only evaluate the portfolio to check its ROI
    last_year_attributes = portfolio_SP100.get_portfolio_value(divestment_year, latest_month)
    last_year_ROI = last_year_attributes[-1]

    # add the latest year to the dictionary as well
    yearly_ROIs[divestment_year] = last_year_ROI

    # final attributes to return
    final_value = last_year_attributes[0]
    final_liquid = last_year_attributes[1]
    final_invested = last_year_attributes[2]
    final_divested = last_year_attributes[3]
    final_ROI = last_year_ROI

    return final_value, final_liquid, final_invested, final_divested, final_ROI, yearly_ROIs

#------------------------------

def calculate_market_values(date, dataframe):
    """
    DESCRIPTION:
        For a given month, calculate all the companies' market value (Close * Volume) and return them in descending order.
        Rows without a positive Volume are ignored. The input dataframe is not modified.

    INPUT SIGNATURE:
        1. date (list): [year, month]
        2. dataframe (Pandas dataframe): the source of our data, usually nasdaq_df;
            the columns "Year", "Month", "Volume", "Close" and "Ticker Symbol" are read

    OUTPUT SIGNATURE:
        1. market_value_df (Pandas dataframe): columns Ticker and Market Value, sorted by Market Value descendingly.
            A leading "index" column (each row's position before sorting) is also present; it is produced by
            reset_index and kept so that existing callers see the same frame as before.
    """

    year, month = date[0], date[1]

    # select the month of interest and the rows with share volume in one pass;
    # a NumPy mask selects by position, so repeated index labels cannot drop or duplicate rows
    wanted_rows = (
        (dataframe["Year"] == year)
        & (dataframe["Month"] == month)
        & (dataframe["Volume"] > 0)
    ).to_numpy()

    # .loc with a mask returns a new frame, so the caller's data stays untouched
    month_df = dataframe.loc[wanted_rows, ["Ticker Symbol", "Close", "Volume"]]

    # we consider the closing price when finding out market cap
    market_values = month_df["Close"].to_numpy() * month_df["Volume"].to_numpy()

    # create a Pandas dataframe from the tickers and their market values to sort them
    data = {"Ticker" : month_df["Ticker Symbol"].tolist(), "Market Value" : market_values.tolist()}
    market_values_df = pd.DataFrame(data)

    # sort the dataframe
    market_values_df.sort_values(by = ["Market Value"], inplace = True, ascending = False)
    market_values_df.reset_index(inplace = True)

    return market_values_df

#------------------------------

def theXXX(number, date, dataframe):
    """
    DESCRIPTION:
        Find the XXX companies with the largest market cap for a given time (month within a year)

    INPUT SIGNATURE:
        1. number (int): top 500? top 100?
        2. date (list): [year, month]
        3. dataframe (Pandas dataframe): the source of our data, usually nasdaq_df

    OUTPUT SIGNATURE:
        1. largest_XXX (list): tickers of those XXX companies, ordered from the largest market value downwards.
            If fewer than `number` companies have data for that month, all of them are returned
            (the list is then shorter than `number`); a non-positive `number` gives an empty list.
    """

    # our market, already sorted by market value descendingly
    sorted_market_df = calculate_market_values(date, dataframe)

    # head() stops at the end of the frame, so a short month no longer raises IndexError;
    # max() keeps a negative number from being read as "all but the last rows"
    largest_XXX = sorted_market_df["Ticker"].head(max(number, 0)).tolist()

    return largest_XXX

#------------------------------

def date_incrementor(start_date, end_date):
    """
    DESCRIPTION:
        Help increment up a month and check whether the start_date is matched with the end date or not

    INPUT SIGNATURE:
        1. start_date (list): [year, month]
        2. end_date (list): [year, month]

    OUTPUT SIGNATURE:
        1. next_month (list): [year, month], the month following start_date
        2. if start_date == end_date, return True instead
    """

    # compare as lists so that a tuple date and a list date holding the same values still match
    if list(start_date) == list(end_date):
        return True

    # store values so that they are readable
    start_year = start_date[0]
    start_month = start_date[1]

    # December rolls over to January of the following year
    if start_month == 12:
        return [start_year + 1, 1]

    return [start_year, start_month + 1]

In [None]:
# run the yearly top-100 strategy from 2012 to 2022
output = SP100_yearly(2012, 2022, nasdaq_df)

# the per-year ROI dictionary is the last element of the returned tuple
yearly_ROI = output[-1]

# years in chronological order, each paired with the ROI recorded for it
years = sorted(yearly_ROI.keys())
corresponding_ROIs = [yearly_ROI[key] for key in years]

# create a dataframe from the yearly_ROI dictionary
data = {"Year":years, "ROI":corresponding_ROIs}
yearly_ROI_df = pd.DataFrame(data)

# export the data
yearly_ROI_df.to_csv("Internal Data/SP100_yearly_ROI.csv", index = False)

In [None]:
# display the full tuple returned by SP100_yearly(2012, 2022, nasdaq_df)
output

In [None]:
# test short output: same strategy over 2020-2022 only, so the run finishes quickly
output_short = SP100_yearly(2020, 2022, nasdaq_df)
# display the returned tuple
output_short

In [None]:
# spot check of ticker TWIN for December 2013 ("Close" column)
# (inspect comes from one of the star imports; presumably it looks up that single value, confirm in its module)
inspect("TWIN", [2013, 12], "Close", nasdaq_df)

In [None]:
# display every row of ticker TWIN in 2013 to cross-check the value inspected above
nasdaq_df.loc[(nasdaq_df["Ticker Symbol"] == "TWIN") & (nasdaq_df["Year"] == 2013)]

In [None]:
# summary of the SP100_yearly run stored in `output`
# (value, liquid, invested, divested, ROI, yearly ROI dictionary, in that order);
# adjacent string literals are joined by Python, so no backslash continuation is needed and
# no source indentation leaks into the printed text
print(
    "Portfolio's Performance if Invested Dynamically in the 100 Companies with the Largest Market Cap (Update Listing Yearly)"
    "\n\nPortfolio's Value Upon Immediate Liquidation:\n$", output[0],
    "\n\nCash Flow (from $0):\n$", output[1],
    "\n\nTotal Amount Invested:\n$", output[2],
    "\n\nTotal Amount Divested:\n$", output[3],
    "\n\nReturn on Investment Since the Initialization of the Portfolio:\n%", output[4],
    "\n\nDetailed yearly ROI (%):\n", output[5],
)

In [None]:
# manual check: display the market-value table computed for December 2016
calculate_market_values([2016, 12], nasdaq_df)

In [None]:
# manual check: display the tickers of the 100 largest companies for December 2016
theXXX(100, [2016, 12], nasdaq_df)

# SP100 BUT USING NEW GET_YEARLY_ROI METHOD WITHIN THE PORTFOLIO CLASS

In [None]:
# Pycache are evil, don't produce them
import sys
sys.dont_write_bytecode = True

import random

from helper import *
from portfolio_class import *
from strategies_helper import *

In [None]:
# load the data set for the second part of the notebook
# (get_dataframe comes from one of the star imports above; what it reads is not visible here)
nasdaq_df = get_dataframe()

In [None]:
def SP100_yearly_new_method(investment_year, divestment_year, dataframe):
    """
    DESCRIPTION:
        Run the yearly top-100 strategy and let the portfolio object itself compute the yearly ROIs.
        The list of the 100 companies with the largest market cap is recalculated once a year from the January data.
        Every January, each company on the list is bought (again) at "Close", and every lot previously bought of a
        company that is no longer on the list is sold. No trading happens in the divestment year.

    INPUT SIGNATURE:
        1. investment_year (int): first year in which the portfolio trades
        2. divestment_year (int): year passed to the portfolio's get_yearly_ROI
        3. dataframe (Pandas dataframe): the source of our data, usually nasdaq_df

    OUTPUT SIGNATURE:
        Whatever portfolio.get_yearly_ROI(divestment_year, write_csv = True) returns.
        The notebook unpacks it as (yearly_ROI_df, yearly_ROI_dict); see the portfolio class for the exact contents.
        NOTE: write_csv = True presumably makes the portfolio write a CSV file as a side effect (confirm in portfolio_class).

    RAISES:
        ValueError: if investment_year is later than divestment_year
    """

    # without this guard the yearly loop could never reach divestment_year
    if investment_year > divestment_year:
        raise ValueError("investment_year must not be later than divestment_year")

    # one entry per purchased lot that is still held; a ticker appears once for every year it was bought
    dynamic_holdings = []

    # create a portfolio object
    portfolio_SP100 = portfolio()

    for current_year in range(investment_year, divestment_year):

        # all trades of a year are booked in January
        trade_date = [current_year, 1]

        # get the 100 most valuable companies of this January
        yearly_largest_100 = theXXX(100, trade_date, dataframe)

        # set copy for O(1) membership tests in the selling loop below
        still_listed = set(yearly_largest_100)

        # buy new companies that reach the top 100 or reinvest in companies that stay in the top 100
        for firm in yearly_largest_100:
            portfolio_SP100.transaction(firm, "BOUGHT", trade_date, "Close")

        # a firm is recorded once per purchase on purpose: if it later falls off the list,
        # one sale is issued for every lot that was bought
        dynamic_holdings.extend(yearly_largest_100)

        # rebuild the holdings list instead of removing items while iterating over it
        kept_holdings = []

        # sell companies that lose the top 100 position
        for firm in dynamic_holdings:

            if firm in still_listed:
                kept_holdings.append(firm)

            else:
                # sold lots are intentionally not carried over to kept_holdings
                portfolio_SP100.transaction(firm, "SOLD", trade_date, "Close")

        dynamic_holdings = kept_holdings

    # yearly ROI, computed by the portfolio class
    return portfolio_SP100.get_yearly_ROI(divestment_year, write_csv = True)

In [None]:
# run the new-method strategy for a single traded year (2019) and unpack its two results
yearly_ROI_df, yearly_ROI_dict = SP100_yearly_new_method(2019, 2020, nasdaq_df)

In [None]:
# display the first result returned by SP100_yearly_new_method
yearly_ROI_df

In [None]:
# display the second result returned by SP100_yearly_new_method
yearly_ROI_dict