In [None]:
# __pycache__ directories are clutter; tell Python not to write bytecode files
import sys
sys.dont_write_bytecode = True

import random

from portfolio_class import *

## Purpose of Strategies Functions
#### Return a dictionary with the structure: { [year, month] : [portfolio value, ROI (since the start of the portfolio)] }

In [None]:
# Load the market dataset once; later cells pass it to the helper functions.
# NOTE(review): get_dataframe is not defined in this file - presumably it comes
# from the star import of portfolio_class; confirm.
nasdaq_df = get_dataframe()

In [None]:
def SP500(investment_date, divestment_date, dataframe):
    """
    DESCRIPTION:
        Calculate the monthly ROIs of a portfolio that invests in the 500 companies ranked highest by the500().
        Be noted that the list of these 500 companies is recalculated every month: every company on the
        list is bought (again) each month, and holdings that dropped off the list are sold.

    INPUT SIGNATURE:
        1. investment_date (list): [year, month]
        2. divestment_date (list): [year, month], must not be earlier than investment_date
        3. dataframe (Pandas dataframe): the source of our data, usually nasdaq_df

    OUTPUT SIGNATURE:
        1. monthly_ROIs (dictionary): { (year, month) : ROI (since the start of the portfolio) }
            (first on the list, but will be returned last, index to this by using [-1])
            Keys are tuples because lists are unhashable and cannot be dictionary keys.
        2. final_value (float): one of the final attribute of the porfolio on the divestment date
        3. final_liquid (float): like 2
        4. final_invested (float): like 2
        5. final_divested (float): like 2
        6. final_ROI (float percentage): like 2

    RAISES:
        ValueError: if investment_date is later than divestment_date

    NOTE(review): the meaning of the positions read from get_portfolio_value() (0 = value, 1 = liquid,
    2 = invested, 3 = divested, -1 = ROI) is taken from how this function names them - confirm against
    the portfolio class.
    """

    # without this guard the month-by-month loop below would never reach divestment_date
    if tuple(investment_date) > tuple(divestment_date):
        raise ValueError("investment_date must not be later than divestment_date")

    # output dictionary
    monthly_ROIs = {}

    current_date = investment_date

    # list of all holdings within the portfolio updated monthly
    dynamic_holdings = []

    # create a portfolio object
    portfolio_SP500 = portfolio()

    # date_incrementor returns True only once current_date has reached divestment_date
    while date_incrementor(current_date, divestment_date) is not True:

        # get the monthly most valuable 500 companies from the dataframe we were given
        monthly_largest_500 = the500(current_date, dataframe)

        # set copy for constant-time membership tests in the selling loop below
        top_500 = set(monthly_largest_500)

        # buy new companies that reach the top 500 or reinvest in companies that stay in the top 500
        for firm in monthly_largest_500:

            portfolio_SP500.transaction(firm, "BOUGHT", current_date, "Close")
            # a firm is deliberately appended once per purchase: if it later falls off the top 500
            # we need one SOLD transaction for every earlier BOUGHT transaction
            dynamic_holdings.append(firm)

        # rebuild the holdings list instead of removing items from the list we are iterating over
        new_holdings = []

        # sell companies that lose the top 500 position
        for firm in dynamic_holdings:

            if firm in top_500:
                new_holdings.append(firm)

            else:
                portfolio_SP500.transaction(firm, "SOLD", current_date, "Close")
                # sold firms are intentionally not carried over to new_holdings

        # update new dynamic_holdings
        dynamic_holdings = new_holdings

        # get monthly ROI (last element of the portfolio attributes)
        monthly_attributes = portfolio_SP500.get_portfolio_value(current_date[0], current_date[1])
        monthly_ROIs[tuple(current_date)] = monthly_attributes[-1]

        current_date = date_incrementor(current_date, divestment_date)

    # when current_date == divestment_date, no more trading happens; we only evaluate the portfolio
    last_month_attributes = portfolio_SP500.get_portfolio_value(divestment_date[0], divestment_date[1])
    last_month_ROI = last_month_attributes[-1]
    monthly_ROIs[tuple(divestment_date)] = last_month_ROI

    # final attributes to return
    final_value = last_month_attributes[0]
    final_liquid = last_month_attributes[1]
    final_invested = last_month_attributes[2]
    final_divested = last_month_attributes[3]
    final_ROI = last_month_ROI

    return final_value, final_liquid, final_invested, final_divested, final_ROI, monthly_ROIs

# Helper Functions

In [None]:
def calculate_market_values(date, dataframe):
    """
    DESCRIPTION:
        For a given month, calculate all the companies' market value and return them in descending order.
        The market value of a row is computed as Close * Volume.
        The input dataframe is only read, never modified, so it can be reused for other months.

    INPUT SIGNATURE:
        1. date (list): [year, month]
        2. dataframe (Pandas dataframe): the source of our data, usually nasdaq_df;
            must provide the columns "Year", "Month", "Ticker Symbol", "Volume" and "Close"

    OUTPUT SIGNATURE:
        1. market_value_df (Pandas dataframe): 2 columns Ticker and Market Value, sorted by Market Value descendingly,
            with a fresh 0..n-1 index; one output row per qualifying input row
    """

    # select (rather than drop in place) the rows of the month of interest, so the caller's
    # dataframe stays intact; rows without a positive volume cannot be valued and are skipped
    is_selected = (
        (dataframe["Year"] == date[0])
        & (dataframe["Month"] == date[1])
        & (dataframe["Volume"] > 0)
    )
    month_df = dataframe.loc[is_selected]

    # we consider the closing price when finding out the market value
    market_values = month_df["Close"] * month_df["Volume"]

    # build the 2-column result from plain lists so the original row labels are discarded
    data = {
        "Ticker" : month_df["Ticker Symbol"].tolist(),
        "Market Value" : market_values.tolist(),
    }
    market_values_df = pd.DataFrame(data)

    # sort the dataframe, largest market value first
    market_values_df.sort_values(by = ["Market Value"], inplace = True, ascending = False)
    market_values_df.reset_index(inplace = True, drop = True)

    return market_values_df

def the500(date, dataframe):
    """
    DESCRIPTION:
        Find the 500 companies with the largest market value for a given time (month within a year),
        using the ranking produced by calculate_market_values()

    INPUT SIGNATURE:
        1. date (list): [year, month]
        2. dataframe (Pandas dataframe): the source of our data, usually nasdaq_df

    OUTPUT SIGNATURE:
        1. largest_500 (list): tickers of those companies, ordered from the largest market value to the smallest;
            contains fewer than 500 entries when fewer than 500 rows are available for that month
    """

    # our market, already sorted by market value descendingly
    sorted_market_df = calculate_market_values(date, dataframe)

    # head() takes at most 500 rows, so a thin month no longer raises a KeyError
    largest_500 = sorted_market_df["Ticker"].head(500).tolist()

    return largest_500

def date_incrementor(start_date, end_date):
    """
    DESCRIPTION:
        Step a [year, month] date forward by one month, or signal that the end date has been reached

    INPUT SIGNATURE:
        1. start_date (list): [year, month]
        2. end_date (list): [year, month]

    OUTPUT SIGNATURE:
        1. True when start_date == end_date (nothing left to advance)
        2. otherwise next_month (list): [year, month] of the month following start_date
    """

    # both dates coincide: report completion instead of a date
    if start_date == end_date:
        return True

    year, month = start_date[0], start_date[1]

    # December rolls over into January of the following year
    if month == 12:
        return [year + 1, 1]

    return [year, month + 1]

In [None]:
# Run the strategy from January 2013 to January 2022.
# SP500 requires the data source as its third positional argument.
SP500([2013, 1], [2022, 1], nasdaq_df)

In [None]:
# Sanity check: rank the December 2016 rows by Close * Volume.
calculate_market_values([2016, 12], nasdaq_df)

In [None]:
# Sanity check: list the tickers the500 selects for December 2016.
the500([2016, 12], nasdaq_df)