In [None]:
from datetime import datetime
from typing import List

import pandas as pd

# Pipeline Functions

In [None]:
def pipeline(data: pd.DataFrame) -> pd.DataFrame:
    """
        Runs one full pass of the strategy: clean the input, push it through
        filter_1, filter_2 and filter_3 in that order, then hand the result
        to the optimizer and return whatever the optimizer produces.
    """
    staged = clean_data(data = data)

    # Each filter consumes the output of the previous one.
    for stage in (filter_1, filter_2, filter_3):
        staged = stage(data = staged)
    ...

    return optimizer(data = staged)

In [None]:
def filter_1(data: pd.DataFrame) -> pd.DataFrame:
    """
        Reduces the columns of the data dataframe to a selection of tickers.

        The selection itself is still a placeholder (`...`); the reduced
        frame is built by rebuild_dataframe.
    """
    selected_tickers = ...                                                      # TODO: ticker selection not implemented yet

    return rebuild_dataframe(raw_df = data, top_tickers = selected_tickers)

In [None]:
def filter_2(data: pd.DataFrame) -> pd.DataFrame:
    """
        Second filter stage. Not implemented yet: the input is ignored and
        None is returned.
    """
    return None

In [None]:
def filter_3(data: pd.DataFrame) -> pd.DataFrame:
    """
        Third filter stage. Not implemented yet: the input is ignored and
        None is returned.
    """
    return None

In [None]:
def optimizer(data: pd.DataFrame) -> pd.DataFrame:
    """
        Intended to return a dataframe of weights.

        Not implemented yet: the input is ignored and None is returned.
    """
    return None

In [None]:
def rebuild_dataframe(raw_df: pd.DataFrame, top_tickers: List[str]) -> pd.DataFrame:
    """
        Keeps only the `top_tickers` columns under every top-level column
        group of `raw_df`, preserving the two-level column layout.

        raw_df      : dataframe whose columns are a MultiIndex; level 0 is the
                      group, and each group is indexed by `top_tickers`.
        top_tickers : labels to keep inside each level-0 group.

        Returns a new dataframe with columns (group, ticker) for every group
        present in `raw_df` and every ticker in `top_tickers`.
    """
    # A MultiIndex keeps level values that are no longer used after columns
    # have been sliced or dropped; iterating over those would raise a KeyError
    # on `raw_df[level]`, so prune them first.
    used_levels = raw_df.columns.remove_unused_levels().levels[0]

    processed_df = {level: raw_df[level][top_tickers] for level in used_levels}

    return pd.concat(processed_df, axis = 1)

# Backtest Functions

In [None]:
def get_dates(data: pd.DataFrame, start_date: datetime, rebalance_interval: str) -> List[datetime]:
    """
        Returns the dates on which a rebalance must occur.

        data               : dataframe whose index holds the available dates.
        start_date         : passed through to the interval-specific helper.
        rebalance_interval : one of "daily", "weekly", "biweekly", "monthly".

        The result is whatever the selected helper returns (the helpers build
        it with pd.to_datetime, so it is a pandas datetime index rather than a
        plain list).

        Raises ValueError when `rebalance_interval` is not a supported name.
    """
    table = {
        "daily"   : get_daily_dates,
        "weekly"  : get_weekly_dates,
        "biweekly": get_biweekly_dates,
        "monthly" : get_monthly_dates
    }

    # Fail with an explicit message instead of calling the None that
    # dict.get would return for an unknown key.
    if rebalance_interval not in table:
        raise ValueError(
            f"Unknown rebalance_interval {rebalance_interval!r}; "
            f"expected one of {sorted(table)}"
        )

    return table[rebalance_interval](data = data, start_date = start_date)


def get_daily_dates(data: pd.DataFrame, start_date: datetime) -> list:
    """
        Returns every date of `data.index` that is strictly after `start_date`.

        The index is converted with pd.to_datetime before the comparison, so
        an index of date strings (e.g. straight from a CSV) works as well as
        a datetime index. The result is the filtered pandas datetime index.
    """
    index = pd.to_datetime(data.index)

    return index[index > start_date]


def get_weekly_dates(data: pd.DataFrame, start_date: datetime) -> list:
    """
        Returns the dates from get_daily_dates that fall on a Monday
        (weekday() == 0), converted back with pd.to_datetime.
    """
    daily = get_daily_dates(data = data, start_date = start_date)
    mondays = list(filter(lambda day: day.weekday() == 0, daily))

    return pd.to_datetime(mondays)


def get_biweekly_dates(data: pd.DataFrame, start_date: datetime) -> list:
    """
        Returns every second date of get_weekly_dates, starting with the
        first one.
    """
    weekly = get_weekly_dates(data = data, start_date = start_date)

    return weekly[:: 2]                                                         # keeps positions 0, 2, 4, ...


def get_monthly_dates(data: pd.DataFrame, start_date: datetime) -> list:
    """
        Returns every second date of get_biweekly_dates, i.e. every fourth
        weekly date. Note this is a four-week cadence, not calendar months.
    """
    biweekly = get_biweekly_dates(data = data, start_date = start_date)

    return biweekly[:: 2]                                                       # keeps positions 0, 2, 4, ...

In [None]:
def get_returns(data: pd.DataFrame, weights: pd.DataFrame) -> pd.DataFrame:
    """
        Not implemented yet: both inputs are ignored and None is returned.

        TODO: describe the intended output (the earlier note here stopped at
        "Returns a condensed").
    """
    return None

In [None]:
def backtest(data: pd.DataFrame, start_date: datetime, rebalance_interval: str) -> pd.DataFrame:
    """
        Runs the pipeline once per rebalance date and returns all resulting
        weights side by side.

        data               : full dataset; for each rebalance date the pipeline
                             receives `data.loc[: date]`.
        start_date         : forwarded to get_dates.
        rebalance_interval : forwarded to get_dates.

        Returns the per-date pipeline outputs concatenated along the columns
        (axis = 1), in rebalance-date order. An empty dataframe is returned
        when there are no rebalance dates or the pipeline produced nothing.
    """
    dates = get_dates(data = data, start_date = start_date, rebalance_interval = rebalance_interval)

    # Collect first and concatenate once: growing a dataframe with pd.concat
    # inside the loop re-copies everything accumulated so far on every step.
    collected = []

    for date in dates:
        date_weights = pipeline(data = data.loc[: date])

        # pd.concat silently skips None entries; keep that behaviour explicit
        # so a pipeline that yields nothing does not break the final concat.
        if date_weights is not None:
            collected.append(date_weights)

    if not collected:
        return pd.DataFrame()

    return pd.concat(collected, axis = 1)

# Data Cleaning

In [None]:
def clean_data(data: pd.DataFrame) -> pd.DataFrame:
    """
        Applies the cleaning steps in sequence: interpolate_data, then
        remove_outliers, then remove_splits. Each step receives the output of
        the previous one, and the output of the last step is returned.
    """
    cleaned_data = interpolate_data(data = data)
    # Feed the running result forward; passing the raw `data` again would
    # discard the work of every earlier step.
    cleaned_data = remove_outliers(data = cleaned_data)
    cleaned_data = remove_splits(data = cleaned_data)
    ...

    return cleaned_data

In [None]:
def interpolate_data(data: pd.DataFrame) -> pd.DataFrame:
    """
        Cleaning step called by clean_data. Not implemented yet: the input is
        ignored and None is returned.
    """
    return None

In [None]:
def remove_outliers(data: pd.DataFrame) -> pd.DataFrame:
    """
        Cleaning step called by clean_data. Not implemented yet: the input is
        ignored and None is returned.
    """
    return None

In [None]:
def remove_splits(data: pd.DataFrame) -> pd.DataFrame:
    """
        Cleaning step called by clean_data. Not implemented yet: the input is
        ignored and None is returned.
    """
    return None

In [None]:
def remove_young_tickers(data: pd.DataFrame) -> pd.DataFrame:
    """
        Not implemented yet: the input is ignored and None is returned.

        NOTE(review): nothing in the visible cells calls this function yet.
    """
    return None

# Main

In [None]:
# TODO: replace every `...` placeholder below before running this cell.
data = pd.read_csv(...)

START_DATE = ...                                                                # datetime forwarded to backtest / get_dates
REBALANCE_INTERVAL = ...                                                        # "daily", "weekly", "biweekly" or "monthly"

# backtest requires all three arguments; calling it with `data` alone raises a TypeError.
weights = backtest(data = data, start_date = START_DATE, rebalance_interval = REBALANCE_INTERVAL)
returns = get_returns(data = data, weights = weights)

# Visualize

In [2]:
import quantstats as qs

In [None]:
# Plot the quantstats snapshot for `returns`, the value produced by get_returns in the Main cell.
qs.plots.snapshot(returns = returns, title = "Portfolio Performance")