**Repository**: https://github.com/EstebanMqz/SP500-Risk-Optimized-Portfolios-PostCovid-ML

### <font color='orange'> **S&P 500 Risk Optimized Portfolios PostCovid ML (14 Apr $yr_{(2020-2023)}$)**</font>

In [2]:
#Import dependencies
import functions as fn
import data as dt
import visualizations as vs

#Libraries
import numpy as np
import pandas as pd
import matplotlib as plt

import scipy
import scipy.stats as st
from scipy import optimize

import sklearn
from sklearn.neighbors import KernelDensity
from sklearn.model_selection import GridSearchCV
from sklearn import metrics

from yahoofinancials import YahooFinancials 
from tabulate import tabulate
import IPython.display as d

import datetime 
import time

import warnings
warnings.filterwarnings("ignore")
warnings.filterwarnings("ignore", category=UserWarning)

##### <span style='color:red'> Project Creators:</span> Create requirements.txt file.<br>

In [None]:
docstring = """
# -- --------------------------------------------------------------------------------------------------  -- # 
# -- project: S&P500-Risk-Optimized-Portfolios-PostCovid-ML                                              -- # 
# -- script: requirements.txt: txt file to download Python modules for execution                         -- # 
# -- author: EstebanMqz                                                                                  -- # 
# -- license: CC BY 3.0                                                                                  -- # 
# -- repository: SP500-Risk-Optimized-Portfolios-PostCovid-ML/blob/main/requirements.txt                 -- #                                  
# -- --------------------------------------------------------------------------------------------------  -- # 
\n
"""

# Hand the header text defined above to the project helper.
# NOTE(review): presumably this writes requirements.txt with the header on top - confirm in functions.py.
fn.get_requirements(docstring)

##### <span style='color:green'> Project Users:</span> Install libraries in requirements.txt file.<br>

In [None]:
# NOTE(review): presumably installs every package listed in requirements.txt - confirm in data.py.
dt.library_install("requirements.txt")

Data fetching from Yahoo Finance is limited, so other data sources such as Quandl (with an API key) or MT5 (for cryptos/Forex) can be used instead. <br>
Fetching the data from Yahoo Finance in batches could be a solution; these are the first and last batches of tickers (A-Z): <br> 

In [14]:
# Get the S&P 500 tickers grouped in batches.
# NOTE(review): the argument 10 looks like the batch size (each batch displayed below holds 10 symbols) - confirm in functions.py.
tickers=fn.SP500_tickers(10)
# Display the first batch and the batch at index 49.
tickers[0][:] , tickers[49][:]

(['MMM', 'AOS', 'ABT', 'ABBV', 'ACN', 'ATVI', 'ADM', 'ADBE', 'ADP', 'AAP'],
 ['WY', 'WHR', 'WMB', 'WTW', 'GWW', 'WYNN', 'XEL', 'XYL', 'YUM', 'ZBRA'])

In [11]:
# Download the price history of every ticker in the first batch (second argument: 3 years back)
# and join the per-ticker frames side by side (axis=1) on their date index.
data = pd.concat([dt.get_historical_price_data(symbol, 3) for symbol in tickers[0]], axis=1)

In [12]:
# Display the concatenated price table built in the previous cell.
data

Unnamed: 0_level_0,MMM,AOS,ABT,ABBV,ACN,ATVI,ADM,ADBE,ADP,AAP
formatted_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2020-05-01,132.659515,39.439064,85.580437,72.788918,172.786255,64.027557,32.627087,343.839996,132.085907,108.905914
2020-05-04,132.480988,39.751541,85.561401,71.927818,172.248993,65.986374,32.292645,349.109985,135.904922,112.193924
2020-05-05,131.614990,39.921982,88.609329,75.011955,174.254211,67.796799,32.478439,356.130005,137.165421,113.202751
2020-05-06,130.516937,41.332897,86.751984,75.055893,173.064514,72.090363,31.958199,362.519989,135.980164,113.174721
2020-05-07,130.106308,41.010941,89.523712,74.001495,179.521561,72.317894,32.292645,366.779999,138.999649,111.726883
...,...,...,...,...,...,...,...,...,...,...
2023-04-24,105.059998,68.613686,110.400002,164.080002,277.250000,86.089996,80.800003,377.339996,215.470001,126.830002
2023-04-25,104.370003,68.265244,109.970001,164.899994,270.700012,86.739998,75.900002,369.589996,211.690002,123.779999
2023-04-26,102.919998,67.200005,108.750000,161.800003,271.209991,76.809998,77.029999,363.059998,210.830002,122.330002
2023-04-27,105.330002,68.559998,109.500000,148.869995,275.450012,77.610001,78.010002,371.420013,216.160004,124.900002


In [None]:
#Function to get historical price data from yahoo_financials
def get_historical_price_data(ticker, years):
    """
    Download the daily adjusted close of a ticker from yahoo_financials for the last n years.

    Parameters:
    ----------
    ticker : str
        Ticker of the stock to be downloaded. It is used as the key into the yahoo_financials
        response and as the name of the returned column, so it must be a single string
        (a list cannot be used as a dictionary key).
    years : int
        Number of years (counted as 365 days each) to download the data from today's date.

    Returns:
    -------
    pandas.DataFrame
        "adjclose" renamed to `ticker`, indexed by "formatted_date" (datetime).
        If the response is missing or malformed (KeyError, TypeError or ValueError),
        a warning is issued and an empty frame with no columns is returned, so that the
        ticker is simply skipped when the result is concatenated with others.
    """
    now = datetime.datetime.now() #Taken once so start and end refer to the same instant
    start = (now - datetime.timedelta(days = 365 * years)).strftime("%Y-%m-%d") #n years ago from today's date
    end = now.strftime("%Y-%m-%d") #Today

    try:
        prices = YahooFinancials(ticker).get_historical_price_data(start_date = start,
                                                                   end_date = end,
                                                                   time_interval = "daily")[ticker]["prices"]
        data = pd.DataFrame(prices)
        data["formatted_date"] = pd.to_datetime(data["formatted_date"])
        data = data.set_index("formatted_date")
        data = data.drop(["date", "high", "low", "open", "close", "volume"], axis = 1)
        data = data.rename(columns = {"adjclose" : ticker})

    except (KeyError, TypeError, ValueError) as error:
        #Previously these errors were silenced with `pass` and the function then failed with
        #UnboundLocalError on `return data`; return an explicit empty result instead.
        warnings.warn(f"No historical prices retrieved for {ticker!r}: {error!r}", stacklevel = 2)
        data = pd.DataFrame(index = pd.DatetimeIndex([], name = "formatted_date"))

    return data

In [None]:
def get_historical_price_data(ticker, years):
    """
    Function to retrieve Adj. Closes data from OHLCV of ticker(s) from Yahoo_Financials for n years backwards from today's date.
    It returns a dataframe with the Adj. Close(s) of ticker(s) with datetime as index.

    Parameters:
    ----------
    ticker : str
        Ticker of the stock(s) to be downloaded as a string or str list, e.g: ["ticker_1", "ticker_2", ... , "ticker_n"]
    years : int
        Number of years (counted as 365 days each) for data download from today's date backwards.

    Returns:
    -------
    pandas.DataFrame
        One column per ticker that could be parsed (named after the ticker), indexed by
        "formatted_date" (datetime). Tickers whose data is missing or malformed are skipped
        with a warning; if none could be parsed an empty frame with no columns is returned.
    """
    now = datetime.datetime.now()
    start = (now - datetime.timedelta(days = 365 * years)).strftime("%Y-%m-%d")
    end = now.strftime("%Y-%m-%d")

    # Normalise the input so a single string and a list of strings share one code path.
    symbols = [ticker] if isinstance(ticker, str) else list(ticker)
    empty = pd.DataFrame(index = pd.DatetimeIndex([], name = "formatted_date"))

    try:
        response = YahooFinancials(ticker).get_historical_price_data(start_date = start,
                                                                     end_date = end,
                                                                     time_interval = "daily")
    except (KeyError, TypeError, ValueError) as error:
        warnings.warn(f"No historical prices retrieved for {ticker!r}: {error!r}", stacklevel = 2)
        return empty

    frames = []
    for symbol in symbols:
        try:
            prices = pd.DataFrame(response[symbol]["prices"])
            prices["formatted_date"] = pd.to_datetime(prices["formatted_date"])
            # Keep only the adjusted close and label it with its ticker.
            adj_close = prices.set_index("formatted_date")[["adjclose"]]
            frames.append(adj_close.rename(columns = {"adjclose" : symbol}))
        except (KeyError, TypeError, ValueError) as error:
            # A missing/malformed entry for one ticker must not discard the others.
            warnings.warn(f"No historical prices retrieved for {symbol!r}: {error!r}", stacklevel = 2)

    # pd.concat needs a list of frames; the previous pd.concat(data, axis = 1) on a single
    # DataFrame always raised a TypeError that was silently swallowed.
    return pd.concat(frames, axis = 1) if frames else empty