#### Markowitz's Modern Portfolio Theory (MPT) is a practical method for building a portfolio that maximizes expected return for a given level of risk. <br>

#### The key component of this theory is diversification. It explains that by investing in stocks that are uncorrelated, we can lower the overall risk of a portfolio while maximizing returns for that level of risk. <br>

#### In MPT, the risk and return of an investment are not viewed in isolation, but in terms of how they affect the risk and return of the entire portfolio.


#### This project uses Markowitz's Theory to create an efficient portfolio using only technology stocks from the S&P 500 index.

#### Libraries used in project



In [None]:
#import necessary libraries
import os
import numpy as np
import pandas as pd
import datetime as dt
import seaborn as sns
import yfinance as yfin
import matplotlib.pyplot as plt

### Functions

Creating all the functions that will be used over the course of this project

In [None]:
def get_dataframe_from_webpage(url, index):
    """
    desc: reads every table pandas can find at a webpage and picks one of them
    params: webpage url, position of the wanted table in the list of parsed tables
    returns: the selected table as a dataframe
    """
    tables = pd.read_html(url)
    return tables[index]


In [None]:
def dataframe_by_column_category(df, column_name, column_category):
    """
    desc: keeps only the rows whose value in a given column equals a given category
    params: dataframe to filter, name of the column to compare, category value to keep
    returns: the matching rows (original index labels are preserved)
    """
    matches_category = df[column_name] == column_category
    return df.loc[matches_category]

In [None]:
def get_items_from_dataframe_column(df, column_name):
    """
    desc: collects the values of one dataframe column, in row order
    params: dataframe, name of the column to read
    returns: the column's values
    rtype: list
    """
    column = df[column_name]
    return column.tolist()

In [None]:
def csv_from_yahoo_data(file_path, ticker, s_year, s_month, s_day, e_year, e_month, e_day):
    """
    desc: downloads company stock data for a specific period from yahoo finance and saves it as csv
    params: folder to save data (used as a plain prefix of "<ticker>.csv", so it must
            already end with a path separator), company ticker,
            period start date (year, month, day), period end date (year, month, day)
    returns: the downloaded stock data (presumably a pandas dataframe -- confirm against
             yfinance), or None when the download or the csv write fails
    raises: ValueError if the start or end date components do not form a valid date
    """
    start_date = dt.datetime(s_year, s_month, s_day)
    # The end of the period must come from the e_* arguments; reusing the start
    # month/day here shifted the period end and could even build an invalid date.
    end_date = dt.datetime(e_year, e_month, e_day)

    try:
        print(f"downloading data for company:{ticker}")
        stock_data = yfin.download(ticker, start_date, end_date)

        stock_data.to_csv(file_path + ticker + ".csv")

    except Exception as e:
        # Deliberately best-effort so one bad ticker does not abort a batch run,
        # but report the cause instead of discarding it.
        print(f"Failed to download stock data for {ticker}: {e}")
        return None

    return stock_data

In [None]:
def multiple_csv_from_yahoo_data(file_path, s_year, s_month, s_day, e_year, e_month, e_day, tickers):
    """
    desc: runs csv_from_yahoo_data once per ticker, with the same folder and period for all
    params: folder to save data, period start date (year, month, day),
            period end date (year, month, day), list of tickers
    returns: nothing
    """
    period = (s_year, s_month, s_day, e_year, e_month, e_day)

    for symbol in tickers:
        csv_from_yahoo_data(file_path, symbol, *period)

In [None]:
def count_csv_in_folder(file_path):
    """
    desc: counts the directory entries whose name ends with ".csv"
    params: folder path
    returns: number of matching entries
    rtype: int
    """
    return sum(1 for entry in os.listdir(file_path) if entry.endswith(".csv"))

In [None]:
def df_from_csv(ticker, folder=None):
    """
    desc:  creates dataframe from a ticker's csv file, indexed by its parsed 'Date' column
    params: company ticker (the file read is "<folder><ticker>.csv"),
            optional folder prefix -- when omitted, the module-level file_path is used
    returns: pandas dataframe
    """
    # Fall back to the global file_path so existing single-argument calls keep
    # working; passing folder explicitly removes the hidden global dependency.
    base = file_path if folder is None else folder
    df = pd.read_csv(base + ticker + ".csv", index_col='Date', parse_dates=True)
    return df


In [None]:
def merge_df_by_column(column_name, tickers):
    """
    desc: builds one dataframe holding the same column taken from each ticker's csv
    params: name of the column to take from every csv, tickers whose csv files are loaded
    returns: dataframe with one column per ticker, named after the ticker
    rtype: pandas dataframe
    """
    combined = pd.DataFrame()

    # Assign column by column (rather than concatenating) so the way pandas
    # aligns each new column to the existing frame is left untouched.
    for symbol in tickers:
        combined[symbol] = df_from_csv(symbol)[column_name]

    return combined

In [None]:
def get_returns_from_df(df):
    """
    desc: computes the row-over-row relative change of every column via pct_change
    params: dataframe
    returns: dataframe of relative changes, same shape as the input
    rtype: pandas dataframe
    """
    return df.pct_change()

In [None]:
def get_roi(df):
    """
    desc: computes the return on investment (roi) between the first and last rows of a dataframe
    params: dataframe with an 'Adj Close' column
    returns: (last 'Adj Close' - first 'Adj Close') / first 'Adj Close'
    rtype: float
    """
    first_label, last_label = df.index[0], df.index[-1]
    opening_price = df.loc[first_label]['Adj Close']
    closing_price = df.loc[last_label]['Adj Close']

    return (closing_price - opening_price) / opening_price


In [None]:
def get_roi_for_multiple_stocks(tickers):
    """
    desc: create dataframe of rois for all stocks by their tickers
    params: tickers list (each ticker's csv is loaded with df_from_csv)
    returns: dataframe with a 'Ticker' column and a matching 'ROI' column
    rtype: pandas dataframe
    """
    # One roi per ticker, in the same order as the tickers themselves.
    rois = [get_roi(df_from_csv(ticker)) for ticker in tickers]
    return pd.DataFrame({'Ticker': tickers, 'ROI': rois})