In [6]:
# Import the required libraries to process data:
import yfinance as yf
import numpy as np
import pandas as pd
import matplotlib as plt
# Import the required libraries to deal with operations on time series:
from datetime import datetime, timedelta, date

In [7]:
### Question 1:
### Choose three stocks to include in a portfolio (e.g., AAPL, TSLA, AMZN) and define their weights (e.g., 50%, 30%, 20%).
### Retrieve historical data for the stocks over the past 5 years.

def fetch_raw_data(tickers=["AAPL", "TSLA", "AMZN"]) -> pd.DataFrame:
    # 1) Define the time interval = {startDate, endDate}:
    endDate = date.today() # Ex: 2024-12-03
    startDate = (endDate - pd.DateOffset(years=5)).date() # Ex 2019-12-03

    # 2) Instantiate a list to dataframe of each stock:
    stock = []
    # 3) Iterate over the tickers list to download each stock present:
    for ticker in tickers:
        try: # 3.1) Attempt to download 5y worth of financial data for the current stock in a pd.DataFrame:
            df = yf.download(tickers=ticker, start=startDate, end=endDate, progress=False)
            # 3.2) remove the multi index:
            if isinstance(df.columns, pd.MultiIndex):
                # Only use the first level of the MultiIndex:
                df.columns = df.columns.get_level_values(0)

            df.reset_index(inplace=True)
            # 3.4) Remove unwanted features = {Volume}:
            df = df[["Date", "Open", "High", "Low", "Close", "Adj Close"]]
            # 3.5) Add the ticker as column:
            df["Ticker"] = ticker
            # 3.6) Append the current DataFrame to the stock list:
            stock.append(df)
        except Exception as e:
            print(f"Error couldn't download {ticker}: {str(e)}")
            continue 
            
    # 4) Return a concatenated DataFrame:
    df = pd.concat(stock, ignore_index=True)
    return df

raw_df = fetch_raw_data()
print(raw_df)


Price       Date        Open        High         Low       Close   Adj Close  \
0     2019-12-04   65.267502   65.827499   65.169998   65.434998   63.438080   
1     2019-12-05   65.947502   66.472504   65.682503   66.394997   64.368790   
2     2019-12-06   66.870003   67.750000   66.824997   67.677498   65.612167   
3     2019-12-09   67.500000   67.699997   66.227501   66.730003   64.693558   
4     2019-12-10   67.150002   67.517502   66.464996   67.120003   65.071671   
...          ...         ...         ...         ...         ...         ...   
3769  2024-11-26  201.899994  208.000000  201.789993  207.860001  207.860001   
3770  2024-11-27  206.979996  207.639999  205.050003  205.740005  205.740005   
3771  2024-11-29  205.830002  208.199997  204.589996  207.889999  207.889999   
3772  2024-12-02  209.960007  212.990005  209.509995  210.710007  210.710007   
3773  2024-12-03  210.309998  214.020004  209.649994  213.440002  213.440002   

Price Ticker  
0       AAPL  
1       A

In [8]:
### Question 2:
### Calculate Daily Portfolio Value:
def compute_daily_portfolio_value(df: pd.DataFrame, weights: list) -> pd.DataFrame:
    # 1) Pivot the DataFrame to get Adjusted Close prices in a single matrix:
    # Divides each price by the first price in the dataset (row 0). This normalizes all prices to start at 1 on the first date.
    prices = df.pivot(index="Date", columns="Ticker", values="Adj Close")
    # 2) Normalize and calculate weighted portfolio values in one step:
    normalized_prices = prices / prices.iloc[0]
    # Multiplies each normalized price by its corresponding weight in the weights list
    # For each row (representing a day), it computes the dot product of the row's values (normalized prices) and the weights.
    # Ex Day 1: (1.0000 * 0.5) + (1.0000 * 0.3) + (1.0000 * 0.2) = 1.0000
    portfolio_value = (normalized_prices * weights).sum(axis=1)  
    # 3) Convert the resulting Series into a DataFrame with the desired format:
    result = portfolio_value.reset_index()
    result = result.rename(columns={0: "Portfolio Value"})
    return result

weights = [0.5, 0.3, 0.2]
portfolio_value = compute_daily_portfolio_value(raw_df, weights)
print(portfolio_value)

           Date  Portfolio Value
0    2019-12-04         1.000000
1    2019-12-05         1.002295
2    2019-12-06         1.017304
3    2019-12-09         1.011894
4    2019-12-10         1.018710
...         ...              ...
1253 2024-11-26         5.607851
1254 2024-11-27         5.551499
1255 2024-11-29         5.688272
1256 2024-12-02         5.823162
1257 2024-12-03         5.805507

[1258 rows x 2 columns]


In [14]:
### Question 3: Compute cumulative value of the portfolio:
def compute_cumulative_value(df: pd.DataFrame) -> pd.DataFrame:
    # 1) Compute the portfolio daily return:
    portfolio_daily_returns = df["Portfolio Value"].pct_change()
    # 2) Compute the portfolio cumulative return:
    portfolio_cumulative_returns = (1 + portfolio_daily_returns).cumprod() - 1
    # 3) Combine with the Date column for a complete DataFrame:
    result = pd.DataFrame({"Date": df["Date"],"Cumulative Return": portfolio_cumulative_returns}).dropna()
    # 5) Return the DataFrame
    return result

portfolio_cumulative_returns = compute_cumulative_value(portfolio_value)
print(portfolio_cumulative_returns)

           Date  Cumulative Return
1    2019-12-05           0.002295
2    2019-12-06           0.017304
3    2019-12-09           0.011894
4    2019-12-10           0.018710
5    2019-12-11           0.027023
...         ...                ...
1253 2024-11-26           4.607851
1254 2024-11-27           4.551499
1255 2024-11-29           4.688272
1256 2024-12-02           4.823162
1257 2024-12-03           4.805507

[1257 rows x 2 columns]


In [19]:
### Question 4: Retrieve and compute the index cumulative returns:
### GSPC often refers to the S&P 500 Index
### used as a benchmark index for comparison in portfolio performance analysis.
### Purpose: Determine if portfolio is outperforming the market:
def comparison_with_gspc(portfolio_df: pd.DataFrame) -> pd.DataFrame:
    # 1) Extract the first and last date from the portfolio DF:
    firstDate = portfolio_df["Date"].min()
    lastDate = portfolio_df["Date"].max()
    # 2) Fetch the GSPC financial data:
    gspc_df = yf.download("^GSPC", start=firstDate, end=lastDate, progress=False)
    # 3) return gspc:
    return gspc_df


test = comparison_with_gspc(portfolio_cumulative_returns)
print(test)
    



1 Failed download:
['^GSPC']: SSLError(MaxRetryError("HTTPSConnectionPool(host='www.yahoo.com', port=443): Max retries exceeded with url: /?guccounter=1 (Caused by SSLError(SSLEOFError(8, '[SSL: UNEXPECTED_EOF_WHILE_READING] EOF occurred in violation of protocol (_ssl.c:1020)')))"))


Empty DataFrame
Columns: [(Adj Close, ^GSPC), (Close, ^GSPC), (High, ^GSPC), (Low, ^GSPC), (Open, ^GSPC), (Volume, ^GSPC)]
Index: []
