In [1]:
# import libraries 
import pandas as pd 
import numpy as np
# Perform Independent-Samples T-Test 
from scipy.stats import ttest_ind
# Perform Paired-Samples T-Test 
from scipy.stats import ttest_rel

In [2]:
# read csv file into data frame
df = pd.read_csv("all_stocks.csv")

In [3]:
# convert Date string to datetime object
df['Date'] = pd.to_datetime(df['Date'], format = '%m/%d/%Y')

In [4]:
# sort df by Date
df.sort_values(by='Date', inplace = True)

In [5]:
# reset index after sorting the df
df.reset_index(drop=True, inplace = True)

In [6]:
# add ibm_return, wmt_return, msft_return, and amzn_return columns
df["ibm_return"] = (df["ibm"] / df["ibm"].shift(1)) - 1
df["wmt_return"] = (df["wmt"] / df["wmt"].shift(1)) - 1
df["msft_return"] = (df["msft"] / df["msft"].shift(1)) - 1
df["amzn_return"] = (df["amzn"] / df["amzn"].shift(1)) - 1

In [7]:
# replace the first entry of ibm, wmt, msft, and amzn returns to 0
df["ibm_return"][0] = 0
df["wmt_return"][0] = 0
df["msft_return"][0] = 0
df["amzn_return"][0] = 0

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["ibm_return"][0] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["wmt_return"][0] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["msft_return"][0] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["amzn_return"][0] = 0


In [8]:

# create 2-dimension Numpy arrays to store the two portfolios (ibm & wmt, msft & amzn)
portfolio1 = np.array([df["ibm_return"], df["wmt_return"]]) 
portfolio2 = np.array([df["msft_return"], df["amzn_return"]])

In [9]:
# 1)Are there any differences in the rate of return of IBM (ibm) vs. Walmart (wmt)
# based on the daily rate of return of these two stocks from 1/1/2019 to 4/18/2022?

# Determine Equal Variance by testing if (the Larger Stand Deviation / the smaller Standard Deviation) > 2
# Assume equal variance unless (the Larger Stand Deviation / the smaller Standard Deviation) > 2

EqualVar = True
if portfolio1[0].std() > portfolio1[1].std():
    if (portfolio1[0].std() / portfolio1[1].std()) > 2:
        EqualVar = False
else:
    if (portfolio1[1].std() / portfolio1[0].std()) > 2:
        EqualVar = False

In [10]:
# Obtain T-Stat and Pvalue for portfolio1 
ttest_p1 = ttest_ind(portfolio1[0], portfolio1[1], equal_var=EqualVar)   
# store the p_value from your t-test analysis in the variable named iw_pvalue                       
iw_pvalue = ttest_p1.pvalue
# store either "YES" or "NO" in the variable named iw_diff
if iw_pvalue < 0.05:
    iw_diff = "YES"
else:
    iw_diff = "NO"

In [11]:
# 2) Assume IBM is 40 percent, and Walmart is 60 % of the portfolio. 
# What is the average rate of return for the portfolio containing
# IBM and Walmart from 1/1/2019 to 4/18/2022? 

# calculate the average rate of return for the portfolio 1
# store the average rate of return in the variable named iwp_rr

iwp_rr =np.average(portfolio1[0])*.4 +np.average(portfolio1[1])*.6

In [12]:
# 3) Assume Microsoft is 40 percent, and Amazon is 60 % of the portfolio.
# What is the average rate of return for the portfolio containing Microsoft (msft)
# and Amazon (amzn) from 1/1/2019 to 4/18/2022?

# store the average rate of return in the variable named map_rr

map_rr =np.average(portfolio2[0])*.4 +np.average(portfolio2[1])*.6

In [13]:
# 4) Which portfolio has a higher rate of return?
# Assume IBM and Walmart are Portfolio 1, and Microsoft and Amazon are Portfolio 2.

# store the integer number 1 or 2 
# (whichever has the higher rate of return) in the variable named best_portfolio

if iwp_rr >  map_rr :
    best_portfolio = "1"
else:
    best_portfolio = "2"

In [14]:
# 5) Did Walmart (wmt) stock perform better pre-Covid (1/1/2019 to 3/14/2020)
# than during the pandemic (3/15/2020 to 5/25/2021) based on the daily rate of return
# of the Walmart stock from 1/1/2019 to 4/18/2022?

# create pre-covid dataframe based on date range to store the selected daily rate of return of wmt

pre_df = df[(df['Date'] >= '1/1/2019') & (df['Date'] <= ' 3/14/2020')]["wmt_return"]

# create pandemic period dataframe based on date range to store the selected daily rate of return of wmt
pan = df[(df['Date'] >= '3/15/2020') & (df['Date'] <= ' 5/25/2021')]["wmt_return"]

In [15]:
# reset the pan_df index 

pan.reset_index(drop=True, inplace = True)

In [16]:
# change the pandemic daily rate of return for the first entry to zero (3-15-2020)
pan[0] = 0
pan_df = pan

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pan[0] = 0


In [24]:
# create 2-dimension Numpy arrays to store the pre-Covid and 
# pandemic period wmt daily ROR

pre = np.array(pre_df)
post = np.array(pan_df)

# this step is optional....you can just use pre_df and pan_df with the ttest_rel()
ttest_resualt =  ttest_rel(pre, post, alternative='greater')   

In [23]:
# store the p_value from your t-test analysis in the variable named better_w_pvalue  
better_w_pvalue  = ttest_resualt.pvalue

# store either "YES" or "NO" in the variable named better_w_covid

if better_w_pvalue < 0.05:
    better_w_covid = "YES"
else:
    better_w_covid = "NO"