import yfinance as yf # type: ignore
import pandas as pd

# Define the stock and market index
stock_tickers = ['HSBC.L', 'BP.L']
market_ticker = '^FTSE'  # FTSE 100

# Define the period
start_date = '2017-01-01'
end_date = '2020-01-01'

# Download data
stocks_data = {}
for ticker in stock_tickers:
    stocks_data[ticker] = yf.download(ticker, start=start_date, end=end_date)

market_data = yf.download(market_ticker, start=start_date, end=end_date)

# Display the first few rows
for ticker, data in stocks_data.items():
    print(f"{ticker} data:\n", data.head())
print(market_data.head())


In [11]:
# Calculate daily returns
for ticker in stock_tickers:
    stocks_data[ticker]['Return'] = stocks_data[ticker]['Adj Close'].pct_change()
market_data['Return'] = market_data['Adj Close'].pct_change()

# Merge the data on the date index
returns_data = market_data[['Return']].rename(columns={'Return': 'FTSE'}).copy()

for ticker in stock_tickers:
    returns_data[ticker] = stocks_data[ticker]['Return']

# Drop any rows with missing data
returns_data.dropna(inplace=True)

# Display the first few rows
print(returns_data.head())

Empty DataFrame
Columns: [FTSE, HSBC.L, BP.L]
Index: []


In [14]:
import yfinance as yf

# Download S&P 500 historical data
sp500 = yf.Ticker("^GSPC")
# Define the period
start_date = '2017-01-01'
end_date = '2020-01-01'
aapl_data = aapl.history(start=start_date, end= end_date)

# Save to csv
aapl_data.to_csv("aapl_Historical_Data.csv")


In [19]:
import yfinance as yf

# Download S&P 500 historical data
msft = yf.Ticker("MSFT")
# Define the period
start_date = '2017-01-01'
end_date = '2020-01-01'
msft_data = msft.history(start=start_date, end= end_date)

# Save to csv
msft_data.to_csv("MSFT_Historical_Data.csv")


In [20]:
import yfinance as yf

# Download S&P 500 historical data
aapl = yf.Ticker("AAPL")
# Define the period
start_date = '2017-01-01'
end_date = '2020-01-01'
aapl_data = aapl.history(start=start_date, end= end_date)

# Save to csv
aapl_data.to_csv("AAPL_Historical_Data.csv")


In [21]:
import yfinance as yf

# Download S&P 500 historical data
tsla = yf.Ticker("TSLA")
# Define the period
start_date = '2017-01-01'
end_date = '2020-01-01'
TSLA_data = tsla.history(start=start_date, end= end_date)

# Save to csv
tsla_data.to_csv("TSLA_Historical_Data.csv")

# Code above this line is for setting up the files

In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from scipy import stats
import plotly.express as px
import plotly.figure_factory as ff
import plotly.graph_objects as go
# Plotly.express module can plot interactive plots easily and effectively.

# it is important to note here that the Closing share prices are "Adjusted Close Prices"
AAPL = pd.read_csv("AAPL_Historical_Data.csv")
MSFT = pd.read_csv("MSFT_Historical_Data.csv")
TSLA = pd.read_csv("TSLA_Historical_Data.csv")
SP500 = pd.read_csv("SP500_Historical_Data.csv")

AAPL = AAPL[["Date", "Close"]]
MSFT = MSFT[["Date", "Close"]]
TSLA = TSLA[["Date", "Close"]]
SP500 = SP500[["Date", "Close"]]

AAPL.rename(columns={"Close": "AAPL"}, inplace = True)
MSFT.rename(columns={"Close": "MSFT"}, inplace = True)
TSLA.rename(columns={"Close": "TSLA"}, inplace = True)
SP500.rename(columns={"Close": "SP500"}, inplace = True)

In [4]:
# Convert the 'Date' column to datetime with timezone handling
TSLA['Date'] = pd.to_datetime(TSLA['Date'], utc=True)

# Convert to naive datetime (remove timezone)
TSLA['Date'] = TSLA['Date'].dt.tz_localize(None)

# Extract only the date part
TSLA['Date'] = TSLA['Date'].dt.date

# Change the date format to dd-mm-yyyy
TSLA['Date'] = TSLA['Date'].apply(lambda x: x.strftime('%d-%m-%Y'))

In [5]:
# Convert the 'Date' column to datetime with timezone handling
MSFT['Date'] = pd.to_datetime(MSFT['Date'], utc=True)

# Convert to naive datetime (remove timezone)
MSFT['Date'] = MSFT['Date'].dt.tz_localize(None)

# Extract only the date part
MSFT['Date'] = MSFT['Date'].dt.date

# Change the date format to dd-mm-yyyy
MSFT['Date'] = MSFT['Date'].apply(lambda x: x.strftime('%d-%m-%Y'))

In [6]:
# Convert the 'Date' column to datetime with timezone handling
AAPL['Date'] = pd.to_datetime(AAPL['Date'], utc=True)

# Convert to naive datetime (remove timezone)
AAPL['Date'] = AAPL['Date'].dt.tz_localize(None)

# Extract only the date part
AAPL['Date'] = AAPL['Date'].dt.date

# Change the date format to dd-mm-yyyy
AAPL['Date'] = AAPL['Date'].apply(lambda x: x.strftime('%d-%m-%Y'))

In [7]:
# Convert the 'Date' column to datetime with timezone handling
SP500['Date'] = pd.to_datetime(SP500['Date'], utc=True)

# Convert to naive datetime (remove timezone)
SP500['Date'] = SP500['Date'].dt.tz_localize(None)

# Extract only the date part
SP500['Date'] = SP500['Date'].dt.date

# Change the date format to dd-mm-yyyy
SP500['Date'] = SP500['Date'].apply(lambda x: x.strftime('%d-%m-%Y'))

In [8]:
TSLA['Date'] = pd.to_datetime(TSLA['Date'])
MSFT['Date'] = pd.to_datetime(MSFT['Date'])
AAPL['Date'] = pd.to_datetime(AAPL['Date'])
SP500['Date'] = pd.to_datetime(SP500['Date'])

  TSLA['Date'] = pd.to_datetime(TSLA['Date'])
  MSFT['Date'] = pd.to_datetime(MSFT['Date'])
  AAPL['Date'] = pd.to_datetime(AAPL['Date'])
  SP500['Date'] = pd.to_datetime(SP500['Date'])


In [9]:
TSLA.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 754 entries, 0 to 753
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   Date    754 non-null    datetime64[ns]
 1   TSLA    754 non-null    float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 11.9 KB


In [10]:
TSLA.head()

Unnamed: 0,Date,TSLA
0,2017-03-01,14.466
1,2017-04-01,15.132667
2,2017-05-01,15.116667
3,2017-06-01,15.267333
4,2017-09-01,15.418667


In [11]:
# concatenation of all dataframes into one.
stocks_df = pd.concat([AAPL, MSFT.drop(columns=["Date"]), TSLA.drop(columns=["Date"]), SP500.drop(columns=["Date"])], axis = 1)

# sort by date
stocks_df = stocks_df.sort_values(by = ['Date'])
stocks_df.head(5)

Unnamed: 0,Date,AAPL,MSFT,TSLA,SP500
20,2017-01-02,29.876551,57.84029,16.615999,2279.550049
39,2017-01-03,32.579029,59.435688,16.667999,2395.959961
81,2017-01-05,34.161488,63.526787,21.521999,2388.330078
103,2017-01-06,35.847012,64.526077,22.691334,2430.060059
145,2017-01-08,35.114548,66.808884,21.304667,2476.350098


#### Below code is to change working directory

In [2]:

import os
import pandas as pd

# Specify the new directory path
new_directory = "/Users/yadav/ERP"  # Replace with your desired path

# Change the current working directory
os.chdir(new_directory)