# Imports


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import mplfinance as mpf
import matplotlib.dates as mdates
import datetime
import pandas_datareader as datareader
import datetime
import yfinance as yf
import tqdm

import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio
from plotly.subplots import make_subplots

pio.renderers.default = "notebook"
pio.templates.default = "plotly_dark"
import gc
import warnings

warnings.filterwarnings("ignore")
plt.rcParams["figure.figsize"] = [12, 8]

In [80]:
utc = datetime.timezone.utc

# Some Functions and DataFrames


In [None]:
def get_beta(df, freq="Y", names=["df1", "df2"]):
    """
    Get beta of two stocks

    Parameters
    ----------
    df : pd.DataFrame
        Dataframe of two stocks with date as index
    freq : str, optional
        Frequency of the data, by default "Y"

    Returns
    -------
    float
        Beta of the two stocks
    """
    df = df.copy()
    df = df.asfreq(freq).dropna()
    df["Returns_" + names[0]] = df[names[0]].pct_change()
    df["Returns_" + names[1]] = df[names[1]].pct_change()
    df = df.dropna()
    covariance = df["Returns_" + names[0]].cov(df["Returns_" + names[1]])
    variance = df["Returns_" + names[1]].var()
    beta = covariance / variance
    return beta

In [None]:
def expected_return(rf, beta, Erm):
    """
    Get expected return of a stock

    Parameters
    ----------
    rf : float
        Risk free rate
    beta : float
        Beta of the stock
    Erm : float
        Expected return of the market

    Returns
    -------
    float
        Expected return of the stock
    """
    return rf + beta * (Erm - rf)

In [None]:
apple = pd.read_csv("Data/AAPL.csv", parse_dates=["Date"], index_col="Date")
google = pd.read_csv("Data/GOOG.csv", parse_dates=["Date"], index_col="Date")
snp = pd.read_csv("Data/GSPC.csv", parse_dates=["Date"], index_col="Date")
gold = pd.read_csv("Data/gold.csv", parse_dates=["Date"], index_col="Date")
oil = pd.read_csv("Data/oil.csv", parse_dates=["Date"], index_col="Date")
treasury = pd.read_csv("Data/treasury.csv", parse_dates=["Date"], index_col="Date")

In [None]:
apple = apple[apple.index >= pd.to_datetime("2012-01-01").tz_localize(utc)]
google = google[google.index >= "2012-01-01"]
snp = snp[snp.index >= "2012-01-01"]
gold = gold[gold.index >= "2012-01-01"]
oil = oil[oil.index >= "2012-01-01"]
treasury = treasury[treasury.index >= "2012-01-01"]

NameError: name 'utc' is not defined

# Start

In [68]:
def convert_str_to_date(string, format="%Y-%m-%d"):
    """
    Convert string to date

    Parameters
    ----------
    string : str
        String to convert
    format : str, optional
        Format of the string, by default "%Y-%m-%d"

    Returns
    -------
    datetime.date
        Date
    """
    if isinstance(string, datetime.date):
        return string
    if not string:
        return None
    return datetime.datetime.strptime(string, format)

In [59]:
convert_str_to_date("2012-01-01")>convert_str_to_date("2012-01-02")

False

In [63]:
def load_data(file_dir, start_date=None, end_date=None, columns=None, freq="D", rename_cols=None):
    """
    Read data from a csv file and return a dataframe.
    Assumes that the dataframe has column `Date` containing dates.

     Parameters
     ----------
     file_dir : str
         Directory of the file
     range : tuple, optional
         Range of the data, by default ("2000-01-01", None)
    """
    df = pd.read_csv(file_dir, parse_dates=["Date"], index_col="Date")

    df.sort_index(inplace=True)
    smallest_date = df.index[0]
    largest_date = df.index[-1]
    start_date = convert_str_to_date(start_date)
    end_date = convert_str_to_date(end_date)

    if start_date and start_date < smallest_date:
        print("Start date is before the data starts. If you don't want to specify a start date, set it to None.")
        return
    if end_date and end_date > largest_date:
        print("End date is after the data ends. If you don't want to specify an end date, set it to None.")
        return
    
    if not start_date:
        start_date = df.index[0]
    if not end_date:
        end_date = df.index[-1]
    date_range = (start_date, end_date)
    df = df[(df.index >= date_range[0]) & (df.index <= date_range[1])]
    if columns:
        df = df[columns]
    if rename_cols:
        df.columns = rename_cols
    df = df.asfreq(freq, "ffill").dropna()
    return df


In [64]:
def merge_dfs(dfs, df_names = None, join="inner"):
    """
    Merges a list of dataframes into one. Uses the index as the key and `pd.concat` to merge the dataframes

    Parameters
    ----------
    dfs : list
        List of dataframes to merge
    df_names : list, optional
        Names of the dataframes, by default None Uses this to rename the columns of the merged dataframe
    join : str, optional
        How to join the dataframes, by default "inner"

    Returns
    -------
    pd.DataFrame
        Merged dataframe
    """
    df = pd.concat(dfs, axis=1, join=join)
    if df_names:
        cols = []
        for stock, name in zip(dfs, df_names):
            stock_cols = [f"{name}_{col}" for col in stock.columns]
            cols.extend(stock_cols)
        df.columns = cols
    return df

In [None]:
def calculate_stock_params(df, freq="Y", names=["df1", "df2"]):
    """
    Calculate beta and expected return of two stocks

    Parameters
    ----------
    df : pd.DataFrame
        Dataframe of two stocks with date as index
    freq : str, optional
        Frequency of the data, by default "Y"
    names : list, optional
        Names of the stocks, by default ["df1", "df2"]

    Returns
    -------
    dict
        Dictionary of beta and expected return
    """
    beta = get_beta(df, freq=freq, names=names)
    Erm = df[names[1]].mean()
    rf = treasury["1 Mo"].mean()
    E = expected_return(rf, beta, Erm)
    return {"beta": beta, "expected_return": E}

In [39]:
apple = load_data("Data/AAPL.csv", freq="D", columns=["Adj Close"], rename_cols=["Apple"])
google = load_data("Data/GOOG.csv", freq="D", columns=["Adj Close"], rename_cols=["Google"])
snp = load_data("Data/GSPC.csv", freq="D", columns=["Adj Close"], rename_cols=["S&P"])

In [67]:
load_data("Data/AAPL.csv", start_date="2010-01-01", end_date=None, freq="W")

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-10,7.510714,7.571429,7.466429,7.570714,6.453411,447610800
2010-01-17,7.533214,7.557143,7.352500,7.354643,6.269230,594067600
2010-01-24,7.385000,7.410714,7.041429,7.062500,6.020202,881767600
2010-01-31,7.181429,7.221429,6.794643,6.859286,5.846978,1245952400
2010-02-07,6.879643,7.000000,6.816071,6.980714,5.950484,850306800
...,...,...,...,...,...,...
2022-11-27,148.309998,148.880005,147.119995,148.110001,148.110001,35195900
2022-12-04,145.960007,148.000000,145.649994,147.809998,147.809998,65421400
2022-12-11,142.339996,145.570007,140.899994,142.160004,142.160004,76069500
2022-12-18,136.690002,137.649994,133.729996,134.509995,134.509995,160080100


In [30]:
display(apple.head())
display(google.head())
display(snp.head())

Unnamed: 0_level_0,Apple
Date,Unnamed: 1_level_1
1980-12-12,0.099874
1980-12-13,0.099874
1980-12-14,0.099874
1980-12-15,0.094663
1980-12-16,0.087715


Unnamed: 0_level_0,Google
Date,Unnamed: 1_level_1
2004-08-19,2.499133
2004-08-20,2.697639
2004-08-21,2.697639
2004-08-22,2.697639
2004-08-23,2.724787


Unnamed: 0_level_0,S&P
Date,Unnamed: 1_level_1
1927-12-30,17.66
1927-12-31,17.66
1928-01-01,17.66
1928-01-02,17.66
1928-01-03,17.76


In [31]:
print(len(apple), len(google), len(snp))

15358 6707 34699


In [33]:
stocks = [apple, google, snp]
df = merge_dfs(stocks)
df.head()

Unnamed: 0_level_0,Apple,Google,S&P
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2004-08-19,0.46746,2.499133,1091.22998
2004-08-20,0.46883,2.697639,1098.349976
2004-08-21,0.46883,2.697639,1098.349976
2004-08-22,0.46883,2.697639,1098.349976
2004-08-23,0.473092,2.724787,1095.680054


In [22]:
cols = []
stocks = [apple, google, snp]
stock_names = ["apple", "google", "snp"]
for stock, name in zip(stocks, stock_names):
    stock_cols = [f"{name}_{col}" for col in stock.columns]
    cols.extend(stock_cols)
df.columns = cols
df.head()

Unnamed: 0_level_0,apple_Open,apple_High,apple_Low,apple_Close,apple_Adj Close,apple_Volume,google_Open,google_High,google_Low,google_Close,google_Adj Close,google_Volume,snp_Open,snp_High,snp_Low,snp_Close,snp_Adj Close,snp_Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2004-08-19,0.562679,0.568929,0.542143,0.548393,0.46746,388920000,2.490664,2.591785,2.390042,2.499133,2.499133,897427216,1095.170044,1095.170044,1086.280029,1091.22998,1091.22998,1249400000
2004-08-20,0.548393,0.553393,0.544464,0.55,0.46883,316780800,2.51582,2.716817,2.503118,2.697639,2.697639,458857488,1091.22998,1100.26001,1089.569946,1098.349976,1098.349976,1199900000
2004-08-21,0.548393,0.553393,0.544464,0.55,0.46883,316780800,2.51582,2.716817,2.503118,2.697639,2.697639,458857488,1091.22998,1100.26001,1089.569946,1098.349976,1098.349976,1199900000
2004-08-22,0.548393,0.553393,0.544464,0.55,0.46883,316780800,2.51582,2.716817,2.503118,2.697639,2.697639,458857488,1091.22998,1100.26001,1089.569946,1098.349976,1098.349976,1199900000
2004-08-23,0.551071,0.558393,0.546429,0.555,0.473092,254660000,2.758411,2.826406,2.71607,2.724787,2.724787,366857939,1098.349976,1101.400024,1094.72998,1095.680054,1095.680054,1021900000


In [102]:
df = snp.merge(apple, how="inner", left_index=True, right_index=True).merge(google, how="inner", left_index=True, right_index=True)
df.head()

Unnamed: 0_level_0,S&P,Apple,Google
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2004-08-19,1091.22998,0.46746,2.499133
2004-08-20,1098.349976,0.46883,2.697639
2004-08-21,1098.349976,0.46883,2.697639
2004-08-22,1098.349976,0.46883,2.697639
2004-08-23,1095.680054,0.473092,2.724787


In [103]:
len(df)

6707

In [2]:
tcs = yf.Ticker("TCS.NS")
tcs

yfinance.Ticker object <TCS.NS>

In [3]:
tcs.balancesheet

Unnamed: 0,2022-03-31,2021-03-31,2020-03-31,2019-03-31
Total Assets,1415140000000.0,1307590000000.0,1208990000000.0,1149430000000.0
Current Assets,1083100000000.0,992800000000.0,902370000000.0,921310000000.0
Cash Cash Equivalents And Short Term Investments,484330000000.0,383830000000.0,357250000000.0,419390000000.0
Cash And Cash Equivalents,124880000000.0,68450000000.0,86460000000.0,72240000000.0
Cash Financial,124880000000.0,68450000000.0,86460000000.0,72240000000.0
...,...,...,...,...
Tangible Book Value,861180000000.0,841300000000.0,821330000000.0,875670000000.0
Total Debt,78180000000.0,77950000000.0,81760000000.0,5350000000.0
Share Issued,3659051373.0,3699051373.0,3752384706.0,3752384706.0
Ordinary Shares Number,3659051373.0,3699051373.0,3752384706.0,3752384706.0


In [6]:
tcs.get_balance_sheet(legacy=True)

Unnamed: 0,2022-03-31,2021-03-31,2020-03-31,2019-03-31
IntangibleAssets,11010000000,4800000000,2830000000,1790000000
TotalLiab,516680000000,436510000000,361500000000,250440000000
TotalStockholderEquity,891390000000,864330000000,841260000000,894460000000
MinorityInterest,7070000000,6750000000,6230000000,4530000000
DeferredLongTermLiab,1500000000,2280000000,2860000000,1740000000
OtherCurrentLiab,290460000000,132970000000,95200000000,76290000000
TotalAssets,1415140000000,1307590000000,1208990000000,1149430000000
CommonStock,3660000000,3700000000,3750000000,3750000000
OtherCurrentAssets,62090000000,40400000000,20660000000,22080000000
RetainedEarnings,856070000000,825580000000,808060000000,868890000000


In [75]:
from pystock.utils import Stock

In [76]:
ticker = "APPL"
directory = "Data/AAPL.csv"

apple = Stock(ticker, directory)

In [77]:
data = apple.load_data()

AttributeError: 'Stock' object has no attribute 'convert_str_to_date'