In [28]:
import pandas as pd
import numpy as np
import yfinance as yf

# Load the three cleaned CSV exports (basic transactions, other activity,
# dividends) into their own dataframes, in that order.
df_basic, df_other, df_dividend = map(
    pd.read_csv,
    ('./cleaned/basic.csv', './cleaned/other.csv', './cleaned/dividend.csv'),
)

In [29]:
# Parse the 'Date' column of every frame into pandas datetimes so that the
# date comparisons and date-indexed pivots further down operate on timestamps.
df_basic['Date'] = df_basic['Date'].pipe(pd.to_datetime)
df_other['Date'] = df_other['Date'].pipe(pd.to_datetime)
df_dividend['Date'] = df_dividend['Date'].pipe(pd.to_datetime)

In [30]:
# Account for stock splits
#
# Share counts recorded before a split are multiplied by the split ratio so
# that transactions on either side of a split are expressed in the same units.
# NOTE: this cell mutates df_basic in place, so re-running it without
# reloading df_basic applies every split a second time.

tickers_all = df_basic['Name'].unique()
allSplits = []

for ticker in tickers_all:

    # The ignoring of PFE is a quirk with a spin off they did with Viatris in
    # 2020. This is a bad fix but it'll do for now. Checking it first also
    # avoids a pointless network request.
    if ticker == "PFE":
        continue

    # Get the splits for this ticker; skip tickers that never split
    splits = yf.Ticker(ticker).splits
    if len(splits) == 0:
        continue

    # yfinance may return a timezone-aware index, which cannot be compared
    # with the tz-naive df_basic['Date']; drop the timezone but keep the
    # local calendar date of the split.
    split_dates = pd.to_datetime(splits.index)
    if split_dates.tz is not None:
        split_dates = split_dates.tz_localize(None)

    df_split = pd.DataFrame({'Date': split_dates, 'Split': splits.values})
    df_split["Name"] = ticker
    allSplits.append(df_split)

# pd.concat raises ValueError on an empty list, so fall back to an empty frame
# with the expected columns when no ticker had a split.
if allSplits:
    allSplits = pd.concat(allSplits)
else:
    allSplits = pd.DataFrame(columns=['Date', 'Split', 'Name'])

# Iterate through splits and alter share counts as necessary
for index, row in allSplits.iterrows():
    # Only transactions of this ticker dated strictly before the split change
    affected = (df_basic["Name"] == row["Name"]) & (df_basic["Date"] < row["Date"])
    df_basic.loc[affected, "ShareCount"] = (
        df_basic.loc[affected, "ShareCount"] * row["Split"]
    ).round(6)

In [32]:
# Get the shares held on a given day
def getShareCount(day=None):
    """Return the net number of shares held per ticker before `day`.

    Reads the module-level `df_buys` and `df_sells` frames (columns 'Date',
    'Name', 'ShareCount') and considers only rows dated strictly before `day`.

    Parameters
    ----------
    day : datetime-like, optional
        Cut-off date. Defaults to the current date/time, resolved on every
        call (a `pd.to_datetime("today")` default in the signature would be
        frozen at the moment the function was defined).

    Returns
    -------
    pandas.DataFrame
        Indexed by 'Name' with a single 'ShareCount' column (buys minus
        sells, rounded to 5 decimals). Tickers whose net count is exactly 0
        are dropped.
    """
    day = pd.to_datetime("today") if day is None else pd.to_datetime(day)

    # Get the buys and sells for each day before the given date. Each frame is
    # filtered on its OWN 'Date' column, so the result does not depend on
    # df_buys/df_sells sharing index labels with df_basic.
    columns = ["Date", "Name", "ShareCount"]
    df_buysInRange = df_buys.loc[df_buys["Date"] < day, columns]
    df_sellsInRange = df_sells.loc[df_sells["Date"] < day, columns]

    # Any missing sells, i.e. any stock we've bought but not sold any of, need
    # to still be in the resulting 'sell' df, so add a zero-share sell per buy
    df_emptySells = df_buysInRange.copy()
    df_emptySells["ShareCount"] = 0
    df_sellsInRange = pd.concat([df_sellsInRange, df_emptySells])

    # Aggregate to get total shares in and out
    sharesIn = df_buysInRange.groupby('Name')['ShareCount'].sum()
    sharesOut = df_sellsInRange.groupby('Name')['ShareCount'].sum()

    # Calculate net shares and keep only positions that are not flat
    netShares_all = (sharesIn - sharesOut).round(5).to_frame(name="ShareCount")
    netShares = netShares_all.loc[netShares_all["ShareCount"] != 0]

    return netShares

In [31]:
# Placeholder for valuing a set of holdings on a given date (E.O.D).
# TODO: the intent is to add a 'value' column to the shares input df; at the
# moment the function only echoes its arguments and returns None.
def getValue(shares, date):
    """Print the holdings and the date that a valuation was requested for.

    Parameters
    ----------
    shares : object
        The holdings to value; printed as-is.
    date : object
        The valuation date; printed as-is.
    """
    for line in ("Getting values of ", shares, "On ", date, "----------------"):
        print(line)

In [33]:
# Partition the transaction log by its 'Type' column: one frame of BUY rows
# and one frame of SELL rows. getShareCount() reads both of these.
df_buys = df_basic[df_basic["Type"] == "BUY"]
df_sells = df_basic[df_basic["Type"] == "SELL"]

# Tickers with a non-zero net position as of now, as a plain Python list
tickers_current = getShareCount().index.tolist()

In [34]:
# Generate a daily summary table using the 'other' dataframe initially:
# one row per date, one column per 'Type', cells hold the summed 'Value'.
# aggfunc is passed as the string "sum" rather than np.sum: pandas >= 2.1
# emits a FutureWarning for the numpy callable, and the result is the same.
df_dailyActivity = pd.pivot_table(
    df_other,
    values="Value",
    index="Date",
    columns="Type",
    aggfunc="sum").fillna(0)

# Convert buys and sells to same format as daily summary
# (this yields one 'BUY' and one 'SELL' column)
df_basicPivot = pd.pivot_table(
    pd.concat([df_buys[["Date", "Type", "Value"]], df_sells[["Date", "Type", "Value"]]]),
    values="Value", index="Date",
    columns="Type",
    aggfunc="sum"
).fillna(0)

# pivot dividend table
# (no `columns` argument, so the single resulting column is named 'Value')
df_diviPivot = pd.pivot_table(
    df_dividend,
    values="Value", index="Date",
    aggfunc="sum"
).fillna(0)

# Concatenate daily summary with buys, sells, and dividends; dates missing
# from one of the tables are filled with 0
df_dailyActivity = pd.concat([df_dailyActivity, df_basicPivot, df_diviPivot], axis=1).fillna(0)

# Format daily summary
df_dailyActivity = df_dailyActivity.rename(columns={"BUY": "Total Bought", "SELL": "Total Sold"})
# NOTE(review): this adds up every column, including 'Total Bought' — it is
# only a net figure if purchases are stored as negative values; confirm.
df_dailyActivity['Net Income']= df_dailyActivity.sum(axis=1)
df_dailyActivity = df_dailyActivity.reset_index()

In [35]:
# Get daily portfolio value

# From the first transaction date until now, call getValue() for each date, passing getShareCount() for that date

0.49000000000023647


In [36]:
# Write the daily summary to disk; index=False leaves the row index out of
# the file ('Date' is already an ordinary column after reset_index()).
df_dailyActivity.to_csv(
    './outputs/dailyActivity.csv',
    index=False,
)