In [15]:
import math
import random
from datetime import timedelta
from pathlib import Path

import numpy as np
import pandas as pd
import yfinance as yf
from numpy.random import choice
from pandas_datareader import data as web # Reads stock data 

pd.options.mode.chained_assignment = None  # default='warn'

In [16]:
# Build a pandas date from separate day, month and year parts
def getDateFormat(day, month, year):
    """Return the pandas datetime for the given day, month and year.

    The three parts are joined into "year-month-day" text and parsed with
    ``pd.to_datetime``.
    """
    dateText = "{0}-{1}-{2}".format(year, month, day)
    return pd.to_datetime(dateText)

In [17]:
# Default Variables
startDate = getDateFormat(1, 1, 2010)
# Today, truncated to midnight so date differences come out in whole days
endDate = pd.to_datetime("today")
endDate = endDate.replace(hour=0, minute=0, second=0, microsecond=0)

# Seed for the global `random` and NumPy generators.
# None (the default) draws a fresh seed from the OS on every run, exactly as
# if no seed had been set; use an int to make the random draws repeatable.
randomSeed = None
random.seed(randomSeed)
np.random.seed(randomSeed)

depositAmount = 50000
depositNo = 20

fxRate = 1.35

buyNo = 400

# Range between 0-1 (min spend MUST be lower than maxSpend)
# Higher = spend more per buy = more sells
# Larger range = more noise
minSpend = 0.1
maxSpend = 0.3

# Range between 0-1
# The max % of the current portfolio value to sell
maxSell = 0.3

# Fail early on settings that break the rules stated above, rather than
# part-way through the simulation.
if not 0 <= minSpend < maxSpend <= 1:
    raise ValueError("minSpend and maxSpend must satisfy 0 <= minSpend < maxSpend <= 1")
if not 0 <= maxSell <= 1:
    raise ValueError("maxSell must be between 0 and 1")

# 2 of tech, food, energy, etfs, finance, health
tickers = ["AAPL", "GOOG", "MCD", "K", "XOM", "PLUG", "SPY", "HUKX.L", "JPM", "BARC.L", "PFE", "JNJ"]

In [18]:
# Get user input

In [19]:
# Print parameters
startDate_format = startDate.strftime('%d/%m/%Y')
endDate_format = endDate.strftime('%d/%m/%Y')

# Count whole calendar months between the two dates.
# A month is not a fixed length of time, so dividing the Timedelta by
# np.timedelta64(1, 'M') is at best an approximation (and recent pandas
# versions refuse the 'M' unit); year/month arithmetic is exact.
months = (endDate.year - startDate.year) * 12 + (endDate.month - startDate.month)
if endDate.day < startDate.day:
    # The last month has not been completed yet
    months -= 1

length_years = months // 12
length_months = months % 12
print("Simulating investments from {0} to {1} ({2} year(s) and {3} month(s))".format(startDate_format, endDate_format, length_years, length_months))

meanDeposit = round(depositAmount/depositNo, 2)
print("Simulating {0} deposits totalling £{1} for a mean deposit of £{2}".format(depositNo, depositAmount, meanDeposit))

print("Simulating {0} buys spread across the following {1} companies:".format(buyNo, len(tickers)))
print(tickers)

print("Assuming FX rate of £1 = ${0}".format(fxRate))

Simulating investments from 01/01/2010 to 15/11/2021 (11 year(s) and 10 month(s))
Simulating 20 deposits totalling £50000 for a mean deposit of £2500.0
Simulating 400 buys spread across the following 12 companies:
['AAPL', 'GOOG', 'MCD', 'K', 'XOM', 'PLUG', 'SPY', 'HUKX.L', 'JPM', 'BARC.L', 'PFE', 'JNJ']
Assuming FX rate of £1 = $1.35


In [20]:
# Import all closing prices for each ticker for the required date ranges
closingFrames = []

for ticker in tickers:
    # Get the close prices for each day
    # NOTE(review): this depends on pandas_datareader's 'yahoo' source being
    # available - confirm it still is; yfinance is imported but not used here.
    df_closing_ticker = web.DataReader(ticker, 'yahoo', startDate, endDate)

    # Keep only the close price and tag every row with its ticker
    df_closing_ticker = df_closing_ticker.drop(
        columns=["High", "Low", "Open", "Volume", "Adj Close"], errors="ignore"
    )
    df_closing_ticker["Name"] = ticker

    # A "." as the second-to-last character (e.g. "BARC.L") marks a ticker
    # priced in GBp; every other ticker is treated as USD.
    tickerCurrency = "GBP" if ticker[len(ticker) - 2] == "." else "USD"

    if tickerCurrency == "USD":
        # Convert to £ using the single fixed fxRate (not a per-day rate)
        df_closing_ticker["Close"] = df_closing_ticker["Close"]/fxRate
    else:
        # GBp: divide by 100 to convert to £
        df_closing_ticker["Close"] = df_closing_ticker["Close"]/100

    # Move the date index into a regular column alongside "Close" and "Name"
    closingFrames.append(df_closing_ticker.reset_index())

# Concatenate all data
df_closing_all = pd.concat(closingFrames)

In [21]:
# Get a valid pandas date
# (this repeats the getDateFormat helper already defined in an earlier cell;
# running the cell simply rebinds the same name)
def getDateFormat(day, month, year):
    """Parse "year-month-day" text built from the arguments into a pandas datetime."""
    parts = (year, month, day)
    return pd.to_datetime("-".join(str(part) for part in parts))

In [22]:
# Simulate deposits

# Generate 'depositNo' random numbers (between 0 and 1) to represent each deposit
randValues = [ random.random() for _ in range(depositNo) ]
randTotal = sum(randValues)

# Convert these random floats to deposit values.
# Scaling each number by depositAmount / randTotal makes the scaled values add up to depositAmount;
# floor() then rounds each one down, so the integer sum ends up a little lower than depositAmount.
depositValues = [ math.floor(r * depositAmount / randTotal) for r in randValues ]

# Randomly add 1 to values until we reach depositAmount
deficit = depositAmount - sum(depositValues)
for _ in range(deficit):
    depositValues[random.randint(0, depositNo - 1)] += 1

# We don't want any deposits closer to today than 30 days, but this won't work if the total number of days is too small, so take the minimum between 30 and 1/4 of the total days
# Whole days as an int: random.randint needs integer bounds (float bounds are rejected by current Python versions)
totalDays = (endDate - startDate).days
days_limit = totalDays - min(totalDays // 4, 30)

# Generate list of random numbers (acting as days to add to the start date), then add them to generate the list of random dates
randDays = [ random.randint(0, days_limit) for _ in range(depositNo - 1) ]
depositDates = [ startDate + timedelta(days=r) for r in sorted(randDays) ]
# One deposit always lands on the start date itself; it is appended last, so the rows are not in date order
depositDates.append(startDate)
depositTypes = ["Deposit"] * depositNo

df_other = pd.DataFrame({'Date': depositDates, 'Value': depositValues, 'Type': depositTypes})

In [23]:
def getValue(date):
    """Return the total value of all deposits made on or before ``date``.

    Reads the module-level ``df_other`` frame and sums the "Value" of every
    row whose "Type" is "Deposit" and whose "Date" is not later than ``date``.
    """
    isDeposit = df_other["Type"] == "Deposit"
    madeByDate = df_other["Date"] <= date
    return df_other.loc[isDeposit & madeByDate, "Value"].sum()

In [24]:
def getPrice(ticker, date, prices=None):
    """Return the most recent close price for ``ticker`` on or before ``date``.

    Days without a price (weekends, holidays) fall back to the latest earlier
    day that has one.

    Parameters
    ----------
    ticker : str
        Value to match against the "Name" column.
    date : datetime-like
        Day the price is wanted for; compared against the "Date" column.
    prices : pandas.DataFrame, optional
        Frame with "Name", "Date" and "Close" columns. Defaults to the
        module-level ``df_closing_all``.

    Returns
    -------
    The "Close" value of the first matching row on the latest available day.

    Raises
    ------
    ValueError
        If there is no price for ``ticker`` on or before ``date``. Stepping
        back one day at a time would never terminate in that situation.
    """
    if prices is None:
        prices = df_closing_all

    onOrBefore = (prices["Name"] == ticker) & (prices["Date"] <= date)
    history = prices.loc[onOrBefore, ["Date", "Close"]]
    if history.empty:
        raise ValueError(f"No closing price for {ticker} on or before {date}")

    latestDate = history["Date"].max()
    return history.loc[history["Date"] == latestDate, "Close"].values[0]

In [25]:
def generateBuy(date, value):
    """Create a purchase of a randomly chosen ticker worth ``value`` on ``date``.

    Returns ``[ticker, shareCount, fxRate, currency]``. A ticker with "." as
    its second-to-last character (e.g. "BARC.L") is reported as GBP with an
    FX rate of 1; any other ticker is reported as USD with the module-level
    ``fxRate``.
    """
    buyTicker = random.choice(tickers)

    # Number of shares the money buys at that day's price
    sharePrice = getPrice(buyTicker, date)
    shareCount = value/sharePrice

    hasExchangeSuffix = buyTicker[len(buyTicker) - 2] == "."
    if hasExchangeSuffix:
        fx, currency = 1, "GBP"
    else:
        fx, currency = fxRate, "USD"

    return [buyTicker, shareCount, fx, currency]

In [26]:
def generateSell(portfolio, date):
    """Pick a random holding and an amount of it to sell on ``date``.

    Parameters
    ----------
    portfolio : pandas.DataFrame
        One row per held ticker with at least "Ticker" and "ShareCount"
        columns.
    date : datetime-like
        Day of the sale; prices are looked up with ``getPrice``.

    Returns
    -------
    list
        ``[ticker, shareCount, value, fxRate, currency]`` for the sale.

    Raises
    ------
    ValueError
        If no row of ``portfolio`` has a positive share count.

    Notes
    -----
    Each position is valued here as ShareCount x price on ``date``. The
    portfolio's "Value" column is deliberately not used: the simulation loop
    in this notebook fills it with the per-share price, and sizing a sale
    from that could sell more shares than are held.
    """
    # Only positions that still hold shares can be sold
    held = portfolio.loc[portfolio["ShareCount"] > 0, ["Ticker", "ShareCount"]].copy()
    if held.empty:
        raise ValueError("Cannot generate a sell: the portfolio holds no shares")

    held["Price"] = [getPrice(heldTicker, date) for heldTicker in held["Ticker"]]
    held["PositionValue"] = held["ShareCount"] * held["Price"]

    # Sell a random amount of at least 10, up to maxSell of the whole portfolio
    totalValue = held["PositionValue"].sum()
    sellValue = max(10, random.randint(0, math.floor(totalValue*maxSell)))

    eligible = held.loc[held["PositionValue"] >= sellValue]
    if eligible.empty:
        # No single position is worth the drawn amount: sell all of the
        # largest one instead of failing on an empty sample.
        chosen = held.nlargest(1, "PositionValue").iloc[0]
        sellValue = chosen["PositionValue"]
    else:
        chosen = eligible.sample(n=1).iloc[0]

    sellTicker = chosen["Ticker"]
    sellShareCount = sellValue/chosen["Price"]

    sellFX = fxRate
    sellCurrency = "USD"

    # "." as the second-to-last character (e.g. "BARC.L") marks a GBP ticker
    if sellTicker[len(sellTicker) - 2] == ".":
        sellFX = 1
        sellCurrency = "GBP"

    return [sellTicker, sellShareCount, sellValue, sellFX, sellCurrency]

In [27]:
# Simulate buys and sells

firstDeposit = df_other["Date"].min()

# getPrice only considers closes on or before the requested day, so a buy must
# not be dated before every ticker has at least one close price available.
firstPricedDate = df_closing_all.groupby("Name")["Date"].min().max()
firstBuyDate = max(firstDeposit, firstPricedDate)

# Whole days as an int: random.randint needs integer bounds
totalDays = (endDate - firstBuyDate).days
randDays = [ random.randint(0, totalDays) for _ in range(buyNo) ]
buyDates = [ firstBuyDate + timedelta(days=r) for r in sorted(randDays) ]

# One entry per trade, kept as parallel lists. Buys are stored with a negative
# value and sells with a positive one, so sum(basic_values) is the net cash flow.
basic_values, basic_types, basic_tickers, basic_dates, basic_shareCount, basic_fx, basic_currency= [], [], [], [], [], [], []

# Every buy except the last one; the last is generated after the loop
for buyDate in buyDates[:-1]:
    # The amount of money in the bank
    currentBalance = getValue(buyDate) + sum(basic_values)

    # If we have less than £10 in the bank, sell something
    if currentBalance < 10:

        # Add the sell to the day before, so we have the money to buy today
        sellDate = buyDate - timedelta(days=1)

        # Net shares held per ticker: buys add shares, anything else removes them
        heldShares = {}
        for tradeTicker, tradeType, tradeShares in zip(basic_tickers, basic_types, basic_shareCount):
            signedShares = tradeShares if tradeType == "BUY" else -tradeShares
            heldShares[tradeTicker] = heldShares.get(tradeTicker, 0) + signedShares

        currentTickers = list(heldShares.keys())
        currentSharecount = list(heldShares.values())
        # NOTE(review): 'Value' is filled with the per-share close price here, not shares x price
        currentValues = [ getPrice(t, sellDate) for t in currentTickers ]

        # Generate the sell
        portfolio = pd.DataFrame({'Ticker': currentTickers, 'ShareCount': currentSharecount, 'Value': currentValues})
        sell = generateSell(portfolio, sellDate)

        # Append the values to the lists
        basic_values.append(sell[2])
        basic_types.append("SELL")
        basic_tickers.append(sell[0])
        basic_dates.append(sellDate)
        basic_shareCount.append(sell[1])
        basic_fx.append(sell[3])
        basic_currency.append(sell[4])

        # Re-calculate the current balance
        currentBalance = getValue(buyDate) + sum(basic_values)

    # Spend between minSpend and maxSpend of the balance, but never less than 10
    buyValue = max(10, random.randint(math.floor(currentBalance*minSpend), math.ceil(currentBalance*maxSpend)))
    buy = generateBuy(buyDate, buyValue)
    basic_values.append(-buyValue)
    basic_types.append("BUY")
    basic_tickers.append(buy[0])
    basic_dates.append(buyDate)
    basic_shareCount.append(buy[1])
    basic_fx.append(buy[2])
    basic_currency.append(buy[3])

# Generate the last buy to use up the remaining balance
lastBuyDate = buyDates[-1]
currentBalance = getValue(lastBuyDate) + sum(basic_values)
buy = generateBuy(lastBuyDate, currentBalance)

basic_values.append(-currentBalance)
# Same "BUY" label as every other purchase, dated on the day it was priced
basic_types.append("BUY")
basic_tickers.append(buy[0])
basic_dates.append(lastBuyDate)
basic_shareCount.append(buy[1])
basic_fx.append(buy[2])
basic_currency.append(buy[3])

df_basic = pd.DataFrame({
    'Date': basic_dates, 
    'Value': basic_values, 
    'Type': basic_types, 
    'ShareCount': basic_shareCount, 
    'FXRate': basic_fx, 
    'Name': basic_tickers, 
    'Currency': basic_currency
})

In [28]:
# Format and save data

# Dividends are not being synthesised, so create an empty version
df_dividend = pd.DataFrame(columns=['Date', 'Value', 'Name'])

# Create the output folder first; to_csv does not create missing directories
outputDir = Path('./cleaned/synthetic')
outputDir.mkdir(parents=True, exist_ok=True)

df_basic.to_csv(outputDir / 'basic.csv', index=False)
df_dividend.to_csv(outputDir / 'dividend.csv', index=False)
df_other.to_csv(outputDir / 'other.csv', index=False)
