In [None]:
import pandas as pd
import numpy as np
import random
import math
import yfinance as yf
pd.options.mode.chained_assignment = None  # default='warn'

from pandas_datareader import data as web # Reads stock data 
from datetime import timedelta

In [None]:
# Get a valid pandas date
def getDateFormat(day, month, year):
    return pd.to_datetime(f"{year}-{month}-{day}")

In [None]:
# Default Variables
startDate = getDateFormat(1, 1, 2010)
endDate = pd.to_datetime("today")
endDate = endDate.replace(hour=0, minute=0, second=0, microsecond=0)

depositAmount = 50000
depositNo = 20

fxRate = 1.35

buyNo = 400
sellNo = 200

maxSellPer = 0.2

noisiness = 0.3

# 2 of tech, food, energy, etfs, finance, health
tickers = ["AAPL", "GOOG", "MCD", "K", "XOM", "PLUG", "SPY", "HUKX.L", "JPM", "BARC.L", "PFE", "JNJ"]

In [None]:
# Get user input

In [None]:
# Print paramaters
startDate_format = startDate.strftime('%d/%m/%Y')
endDate_format = endDate.strftime('%d/%m/%Y')
months = ((endDate - startDate)/np.timedelta64(1, 'M'))

length_years = int(months/12)
length_months = int(months%12)
print("Simulating investments from {0} to {1} ({2} year(s) and {3} month(s))".format(startDate_format, endDate_format, length_years, length_months))

meanDeposit = round(depositAmount/depositNo, 2)
print("Simulating {0} deposits totalling £{1} for a mean deposit of £{2}".format(depositNo, depositAmount, meanDeposit))

print("Simulating {0} buys spread across the following {1} companies:".format(buyNo, len(tickers)))
print(tickers)
print("Simulating {0} sells at no more than {1}% of the portfolio value per sell".format(sellNo, int(maxSellPer*100)))

print("Assuming FX rate of £1 = ${0}".format(fxRate))

In [None]:
# Simulate deposits

# Generate 'depositNo' random numbers (between 0 and 1) to represent each deposit
randValues = [ random.random() for i in range(depositNo) ]

# Convert these random floats to deposit values
# sum(rand) will be ~ 1/2 of depositNo, and i will be ~0.5 on avg, leading to the sum of all values being close to depositAmount
# But the use of floor() means the sum will be a little lower than depositAmount
depositValues = [ math.floor(i * depositAmount / sum(randValues)) for i in randValues ] 

# Randomly add 1 to values until we reach depositAmount
deficit = depositAmount - sum(depositValues)
for i in range(deficit): 
    depositValues[random.randint(0, depositNo - 1)] += 1

# We don't want any deposits closer to today than 30 days, but this won't work if the total number of days is too small, so take the minimum between 30 and 1/4 of the total days
totalDays = ((endDate - startDate)/np.timedelta64(1, 'D'))
days_limit = totalDays - min(int(totalDays/4), 30)

# Generate list of random numbers (acting as days to add to the start date), then add them to generate the list of random dates
randDays = [ random.randint(0, days_limit) for i in range(depositNo) ]
depositDates = [ startDate + timedelta(days=r) for r in sorted(randDays) ]

depositTypes = ["Deposit"] * depositNo

df_other = pd.DataFrame({'Date': depositDates, 'Value': depositValues, 'Type': depositTypes})

In [None]:
# Import all closing prices for each ticker for the required date ranges
df_closing_all = []

for ticker in tickers:
    df_closing_ticker = []
    # Get the close prices for each day
    df_closing_ticker = web.DataReader(ticker, 'yahoo', startDate, endDate)    

    # Format the df
    df_closing_ticker.drop(columns=["High", "Low", "Open", "Volume", "Adj Close"], errors="ignore", inplace=True)
    df_closing_ticker["Name"] = ticker

    tickerCurrency = "GBP" if ticker[len(ticker) - 2] == "." else "USD"

    # If it's USD, divide by the exchange rate for the relevant day
    if tickerCurrency == "USD":
        df_closing_ticker["Close"] = df_closing_ticker["Close"]/fxRate

    # If it's GBp, divide by 100 to convert to £
    elif tickerCurrency == "GBP":
        df_closing_ticker["Close"] = df_closing_ticker["Close"]/100
    else:
        print("ERROR: Unidentified Currency - ", ticker)
    
    df_closing_all.append(df_closing_ticker.reset_index())

# Concatenate all data
df_closing_all = pd.concat(df_closing_all)

In [None]:
# Get a valid pandas date
def getDateFormat(day, month, year):
    return pd.to_datetime(f"{year}-{month}-{day}")

In [None]:
def getValue(date):
    df_depositsInRange = df_other.loc[df_other["Type"] == "Deposit", ["Date", "Value"]]
    df_depositsInRange = df_depositsInRange.loc[df_depositsInRange["Date"] <= date]
    total_deposits = df_depositsInRange["Value"].sum()

In [65]:
# Simulate half the buys
def simulateBuys(buyNo, money):

    # Generate dates
    totalDays = ((endDate - startDate)/np.timedelta64(1, 'D'))
    randDays = [ random.randint(0, totalDays) for i in range(buyNo) ]
    buyDates = [ startDate + timedelta(days=r) for r in sorted(randDays) ]

    # Generate amounts of money
        #   
        #
        #
        #TODO FIXXX
        #
        #
        #    
    randValues = [ random.random() for i in range(buyNo) ]
    buyValues = [ math.floor(i * money / sum(randValues)) for i in randValues ] 

    # Generate tickers
    buyTickers = random.choices(tickers, k=buyNo)

    # Calculate shareCount and fill types, fx and currencies
    buyShareCounts = []
    buyTypes = []
    buyFx = []
    buyCurrencies = []

    for i in range(buyNo):
        ticker = buyTickers[i]
        value = buyValues[i]
        date = buyDates[i]
        price = df_closing_all.loc[(df_closing_all["Date"] == date) & (df_closing_all["Name"] == ticker)]["Close"]
        dayCounter = 1

        while len(price) == 0:
            date = date - timedelta(days=dayCounter)
            price = df_closing_all.loc[(df_closing_all["Date"] == date) & (df_closing_all["Name"] == ticker)]["Close"]
            dayCounter += 1

        buyShareCounts.append(price.values[0]/value)

        buyTypes.append("BUY")

        #   
        #
        #
        #TODO FIXXX
        #
        #
        #

        tickerCurrency = "GBP" if ticker[len(ticker) - 2] == "." else "USD"

        if tickerCurrency == "GBP":
            buyFx.append(1)
            buyCurrencies.append("GBP")
        else:
            buyFx.append(fxRate)
            buyCurrencies.append("USD")
            
        buyFx.append(fxRate)
        buyCurrencies.append("USD")

    return pd.DataFrame(list(zip(buyDates, buyTypes, buyValues, buyShareCounts, buyFx, buyTickers, buyCurrencies)),columns =['Date','Type','Value','ShareCount','FXRate','Name','Currency'])

In [66]:
# Simulate buys and sells
initialBuy_no = int(buyNo/2)
initialBuy_amount = int(depositAmount/2)
print(simulateBuys(initialBuy_no, initialBuy_amount))

          Date Type  Value  ShareCount  FXRate    Name Currency
0   2010-01-28  BUY      3   17.243333    1.00  HUKX.L      GBP
1   2010-02-10  BUY    248    0.020814    1.35    AAPL      USD
2   2010-03-17  BUY    195    1.070177    1.35    GOOG      USD
3   2010-03-25  BUY     42    1.383095    1.35  HUKX.L      USD
4   2010-04-29  BUY    141    1.392207    1.35    GOOG      USD
..         ...  ...    ...         ...     ...     ...      ...
195 2021-07-23  BUY    160    2.036759    1.35     SPY      USD
196 2021-09-14  BUY    165   12.875960    1.35    GOOG      USD
197 2021-09-19  BUY    123    0.896055    1.35    AAPL      USD
198 2021-09-28  BUY    153    0.208376    1.00     PFE      GBP
199 2021-11-08  BUY     16    2.237500    1.35     PFE      USD

[200 rows x 7 columns]


In [None]:
# Format and save data

# Dividends are not being synthesised, so create an empty version
df_dividend = pd.DataFrame(columns=['Date', 'Value', 'Name'])

# df_basic.to_csv('./cleaned/basic.csv', index=False)
df_dividend.to_csv('./cleaned/synthetic/dividend.csv', index=False)
df_other.to_csv('./cleaned/synthetic/other.csv', index=False)
