In [None]:
import math
import random
from datetime import timedelta
from pathlib import Path

import numpy as np
import pandas as pd
import yfinance as yf
from pandas_datareader import data as web # Reads stock data

pd.options.mode.chained_assignment = None  # default='warn'

In [None]:
def getDateFormat(day, month, year):
    """Build a pandas Timestamp (at midnight) from day, month and year.

    Parameters
    ----------
    day, month, year : int or numeric str
        Calendar components; each is passed through ``int()`` so values
        such as ``"01"`` are accepted as well as plain integers.

    Returns
    -------
    pd.Timestamp
        The corresponding date.

    Raises
    ------
    ValueError
        If a component is not numeric or does not form a valid calendar
        date (e.g. month 13).
    """
    # Construct from explicit components rather than parsing a "Y-M-D" string:
    # a free-form parse can guess at the field order, whereas this rejects
    # out-of-range values outright.
    return pd.Timestamp(year=int(year), month=int(month), day=int(day))

In [None]:
# Default Variables

# Simulation window: 1 Jan 2010 up to today, with the time-of-day zeroed out
startDate = getDateFormat(day=1, month=1, year=2010)
endDate = pd.to_datetime("today").replace(hour=0, minute=0, second=0, microsecond=0)

# Deposits: total amount to deposit and the number of deposits it is split into
depositAmount = 50000
depositNo = 20

# Number of buy and sell transactions to simulate
buyNo = 400
sellNo = 200

# Upper bound on a single sell, as a fraction of the portfolio value
maxSellPer = 0.2

# NOTE(review): not referenced by any cell visible here; presumably a randomness factor - confirm
noisiness = 0.3

# Two tickers from each of: tech, fashion, food, energy, etfs, finance, health, service
tickers = [
    "AAPL", "GOOG",
    "GPS", "BRBY.L",
    "MCD", "K",
    "BP", "XOM",
    "SPY", "HUKX.L",
    "JPM", "BARC.L",
    "PFE", "JNJ",
    "UBER", "ABNB",
]

In [None]:
# Get user input

In [None]:
# Print parameters
startDate_format = startDate.strftime('%d/%m/%Y')
endDate_format = endDate.strftime('%d/%m/%Y')

# Length of the simulation window in (fractional) months.
# A month is not a fixed duration, and newer pandas releases refuse to divide a
# Timedelta by np.timedelta64(1, 'M'). Divide by days instead and convert using
# the average Gregorian month length (365.2425 / 12 days).
AVG_DAYS_PER_MONTH = 365.2425 / 12
months = (endDate - startDate) / np.timedelta64(1, 'D') / AVG_DAYS_PER_MONTH

# Split into whole years plus the remaining whole months
length_years = int(months / 12)
length_months = int(months % 12)
print(f"Simulating investments from {startDate_format} to {endDate_format} ({length_years} year(s) and {length_months} month(s))")

# Average size of a single deposit, rounded to pence
meanDeposit = round(depositAmount / depositNo, 2)
print(f"Simulating {depositNo} deposits totalling £{depositAmount} for a mean deposit of £{meanDeposit}")

print(f"Simulating {buyNo} buys spread across the following {len(tickers)} companies:")
print(tickers)
# maxSellPer is a fraction, so scale it to a whole percentage for display
print(f"Simulating {sellNo} sells at no more than {int(maxSellPer * 100)}% of the portfolio value per sell")

In [None]:
# Simulate deposits

# Generate 'depositNo' random weights (between 0 and 1), one per deposit
randValues = [random.random() for _ in range(depositNo)]

# Scale the weights so the deposits add up to depositAmount.
# The weights sum to roughly depositNo / 2 and average roughly 0.5, so each
# scaled value is centred on depositAmount / depositNo.
# floor() keeps the values whole, which leaves the total slightly short of depositAmount.
randTotal = sum(randValues)  # computed once rather than once per element
depositValues = [math.floor(weight * depositAmount / randTotal) for weight in randValues]

# Randomly add 1 to values until the total reaches depositAmount exactly
deficit = depositAmount - sum(depositValues)
for _ in range(deficit):
    depositValues[random.randint(0, depositNo - 1)] += 1

totalDays = ((endDate - startDate)/np.timedelta64(1, 'D'))

# We don't want any deposits closer to today than 30 days, but this won't work if the
# total number of days is too small, so take the minimum between 30 and 1/4 of the total days.
# totalDays is a float; random.randint needs integer bounds (floats are rejected on
# recent Python versions), so the limit is converted to a whole number of days.
days_limit = int(totalDays) - min(int(totalDays / 4), 30)

# Generate random day offsets from the start date, then turn them into chronologically ordered dates
randDays = [random.randint(0, days_limit) for _ in range(depositNo)]
depositDates = [startDate + timedelta(days=offset) for offset in sorted(randDays)]

depositTypes = ["Deposit"] * depositNo

df_other = pd.DataFrame({'Date': depositDates, 'Value': depositValues, 'Type': depositTypes})

In [None]:
# Simulate half the buys

In [None]:
# Simulate the sells

In [None]:
# Simulate the other half of buys

In [None]:
# Format and save data

# Dividends are not being synthesised, so create an empty frame that only carries the column names
df_dividend = pd.DataFrame(columns=['Date', 'Value', 'ISIN', 'Name'])

# to_csv does not create missing parent folders, so make sure the output directory exists first
output_dir = Path('./cleaned/synthetic')
output_dir.mkdir(parents=True, exist_ok=True)

# TODO: df_basic is not produced by any cell visible here; re-enable once the buy/sell cells exist
# df_basic.to_csv('./cleaned/basic.csv', index=False)
df_dividend.to_csv(output_dir / 'dividend.csv', index=False)
df_other.to_csv(output_dir / 'other.csv', index=False)
