In [None]:
import pandas as pd
import numpy as np
import random
import math
import yfinance as yf
pd.options.mode.chained_assignment = None  # default='warn'

from numpy.random import choice
from pandas_datareader import data as web # Reads stock data 
from datetime import timedelta

In [None]:
# Get a valid pandas date
def getDateFormat(day, month, year):
    return pd.to_datetime(f"{year}-{month}-{day}")

In [None]:
# Default Variables
startDate = getDateFormat(1, 1, 2010)
endDate = pd.to_datetime("today")
endDate = endDate.replace(hour=0, minute=0, second=0, microsecond=0)

depositAmount = 50000
depositNo = 20

fxRate = 1.35

buyNo = 400
sellNo = 200

maxSellPer = 0.2

noisiness = 0.3

# 2 of tech, food, energy, etfs, finance, health
tickers = ["AAPL", "GOOG", "MCD", "K", "XOM", "PLUG", "SPY", "HUKX.L", "JPM", "BARC.L", "PFE", "JNJ"]

In [None]:
# Get user input

In [None]:
# Print paramaters
startDate_format = startDate.strftime('%d/%m/%Y')
endDate_format = endDate.strftime('%d/%m/%Y')
months = ((endDate - startDate)/np.timedelta64(1, 'M'))

length_years = int(months/12)
length_months = int(months%12)
print("Simulating investments from {0} to {1} ({2} year(s) and {3} month(s))".format(startDate_format, endDate_format, length_years, length_months))

meanDeposit = round(depositAmount/depositNo, 2)
print("Simulating {0} deposits totalling £{1} for a mean deposit of £{2}".format(depositNo, depositAmount, meanDeposit))

print("Simulating {0} buys spread across the following {1} companies:".format(buyNo, len(tickers)))
print(tickers)
print("Simulating {0} sells at no more than {1}% of the portfolio value per sell".format(sellNo, int(maxSellPer*100)))

print("Assuming FX rate of £1 = ${0}".format(fxRate))

In [None]:
# Import all closing prices for each ticker for the required date ranges
df_closing_all = []

for ticker in tickers:
    df_closing_ticker = []
    # Get the close prices for each day
    df_closing_ticker = web.DataReader(ticker, 'yahoo', startDate, endDate)    

    # Format the df
    df_closing_ticker.drop(columns=["High", "Low", "Open", "Volume", "Adj Close"], errors="ignore", inplace=True)
    df_closing_ticker["Name"] = ticker

    tickerCurrency = "GBP" if ticker[len(ticker) - 2] == "." else "USD"

    # If it's USD, divide by the exchange rate for the relevant day
    if tickerCurrency == "USD":
        df_closing_ticker["Close"] = df_closing_ticker["Close"]/fxRate

    # If it's GBp, divide by 100 to convert to £
    elif tickerCurrency == "GBP":
        df_closing_ticker["Close"] = df_closing_ticker["Close"]/100
    else:
        print("ERROR: Unidentified Currency - ", ticker)
    
    df_closing_all.append(df_closing_ticker.reset_index())

# Concatenate all data
df_closing_all = pd.concat(df_closing_all)

In [None]:
# Get a valid pandas date
def getDateFormat(day, month, year):
    return pd.to_datetime(f"{year}-{month}-{day}")

In [95]:
# Generate 'depositNo' random numbers (between 0 and 1) to represent each deposit
randValues = [ random.random() for i in range(depositNo) ]

# Convert these random floats to deposit values
# sum(rand) will be ~ 1/2 of depositNo, and i will be ~0.5 on avg, leading to the sum of all values being close to depositAmount
# But the use of floor() means the sum will be a little lower than depositAmount
depositValues = [ math.floor(i * depositAmount / sum(randValues)) for i in randValues ]

# Randomly add 1 to values until we reach depositAmount
deficit = depositAmount - sum(depositValues)
for i in range(deficit): 
    depositValues[random.randint(0, depositNo - 1)] += 1

20


In [115]:
# Generate the event dates
totalEvents = depositNo + buyNo + sellNo
totalDays = ((endDate - startDate)/np.timedelta64(1, 'D'))
randDays = [ random.randint(0, totalDays) for i in range(totalEvents) ]
eventDates = [ startDate + timedelta(days=r) for r in sorted(randDays) ]

# The event type options (buy, sell, or deposit)
options = ["B", "S", "D"]

# The weights to influence the randomly generated event type
# To start with, we need a deposit
weight_buy, weight_sell = 0, 0
weight_deposit = 1 

# Counters to keep track of how many of each event we've generated
currentNo_deposits, currentNo_buys, currentNo_sells = 0, 0, 0

# The lists to hold all the values to be merged into the resulting dataframes
basic_values, basic_types, basic_tickers, basic_dates, basic_shareCount, basic_fx, basic_currency= [], [], [], [], [], [], []
other_values, other_dates = [], []
other_types = ["Deposit"] * depositNo

# Counter to keep track of how many days left until the end date, starts as the total number of days
daysLeft = totalEvents

def fillBuyDetails(value):
    print(value)
    basic_currency.append("test")
    basic_tickers.append("test")
    basic_shareCount.append("test")
    basic_fx.append("test")

# Procedually generate the events
for i in range(totalEvents):

    # Normalise the weights
    weightSum = weight_buy + weight_sell + weight_deposit
    weights = [weight_buy/weightSum, weight_sell/weightSum, weight_deposit/weightSum]

    # Generate the event type (buy, sell, or deposit), based on the weights
    eventType = choice(options, 1, p=weights)

    date = eventDates[i]

    currentBalance = (sum(other_values) + sum(basic_values))    # The amount of money in the bank

    if eventType == "D":
        other_dates.append(date)
        currentNo_deposits += 1
        other_values.append(depositValues[depositNo - currentNo_deposits])

    elif eventType == "B":
        basic_dates.append(date)
        basic_types.append("BUY")

        #TODO SIM ALL BUYS FIRST!!!
        
        # Predict the rough value we should be aiming for, taking into account the amount we have in the bank along with the result of future transactions
        buysRemaining = buyNo - currentNo_buys                      # The no. of buys left to generate
        moneyRemaining = depositAmount - sum(other_values)          # The amount of money left to deposit
        buyValue = random.randint(math.floor(currentBalance*0.1), math.ceil(currentBalance*0.3))
        basic_values.append(-buyValue)
        fillBuyDetails(buyValue)
        currentNo_buys +=1

    elif eventType == "S":
        basic_dates.append(date)
        basic_types.append("SELL")
        currentNo_sells += 1
        

    # Update the number of days left until the end
    daysLeft = ((endDate - date)/np.timedelta64(1, 'D'))

    # Update the weights based on all events generated so far
    weight_deposit = depositNo - currentNo_deposits

    # Dont do any sells until we've done at least 1/4 of buys
    if currentNo_buys > buyNo/4:
        weight_sell = sellNo - currentNo_sells

    if currentBalance > 10:
        weight_buy = buyNo - currentNo_buys
    else:
        weight_buy = buyNo - currentNo_buys
        weight_sell = sellNo - currentNo_sells

print("Deposits expected:", depositNo, "Actual:", currentNo_deposits)
print("Deposits £ expected:", depositAmount, "Actual:", sum(other_values))
print("Buys expected:", buyNo, "Actual:", currentNo_buys)
print("Sells expected:", sellNo, "Actual:", currentNo_sells)

df_other = pd.DataFrame({
    'Date': other_dates, 
    'Value': other_values, 
    'Type': other_types
})

# df_basic = pd.DataFrame({
#     'Date': basic_dates, 
#     'Value': basic_values, 
#     'Type': basic_types, 
#     'ShareCount': basic_shareCount, 
#     'FXRate': basic_fx, 
#     'Name': basic_tickers, 
#     'Currency': basic_currency
# })

# print(df_basic)
# print(df_other)

1149
472
530
384
430
141
319
182
209
61
110
45
58
56
34
45
33
20
20
10
9
5
2
3
4
1
3
1
0
0
1
2
1
1
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
365
326
179
395
121
251
180
71
55
60
45
396
140
167
211
142
93
42
514
368
232
635
1357
797
587
208
235
390
326
188
130
80
61
43
69
174
223
173
74
49
54
27
28
41
44
16
11
20
22
8
16
6
8
5
3
5
2
2
3
0
0
1
1
0
0
0
0
1
1
0
0
840
563
323
633
428
2021
1174
1699
559
1084
671
612
282
242
131
130
229
87
112
53
66
85
21
56
85
99
95
73
26
38
37
23
16
6
6
11
5
3
3
2
1
3
1
2
1
0
434
373
306
313
266
196
150
210
110
868
494
731
274
641
285
254
194
131
198
87
62
43
34
27
29
54
29
23
16
17
17
7
6
7
3
1
1
2
3
1
3
2
1
0
1
1
1
0
0
0
0
0
0
0
0
588
750
285
478
368
88
166
149
47
45
45
69
72
343
399
385
337
233
68
153
53
48
35
37
37
21
22
29
13
13
10
12
2
3
5
3
1
3
2
2
0
1
2
523
386
333
91
121
163
42
417
190
126
136
288
93
98
131
84
50
28
37
35
27
30
15
16
7
3
5
7
4
5
2
1
1
1
1
1
2
2
0
1
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0


In [None]:
# Format and save data

# Dividends are not being synthesised, so create an empty version
df_dividend = pd.DataFrame(columns=['Date', 'Value', 'Name'])

# df_basic.to_csv('./cleaned/basic.csv', index=False)
df_dividend.to_csv('./cleaned/synthetic/dividend.csv', index=False)
df_other.to_csv('./cleaned/synthetic/other.csv', index=False)
buys_total.to_csv('./cleaned/synthetic/basic.csv', index=False)
