In [1]:
%%capture
# Environment bootstrap; %%capture suppresses the install/clone output of this cell.
import sys
# NOTE(review): the install is unpinned, so the resolved version can change between runs.
!pip install tensorflow-addons
# Clone the repository that presumably provides the `backend` package imported in the next cell.
!git clone https://github.com/ICascha/QuantGANs-replication.git
# Make the checkout importable; the absolute path assumes the clone landed under /content.
sys.path.append('/content/QuantGANs-replication/')

In [2]:
from backend.preprocessing import *
from backend.metrics import *
from backend.gan import CGAN
from backend.tcn import make_TCN

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from tensorflow.random import normal
from tensorflow.keras.utils import Progbar
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Concatenate, Input
from datetime import datetime, timedelta

In [3]:
# Dataset with returns and covar info of most publicly traded US stocks; it can be obtained from:
# https://www.openicpsr.org/openicpsr/project/112877/version/V1/view
# The first CSV column is dropped by position (presumably a saved index -- TODO confirm).
df = pd.read_csv('/content/covar_dataset.csv', parse_dates=['week']).iloc[:, 1:]

# Keep only companies whose SIC code, read as text, starts with one of these prefixes.
sic = df['HSICCD'].astype('str').str
sic_prefixes = ('60', '61', '62', '6711', '6712')
keep = sic.startswith(sic_prefixes[0])
for prefix in sic_prefixes[1:]:
    keep = keep | sic.startswith(prefix)

# Train cut-off: only weeks up to and including 2005-01-01 are used.
keep = keep & (df['week'] <= np.datetime64('2005-01-01'))

# .copy() makes the result an independent frame, so the .loc assignments performed by
# later cells write to it directly instead of to a slice of the raw frame.
df = df.loc[keep, ['week', 'ret', 'marketret', 'ted', 'permno', 'SIZE']].copy()

In [4]:
# Standardize returns per company (permno): subtract the company's own mean return and
# divide by its own sample standard deviation (ddof=1, the pandas default).
# A single grouped transform replaces the per-company loop, which rebuilt a full-frame
# mask and filtered the frame three times for every company.
# NOTE(review): rows whose permno is missing are not part of any group; none are expected here.
ret_by_permno = df.groupby('permno')['ret']
df = df.assign(
    ret=(df['ret'] - ret_by_permno.transform('mean')) / ret_by_permno.transform('std')
)

In [5]:
# Standardize ted and marketret.
# Both columns are repeated on every company row of a given week, so the mean and
# standard deviation are taken over the de-duplicated (week, marketret, ted) rows to
# avoid weighting a week by the number of companies observed in it.
df_mkt_ted = df[['week', 'marketret', 'ted']].drop_duplicates()
mkt_mean, mkt_std = df_mkt_ted['marketret'].mean(), df_mkt_ted['marketret'].std()
ted_mean, ted_std = df_mkt_ted['ted'].mean(), df_mkt_ted['ted'].std()

# Standardized weekly series, kept for inspection.
std_mkt = (df_mkt_ted['marketret'] - mkt_mean) / mkt_std
std_ted = (df_mkt_ted['ted'] - ted_mean) / ted_std

# Apply the same affine transform to the full frame directly. This yields the values a
# raw-value -> standardized-value lookup would produce, without building float-keyed
# dictionaries or depending on exact float key matches.
df = df.assign(
    marketret=(df['marketret'] - mkt_mean) / mkt_std,
    ted=(df['ted'] - ted_mean) / ted_std,
)

In [6]:
# Dilation schedule handed to both networks: 2**0, 2**1, 2**2.
dilations = np.power(2, np.arange(3))
fixed_filters = 40
moving_filters = 10
# rfs is presumably the receptive field size; later cells use 2*rfs - 1 as the window length.
rfs = 2 * sum(dilations) + 1

# NOTE(review): the three positional booleans and the shape-like list are interpreted by
# make_TCN in backend.tcn, which is not visible here -- check there before changing them.
discriminator = make_TCN(
    dilations, fixed_filters, moving_filters,
    False, True, False,
    [5, 2 * rfs - 1, 1],
    block_size=4,
)
generator = make_TCN(
    dilations, fixed_filters, moving_filters,
    False, False, False,
    [10, None, 3],
    halve_output_series=True,
)

In [7]:
df = df.sort_values(by='week')

length = 2*rfs - 1
n_series = 3

a_week = np.timedelta64(1, 'W')
# A window starting at week t spans t .. t + (length - 1) weeks, both ends inclusive.
window_length = np.timedelta64(length-1, 'W')
window_permno = []
valid_windows = []

unique_weeks = df['week'].unique()

pb = Progbar(len(unique_weeks))

# For the window starting at each unique week in the dataset, find the companies that
# have exactly `length` rows inside the window (i.e. no missing weeks). Their permno and
# mean SIZE over the window are stored in window_permno[j] as a float array with one
# (permno, mean SIZE) row per company. A window with at least n_series such companies
# can be used for training, so its index j is recorded in valid_windows.
for j, week in enumerate(unique_weeks):
    window_end = week + window_length
    df_window = df[(df['week'] >= week) & (df['week'] <= window_end)]

    # One grouped pass per window instead of a mask per company; sort=False keeps the
    # companies in order of first appearance, matching df_window['permno'].unique().
    size_by_permno = df_window.groupby('permno', sort=False)['SIZE']
    n_obs = size_by_permno.size()
    complete = n_obs[n_obs == length].index
    mean_size = size_by_permno.mean().loc[complete]

    # An empty window yields an empty 1-D array, which len() below treats as "no companies".
    window_permno.append(np.array(list(zip(complete, mean_size))))

    if len(window_permno[-1]) >= n_series:
        valid_windows.append(j)

    # Progbar.update expects the number of completed steps, so report j + 1; passing j
    # would leave the bar one step short of its target after the final iteration.
    pb.update(j + 1)



In [8]:
# Earliest value of the 'week' column, converted to a NumPy scalar.
# NOTE(review): not referenced by any cell visible in this notebook -- possibly a leftover.
min_week = df['week'].min().to_numpy()

class cGANMultiAsset(CGAN):
    """CGAN subclass whose training batches are assembled from several companies at once.

    Only `train` is overridden. It reads the notebook-level globals `df`,
    `unique_weeks` and `n_series` in addition to its arguments.
    """

    # Override train method
    def train(self, window_permno, valid_windows, length, batch_size, n_batches, additional_d_steps):
        """Run `n_batches` training steps on randomly sampled multi-company windows.

        Args:
            window_permno: per-window arrays with one (permno, mean SIZE) row per
                company, indexed by window start position in `unique_weeks`.
            valid_windows: window start positions that hold at least `n_series` companies.
            length: number of weekly observations per window.
            batch_size: number of windows per batch.
            n_batches: number of training steps to run.
            additional_d_steps: forwarded unchanged to `self.train_step`.

        Raises:
            ValueError: if a sampled company does not have exactly `length` rows in
                its window.

        Each sample has `n_series + 2` channels: the first `n_series` hold the returns
        of the sampled companies, the last two hold `marketret` and `ted`.
        """
        progress = Progbar(n_batches)

        def asset_rows(df_window, permno):
            # Rows of one company inside the window, checked against the expected length.
            rows = df_window[df_window['permno'] == permno]
            if len(rows) != length:
                raise ValueError(
                    'permno %s has %d rows in its window, expected %d' % (permno, len(rows), length)
                )
            return rows

        for n_batch in range(n_batches):

            batch = np.zeros((batch_size, length, n_series + 2))
            # Sample windows with replacement only when there are fewer valid windows than slots.
            windows_batch = np.random.choice(valid_windows, size=batch_size, replace=len(valid_windows) < batch_size)

            for j, w in enumerate(windows_batch):
                week_range = unique_weeks[w], unique_weeks[w+length-1]
                df_window = df[(df['week'] >= week_range[0]) & (df['week'] <= week_range[1])]

                # Sample n_series distinct companies with probability proportional to mean SIZE.
                candidates, sizes = window_permno[w][:, 0], window_permno[w][:, 1]
                assets = np.random.choice(candidates, replace=False, p=sizes/sizes.sum(), size=n_series)

                # All but the last company contribute only their return series.
                for i, permno in enumerate(assets[:-1]):
                    batch[j, :, i] = asset_rows(df_window, permno)['ret'].to_numpy()

                # The last company fills the final return channel, and its rows also supply
                # the market return and TED channels. This must run for every sample j;
                # executing it after the loop would fill only the last sample of the batch
                # and leave these channels zero everywhere else.
                batch[j, :, n_series-1:] = asset_rows(df_window, assets[-1])[['ret', 'marketret', 'ted']].to_numpy()

            # (batch, length, channels) -> (batch, channels, length, 1) as float32.
            batch = np.swapaxes(np.expand_dims(batch, -1), 1, 2).astype('float32')
            self.train_step(batch, batch_size, additional_d_steps)

            self.train_hook(n_batch)

            progress.update(n_batch + 1)
        
# Instantiate the trainer with the discriminator and generator; the third positional
# argument is rfs*2-1, the same value used as the training window length.
# lr_d / lr_g are presumably the discriminator / generator learning rates -- the
# constructor lives in backend.gan, confirm there.
gan = cGANMultiAsset(discriminator, generator, rfs*2-1, lr_d=1e-4, lr_g=9e-5)

[10, 29, 3]


In [9]:
# Training run: 64 windows per batch for 10000 steps, with no additional discriminator steps.
batch_size, n_steps = 64, 10000
gan.train(
    window_permno,
    valid_windows,
    length=rfs*2-1,
    batch_size=batch_size,
    n_batches=n_steps,
    additional_d_steps=0,
)



In [10]:
%%capture
# Persist the trained generator to the ./cond_generator directory (relative to the
# working directory); %%capture suppresses the save log.
generator.save('cond_generator')

INFO:tensorflow:Assets written to: cond_generator/assets


INFO:tensorflow:Assets written to: cond_generatore/assets


In [11]:
# Bundle the saved generator directory into a single archive.
# NOTE(review): the recorded output of this cell lists cond_generator_sparse/, which does
# not match the directory named here -- the output looks stale; re-run to confirm.
!zip -r train_covar_banks_cond_generator.zip cond_generator/ 

updating: cond_generator_sparse/ (stored 0%)
updating: cond_generator_sparse/variables/ (stored 0%)
updating: cond_generator_sparse/variables/variables.index (deflated 78%)
updating: cond_generator_sparse/variables/variables.data-00000-of-00001 (deflated 8%)
updating: cond_generator_sparse/keras_metadata.pb (deflated 96%)
updating: cond_generator_sparse/saved_model.pb (deflated 94%)
updating: cond_generator_sparse/assets/ (stored 0%)
