In [None]:
# Step 1: Collect Real Market Data
import yfinance as yf

# Download S&P 500 historical data
data = yf.download("^GSPC", start="2010-01-01", end="2023-01-01")

# A failed download may come back as an empty frame instead of an exception;
# stop here rather than caching an empty CSV that breaks every later step.
if data.empty:
    raise RuntimeError("yfinance returned no rows for ^GSPC; check network access and the date range")

# Some yfinance versions return two-level (field, ticker) columns even for a
# single ticker. Written as-is, the CSV gets extra header rows and Step 2's
# read_csv(..., index_col="Date") can no longer find the "Date" column.
# Keeping only the field level ("Open", "Close", ...) restores a flat header.
if data.columns.nlevels > 1:
    data.columns = data.columns.get_level_values(0)

# Step 2 looks the index up by this exact name, so pin it before saving.
data.index.name = "Date"

data.to_csv("sp500.csv")  # Save for later use
data.head()

In [2]:
# Step 2: Preprocess the Data
# Preprocess the data by normalizing it and converting it into a format suitable for training a GAN.

import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

# Read the cached download back in, indexed by the parsed "Date" column
data = pd.read_csv("sp500.csv", parse_dates=True, index_col="Date")

# Keep only the closing price. Selecting with a one-element list yields a
# 2-D (n_rows, 1) array, which is the layout the scaler is fitted on below.
prices = data.loc[:, ["Close"]].to_numpy()

# Rescale closes into [0, 1]; `scaler` is kept so later cells can map
# values back to price units with inverse_transform.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(prices)
scaled_prices = scaler.transform(prices)

In [None]:
# Step 3: Train a GAN for Synthetic Data Generation
# Train a GAN to generate synthetic time series data that mimics the S&P 500 closing prices.

import tensorflow as tf
from tensorflow.keras.layers import Dense, LSTM, LeakyReLU, BatchNormalization, Input
from tensorflow.keras.models import Model, Sequential

# Define the GAN generator
def build_generator(latent_dim=100):
    """Build the generator network: latent noise vector -> one scaled price.

    Args:
        latent_dim: Length of the noise vector fed to the generator. Defaults
            to 100, the size used by the GAN input and the noise sampled in
            the training and generation cells.

    Returns:
        An uncompiled Sequential model mapping (batch, latent_dim) inputs to
        (batch, 1) outputs.
    """
    model = Sequential([
        Input(shape=(latent_dim,)),
        Dense(128),
        LeakyReLU(alpha=0.2),
        BatchNormalization(),
        Dense(256),
        LeakyReLU(alpha=0.2),
        BatchNormalization(),
        # The real samples are MinMax-scaled to [0, 1]. A tanh head spans
        # [-1, 1], so half of its output range could never match real data and
        # would inverse-transform to prices below the historical minimum.
        # Sigmoid keeps generated values inside the same (0, 1) range.
        Dense(1, activation='sigmoid')  # Output a single value (scaled price)
    ])
    return model

# Define the GAN discriminator
def build_discriminator():
    """Build the discriminator: one scaled price -> probability of being real.

    Returns:
        An uncompiled Sequential model taking a single input feature and
        ending in a one-unit sigmoid layer.
    """
    model = Sequential()
    # Two LeakyReLU-activated hidden layers of increasing width
    model.add(Dense(128, input_dim=1))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dense(256))
    model.add(LeakyReLU(alpha=0.2))
    # Binary classification head (real/fake)
    model.add(Dense(1, activation='sigmoid'))
    return model

# Assemble the generator, the discriminator and the stacked GAN
generator = build_generator()
discriminator = build_discriminator()

# The discriminator is compiled on its own, before it is frozen, so that the
# direct discriminator.train_on_batch calls in the training loop can update it.
discriminator.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0002, beta_1=0.5),
    loss='binary_crossentropy',
)

# Frozen before building the stacked model so that training `gan` is meant to
# update only the generator.
# NOTE(review): this relies on the discriminator keeping the trainable state it
# had when it was compiled - confirm that holds for the installed Keras version.
discriminator.trainable = False

# Stacked model: noise -> generator -> discriminator verdict
gan_input = Input(shape=(100,))
fake_data = generator(gan_input)
gan_output = discriminator(fake_data)
gan = Model(inputs=gan_input, outputs=gan_output)
gan.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0002, beta_1=0.5),
    loss='binary_crossentropy',
)

# Train the GAN
epochs = 10000
batch_size = 32
latent_dim = 100  # must match the generator / GAN input width

# The target labels never change, so build them once instead of every iteration.
real_labels = np.ones((batch_size, 1))
fake_labels = np.zeros((batch_size, 1))

for epoch in range(epochs):
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    # verbose=0: predict() otherwise prints a progress bar on every one of the
    # 10000 iterations, burying the periodic loss report below.
    fake_prices = generator.predict(noise, verbose=0)

    # Random mini-batch of real scaled closes (sampled with replacement)
    real_prices = scaled_prices[np.random.randint(0, len(scaled_prices), batch_size)]

    # Discriminator step: real samples labelled 1, generated samples labelled 0
    discriminator_loss_real = discriminator.train_on_batch(real_prices, real_labels)
    discriminator_loss_fake = discriminator.train_on_batch(fake_prices, fake_labels)
    discriminator_loss = 0.5 * np.add(discriminator_loss_real, discriminator_loss_fake)

    # Generator step: train the stacked model to make the discriminator
    # answer "real" (label 1) for generated samples.
    generator_loss = gan.train_on_batch(noise, real_labels)

    if epoch % 1000 == 0:
        print(f"Epoch {epoch}, Discriminator Loss: {discriminator_loss}, Generator Loss: {generator_loss}")

In [None]:
# Utility Function for Saving   
def save_models_and_data(generator, discriminator, gan, synthetic_data, scaler):
    """Persist the three models, the synthetic samples and the fitted scaler.

    Files are written to the current working directory under fixed names:
    generator_model.h5, discriminator_model.h5, gan_model.h5,
    synthetic_data.npy and scaler.pkl.

    Args:
        generator: Model exposing ``save(path)``.
        discriminator: Model exposing ``save(path)``.
        gan: Stacked model exposing ``save(path)``.
        synthetic_data: Array-like written with ``np.save``.
        scaler: Picklable scaler object.
    """
    # Imported here because no cell in the notebook imports pickle; without
    # this the function raises NameError on a fresh kernel.
    import pickle

    generator.save("generator_model.h5")
    discriminator.save("discriminator_model.h5")
    gan.save("gan_model.h5")
    np.save("synthetic_data.npy", synthetic_data)
    with open("scaler.pkl", "wb") as f:
        pickle.dump(scaler, f)

In [None]:
# Utility Function for Loading
def load_models_and_data():
    """Reload everything written by ``save_models_and_data``.

    Reads generator_model.h5, discriminator_model.h5, gan_model.h5,
    synthetic_data.npy and scaler.pkl from the current working directory.

    Returns:
        Tuple ``(generator, discriminator, gan, synthetic_data, scaler)``.
    """
    # Imported here because no cell in the notebook imports either name;
    # without these the function raises NameError on a fresh kernel.
    import pickle
    from tensorflow.keras.models import load_model

    generator = load_model("generator_model.h5")
    discriminator = load_model("discriminator_model.h5")
    # NOTE(review): each file is loaded independently, so the reloaded `gan`
    # presumably no longer shares weights with the reloaded generator and
    # discriminator objects - confirm before resuming training from these.
    gan = load_model("gan_model.h5")
    synthetic_data = np.load("synthetic_data.npy")
    # SECURITY: pickle.load can execute arbitrary code. Only load a scaler.pkl
    # that this notebook wrote itself, never one from an untrusted source.
    with open("scaler.pkl", "rb") as f:
        scaler = pickle.load(f)
    return generator, discriminator, gan, synthetic_data, scaler

In [None]:
# USAGE
# No earlier cell defines `synthetic_data`, so on a fresh Run All the save call
# below would fail with NameError. Sample it from the trained generator here,
# using the same noise shape as the generation step (1000 draws, 100-d noise).
synthetic_data = generator.predict(np.random.normal(0, 1, (1000, 100)), verbose=0)

# Save everything
save_models_and_data(generator, discriminator, gan, synthetic_data, scaler)

# Load everything
generator, discriminator, gan, synthetic_data, scaler = load_models_and_data()

In [None]:
# Step 4: Simulate Extreme Scenarios
# Modify the synthetic data to simulate extreme market conditions, such as a crash or high volatility.

# Generate synthetic data
noise = np.random.normal(0, 1, (1000, 100))
synthetic_prices = generator.predict(noise)

# Simulate a crash (e.g., 50% drop)
# The values above are MinMax-scaled, and that scaling has an offset (the
# fitted minimum), so halving the scaled value does not halve the price.
# Apply the 50% shock in price units, then map back to scaled space so that
# crash_prices stays in the same units as synthetic_prices for later cells.
synthetic_price_levels = scaler.inverse_transform(synthetic_prices)
crash_prices = scaler.transform(synthetic_price_levels * 0.5)

# Simulate high volatility (add random noise)
# Noise is added in scaled units: a std of 0.1 is 10% of the fitted price range.
volatile_prices = synthetic_prices + np.random.normal(0, 0.1, synthetic_prices.shape)

In [None]:
# Step 5: Perform Stress Testing
# Use the synthetic data to test a financial model or portfolio. 
# For example, calculate the portfolio value under extreme conditions.
# Example: Calculate portfolio value under crash scenario
# Starting capital for the stress test: $1,000,000
initial_portfolio_value = 1_000_000

# Scale the starting capital by (1 + crash_prices), element by element.
# NOTE(review): crash_prices holds scaled price levels, not returns - confirm
# that 1 + crash_prices is really the intended shock multiplier.
portfolio_value_crash = (1 + crash_prices) * initial_portfolio_value
print("Portfolio values under crash scenario:", portfolio_value_crash)

In [None]:
# Step 6: Analyze Results
# Evaluate the impact of the simulated scenarios on the portfolio or financial model.

import matplotlib.pyplot as plt

# Real vs Synthetic Prices Plot
# All three series are mapped back to price units before plotting.
# NOTE(review): the synthetic and crash series are plotted against their sample
# index, presumably not a time ordering - confirm the "Time" axis label.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(scaler.inverse_transform(scaled_prices), label="Real Prices")
ax.plot(scaler.inverse_transform(synthetic_prices), label="Synthetic Prices")
ax.plot(scaler.inverse_transform(crash_prices), label="Crash Scenario", linestyle="--")
ax.legend()
ax.set_title("Real vs Synthetic Prices")
ax.set_xlabel("Time")
ax.set_ylabel("Price")
plt.show()

# Portfolio Value Under Crash Scenario
# portfolio_value_crash is reused from the stress-testing step rather than
# recomputed here, so the plot and the risk metrics cannot drift away from
# the values that step printed.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(portfolio_value_crash, label="Portfolio Value Under Crash")
ax.legend()
ax.set_title("Portfolio Value Under Crash Scenario")
ax.set_xlabel("Time")
ax.set_ylabel("Portfolio Value ($)")
plt.show()

In [None]:
# Value-at-Risk (VaR) and Conditional Value-at-Risk (CVaR):
# Calculate VaR and CVaR to quantify the potential losses under extreme scenarios.

import numpy as np
# Loss per scenario: positive when the portfolio ends below its starting value
losses = initial_portfolio_value - portfolio_value_crash

# 95% VaR: the loss level that 95% of scenarios do not exceed
var = np.percentile(losses, 95)

# 95% CVaR: average loss across the scenarios at or beyond the VaR level
tail_losses = losses[losses >= var]
cvar = tail_losses.mean()

print(f"95% VaR: ${var:.2f}, 95% CVaR: ${cvar:.2f}")