In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
import os
import ta

In [None]:
# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
class Generator(nn.Module):
    """Fully connected generator that maps noise vectors to synthetic samples.

    The network widens through hidden layers of 128, 256 and 512 units, each
    followed by a ReLU, and ends with a linear projection to ``output_dim``.
    The last layer has no activation, so the outputs are unbounded.

    Args:
        noise_dim: Size of each input noise vector.
        output_dim: Number of values produced per sample.
    """

    def __init__(self, noise_dim, output_dim):
        super().__init__()
        hidden_widths = (128, 256, 512)
        layers = []
        in_features = noise_dim
        for width in hidden_widths:
            layers.append(nn.Linear(in_features, width))
            layers.append(nn.ReLU())
            in_features = width
        # Final projection is left linear (no activation).
        layers.append(nn.Linear(in_features, output_dim))
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Return generated samples for the batch of noise vectors ``x``."""
        return self.model(x)

# Define the Discriminator model
class Discriminator(nn.Module):
    """Fully connected discriminator that scores samples as real or generated.

    The network narrows through hidden layers of 512, 256 and 128 units, each
    followed by a ReLU, and ends with a single sigmoid unit, so every score
    lies in the range [0, 1].

    Args:
        input_dim: Number of values in each input sample.
    """

    def __init__(self, input_dim):
        super().__init__()
        widths = (input_dim, 512, 256, 128)
        layers = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            layers += [nn.Linear(fan_in, fan_out), nn.ReLU()]
        # Single sigmoid output unit.
        layers += [nn.Linear(widths[-1], 1), nn.Sigmoid()]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Return one score per row of ``x``."""
        return self.model(x)

In [None]:
def train_gan_for_stock(stock_data, num_epochs, noise_dim, lr_g=1e-4, lr_d=1e-4):
    """Train a small GAN on one stock's closing prices and return the generator.

    Every epoch is a single full-batch step: the discriminator is updated on
    all real closes plus an equally sized batch of generated values, then the
    generator is updated so that the discriminator labels its output as real.
    Prices are used as-is (no scaling is applied here).

    Args:
        stock_data: DataFrame containing a 'Close' column.
        num_epochs: Number of full-batch training steps to run.
        noise_dim: Length of the noise vector fed to the generator.
        lr_g: Adam learning rate for the generator.
        lr_d: Adam learning rate for the discriminator.

    Returns:
        The trained ``Generator``, placed on the module-level ``device``.

    Raises:
        ValueError: If ``stock_data`` is not a DataFrame with a 'Close'
            column, or if that column holds no finite values.
    """
    # Ensure stock_data is a DataFrame with a 'Close' column
    if not (isinstance(stock_data, pd.DataFrame) and 'Close' in stock_data.columns):
        raise ValueError("stock_data must be a DataFrame containing a 'Close' column")

    # Only train on Close prices. A single NaN/inf would turn the BCE loss into
    # NaN and poison every weight of both networks, so drop such rows up front.
    close_values = np.asarray(stock_data['Close'].values, dtype=np.float64)
    train_data = close_values[np.isfinite(close_values)].reshape(-1, 1)
    if train_data.shape[0] == 0:
        raise ValueError("stock_data 'Close' column contains no finite values to train on")

    num_features = train_data.shape[1]

    # Initialize models
    generator = Generator(noise_dim, num_features).to(device)
    discriminator = Discriminator(num_features).to(device)

    # Optimizers
    optimizer_g = optim.Adam(generator.parameters(), lr=lr_g)
    optimizer_d = optim.Adam(discriminator.parameters(), lr=lr_d)

    # Loss function
    criterion = nn.BCELoss()

    # The real batch and both label tensors never change between epochs, so
    # build them once on the target device instead of once per iteration.
    real_data = torch.tensor(train_data, dtype=torch.float).to(device)
    batch_size = real_data.size(0)
    real_labels = torch.ones(batch_size, 1, device=device)
    fake_labels = torch.zeros(batch_size, 1, device=device)

    # Training loop
    for epoch in range(num_epochs):
        # Generate fake data (one generated value per real sample)
        noise = torch.randn(batch_size, noise_dim).to(device)
        fake_data = generator(noise)

        # Train Discriminator; detach() keeps this step from updating the generator
        optimizer_d.zero_grad()
        real_loss = criterion(discriminator(real_data), real_labels)
        fake_loss = criterion(discriminator(fake_data.detach()), fake_labels)
        d_loss = real_loss + fake_loss
        d_loss.backward()
        optimizer_d.step()

        # Train Generator: it is rewarded when its samples are scored as real
        optimizer_g.zero_grad()
        g_loss = criterion(discriminator(fake_data), real_labels)
        g_loss.backward()
        optimizer_g.step()

        if (epoch + 1) % 100 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], d_loss: {d_loss.item()}, g_loss: {g_loss.item()}')

    return generator

In [None]:
def recalculate_indicators(synthetic_data):
    """Build an OHLCV DataFrame and append technical-indicator columns to it.

    Indicator objects that feed several columns (MACD, Ichimoku, Bollinger,
    Keltner, Donchian, Vortex) are constructed once and reused.

    Args:
        synthetic_data: Data accepted by ``pd.DataFrame`` that provides the
            'Open', 'High', 'Low', 'Close' and 'Volume' columns.

    Returns:
        DataFrame holding the five OHLCV columns followed by the momentum,
        trend, Aroon, volatility and volume indicator columns. 'SMA_200',
        'EMA_200', 'ADX', 'Vortex_Pos' and 'Vortex_Neg' are best-effort: if
        computing one of them raises ValueError, a message is printed and the
        columns not yet added are left out.
    """
    df = pd.DataFrame(synthetic_data, columns=['Open', 'High', 'Low', 'Close', 'Volume'])
    high, low, close, volume = df['High'], df['Low'], df['Close'], df['Volume']

    # Momentum / moving-average indicators
    df['SMA_50'] = ta.trend.SMAIndicator(close=close, window=50).sma_indicator()
    df['EMA_50'] = ta.trend.EMAIndicator(close=close, window=50).ema_indicator()
    df['RSI'] = ta.momentum.RSIIndicator(close=close).rsi()
    df['Stoch_RSI'] = ta.momentum.StochRSIIndicator(close=close).stochrsi()
    df['Williams_R'] = ta.momentum.WilliamsRIndicator(high=high, low=low, close=close).williams_r()
    df['Awesome_Oscillator'] = ta.momentum.AwesomeOscillatorIndicator(high=high, low=low).awesome_oscillator()
    macd = ta.trend.MACD(close=close)
    df['MACD'] = macd.macd()
    df['MACD_Signal'] = macd.macd_signal()
    df['MACD_Diff'] = macd.macd_diff()
    df['TSI'] = ta.momentum.TSIIndicator(close=close).tsi()
    df['KAMA'] = ta.momentum.KAMAIndicator(close=close).kama()
    df['ROC'] = ta.momentum.ROCIndicator(close=close).roc()

    # Trend Indicators
    vortex = ta.trend.VortexIndicator(high=high, low=low, close=close)
    df['Vortex_Diff'] = vortex.vortex_indicator_diff()
    df['TRIX'] = ta.trend.TRIXIndicator(close=close).trix()
    df['Mass_Index'] = ta.trend.MassIndex(high=high, low=low).mass_index()
    df['CCI'] = ta.trend.CCIIndicator(high=high, low=low, close=close).cci()
    df['DPO'] = ta.trend.DPOIndicator(close=close).dpo()
    ichimoku = ta.trend.IchimokuIndicator(high=high, low=low)
    df['Ichimoku_A'] = ichimoku.ichimoku_a()
    df['Ichimoku_B'] = ichimoku.ichimoku_b()

    # Aroon calculation on Close: 1-based position of the extreme value inside
    # each 25-row window, expressed as a percentage of the window length.
    window = 25
    df['Aroon_Up'] = 100 * close.rolling(window=window).apply(lambda x: (x.argmax() + 1) / window, raw=True)
    df['Aroon_Down'] = 100 * close.rolling(window=window).apply(lambda x: (x.argmin() + 1) / window, raw=True)
    df['Aroon_Indicator'] = df['Aroon_Up'] - df['Aroon_Down']

    # Volatility Indicators
    bollinger = ta.volatility.BollingerBands(close=close)
    df['Bollinger_Mid'] = bollinger.bollinger_mavg()
    df['Bollinger_Upper'] = bollinger.bollinger_hband()
    df['Bollinger_Lower'] = bollinger.bollinger_lband()
    df['Bollinger_PBand'] = bollinger.bollinger_pband()
    df['Bollinger_WBand'] = bollinger.bollinger_wband()
    keltner = ta.volatility.KeltnerChannel(high=high, low=low, close=close)
    df['Keltner_Channel_Center'] = keltner.keltner_channel_mband()
    df['Keltner_Channel_Upper'] = keltner.keltner_channel_hband()
    df['Keltner_Channel_Lower'] = keltner.keltner_channel_lband()
    donchian = ta.volatility.DonchianChannel(high=high, low=low, close=close)
    df['Donchian_Channel_Upper'] = donchian.donchian_channel_hband()
    df['Donchian_Channel_Lower'] = donchian.donchian_channel_lband()
    df['ATR'] = ta.volatility.AverageTrueRange(high=high, low=low, close=close).average_true_range()

    # Volume Indicators
    df['OBV'] = ta.volume.OnBalanceVolumeIndicator(close=close, volume=volume).on_balance_volume()
    df['Chaikin_MF'] = ta.volume.ChaikinMoneyFlowIndicator(high=high, low=low, close=close, volume=volume).chaikin_money_flow()
    df['Force_Index'] = ta.volume.ForceIndexIndicator(close=close, volume=volume).force_index()
    df['Ease_of_Movement'] = ta.volume.EaseOfMovementIndicator(high=high, low=low, volume=volume).ease_of_movement()
    df['Volume_Price_Trend'] = ta.volume.VolumePriceTrendIndicator(close=close, volume=volume).volume_price_trend()
    df['VWAP'] = ta.volume.VolumeWeightedAveragePrice(high=high, low=low, close=close, volume=volume).volume_weighted_average_price()

    # Best-effort columns: a ValueError here is reported and the remaining
    # columns of this group are skipped rather than aborting the whole frame.
    try:
        df['SMA_200'] = ta.trend.SMAIndicator(close=close, window=200).sma_indicator()
        df['EMA_200'] = ta.trend.EMAIndicator(close=close, window=200).ema_indicator()
        df['ADX'] = ta.trend.ADXIndicator(high=high, low=low, close=close).adx()
        df['Vortex_Pos'] = vortex.vortex_indicator_pos()
        df['Vortex_Neg'] = vortex.vortex_indicator_neg()
    except ValueError as e:
        print(f"Could not calculate some indicators due to insufficient data: {e}")

    return df

In [None]:
def generate_synthetic_close_prices(generator, noise_dim, num_samples):
    """Sample synthetic closing prices from a trained generator.

    Args:
        generator: Callable (e.g. a trained ``Generator``) mapping a
            ``(num_samples, noise_dim)`` noise tensor to generated values.
        noise_dim: Length of each noise vector.
        num_samples: Number of noise vectors to draw.

    Returns:
        Flattened 1-D NumPy array of the generator's outputs.
    """
    noise = torch.randn(num_samples, noise_dim, device=device)
    # Sampling is inference only, so skip building the autograd graph.
    with torch.no_grad():
        generated = generator(noise)
    synthetic_close_prices = generated.detach().cpu().numpy().flatten()
    return synthetic_close_prices


In [None]:
def derive_high_low_close(close_prices):
    """Derive synthetic High and Low prices as a 2% band around Close.

    Args:
        close_prices: NumPy array (or scalar) of closing prices.

    Returns:
        Tuple ``(high_prices, low_prices)`` with ``high_prices >= low_prices``
        element-wise. For non-negative closes this is ``close * 1.02`` and
        ``close * 0.98`` respectively.
    """
    scaled_up = close_prices * 1.02    # Close + 2% of Close
    scaled_down = close_prices * 0.98  # Close - 2% of Close
    # For a negative close, scaling by 1.02 moves the value further down, which
    # would put "High" below "Low"; order the two bounds explicitly instead.
    high_prices = np.maximum(scaled_up, scaled_down)
    low_prices = np.minimum(scaled_up, scaled_down)
    return high_prices, low_prices

In [None]:
# Maps each ticker symbol (".NS" suffix) to the company's display name.
# Every key must be unique: a repeated key in a dict literal is silently
# collapsed, so a duplicate "OBEROIRLTY.NS" entry was removed from this list.
ticker_to_company_name = {
    "MSUMI.NS": "Motherson Sumi Systems Ltd.",
    "TORNTPOWER.NS": "Torrent Power Ltd.",
    "GODREJPROP.NS": "Godrej Properties Ltd.",
    "SRF.NS": "SRF Ltd.",
    "APLAPOLLO.NS": "APL Apollo Tubes Ltd.",
    "TVSMOTOR.NS": "TVS Motor Company Ltd.",
    "PAGEIND.NS": "Page Industries Ltd.",
    "AUROPHARMA.NS": "Aurobindo Pharma Ltd.",
    "JINDALSTEL.NS": "Jindal Steel & Power Ltd.",
    "BAJAJHLDNG.NS": "Bajaj Holdings & Investment Ltd.",
    "BATAINDIA.NS": "Bata India Ltd.",
    "BHEL.NS": "Bharat Heavy Electricals Ltd.",
    "CANBK.NS": "Canara Bank",
    "CHOLAFIN.NS": "Cholamandalam Investment and Finance Company Ltd.",
    "CUB.NS": "City Union Bank Ltd.",
    "DALMIASUG.NS": "Dalmia Bharat Sugar and Industries Ltd.",
    "ESCORTS.NS": "Escorts Ltd.",
    "FEDERALBNK.NS": "The Federal Bank Ltd.",
    "FORTIS.NS": "Fortis Healthcare Ltd.",
    "GICRE.NS": "General Insurance Corporation of India",
    "GMRINFRA.NS": "GMR Infrastructure Ltd.",
    "GNFC.NS": "Gujarat Narmada Valley Fertilizers & Chemicals Ltd.",
    "GODREJAGRO.NS": "Godrej Agrovet Ltd.",
    "GRASIM.NS": "Grasim Industries Ltd.",
    "HAVELLS.NS": "Havells India Ltd.",
    "HINDPETRO.NS": "Hindustan Petroleum Corporation Ltd.",
    "INDHOTEL.NS": "The Indian Hotels Company Ltd.",
    "JUBLFOOD.NS": "Jubilant FoodWorks Ltd.",
    "LICHSGFIN.NS": "LIC Housing Finance Ltd.",
    "M&MFIN.NS": "Mahindra & Mahindra Financial Services Ltd.",
    "MANAPPURAM.NS": "Manappuram Finance Ltd.",
    "MRF.NS": "MRF Ltd.",
    "NATCOPHARM.NS": "Natco Pharma Ltd.",
    "NCC.NS": "NCC Ltd.",
    "NMDC.NS": "NMDC Ltd.",
    "OBEROIRLTY.NS": "Oberoi Realty Ltd.",
    "PERSISTENT.NS": "Persistent Systems Ltd.",
    "PETRONET.NS": "Petronet LNG Ltd.",
    "RAMCOCEM.NS": "The Ramco Cements Ltd.",
    "RBLBANK.NS": "RBL Bank Ltd.",
    "SAIL.NS": "Steel Authority of India Ltd.",
    "SUNTV.NS": "Sun TV Network Ltd.",
    "TATACOMM.NS": "Tata Communications Ltd.",
    "TATAPOWER.NS": "Tata Power Company Ltd.",
    "THYROCARE.NS": "Thyrocare Technologies Ltd.",
    "TORNTPHARM.NS": "Torrent Pharmaceuticals Ltd.",
    "TRENT.NS": "Trent Ltd.",
    "VOLTAS.NS": "Voltas Ltd.",
    "WHIRLPOOL.NS": "Whirlpool of India Ltd.",
    "YESBANK.NS": "Yes Bank Ltd.",
    "ZEEL.NS": "Zee Entertainment Enterprises Ltd.",
    "ZYDUSWELL.NS": "Zydus Wellness Ltd.",
    "ABBOTINDIA.NS": "Abbott India Ltd.",
    "ASHOKLEY.NS": "Ashok Leyland Ltd.",
    "BALKRISIND.NS": "Balkrishna Industries Ltd.",
    "BEL.NS": "Bharat Electronics Ltd.",
    "CONCOR.NS": "Container Corporation of India Ltd.",
    "CROMPTON.NS": "Crompton Greaves Consumer Electricals Ltd.",
    "DEEPAKNTR.NS": "Deepak Nitrite Ltd.",
    "DIXON.NS": "Dixon Technologies (India) Ltd.",
    "EMAMILTD.NS": "Emami Ltd.",
    "INDIAMART.NS": "IndiaMART InterMESH Ltd.",
    "IRCTC.NS": "Indian Railway Catering and Tourism Corporation Ltd.",
    "JUBLPHARMA.NS": "Jubilant Pharmova Ltd.",
    "LTTS.NS": "L&T Technology Services Ltd.",
    "MFSL.NS": "Max Financial Services Ltd.",
    "METROPOLIS.NS": "Metropolis Healthcare Ltd.",
    "PIIND.NS": "PI Industries Ltd.",
    "POLYCAB.NS": "Polycab India Ltd.",
    "RECLTD.NS": "REC Ltd.",
    "SUPREMEIND.NS": "Supreme Industries Ltd.",
    "TATACONSUM.NS": "Tata Consumer Products Ltd.",
    "TV18BRDCST.NS": "TV18 Broadcast Ltd.",
    "VGUARD.NS": "V-Guard Industries Ltd.",
    "VBL.NS": "Varun Beverages Ltd.",
    "VINATIORGA.NS": "Vinati Organics Ltd.",
    "ZENSARTECH.NS": "Zensar Technologies Ltd.",
    "IDFCFIRSTB.NS": "IDFC First Bank Ltd.",
    "SONACOMS.NS": "Sona BLW Precision Forgings Ltd.",
    "AMBUJACEM.NS": "Ambuja Cements Ltd.",
    "GAIL.NS": "GAIL (India) Ltd.",
    "TATAELXSI.NS": "Tata Elxsi Ltd.",
    "MAXHEALTH.NS": "Max Healthcare Institute Ltd.",
    "LALPATHLAB.NS": "Dr. Lal PathLabs Ltd.",
    "JSWENERGY.NS": "JSW Energy Ltd.",
    "AARTIIND.NS": "Aarti Industries Ltd.",
    "ADANIGREEN.NS": "Adani Green Energy Ltd.",
    "ABFRL.NS": "Aditya Birla Fashion and Retail Ltd.",
    "BANDHANBNK.NS": "Bandhan Bank Ltd.",
    "BANKINDIA.NS": "Bank of India",
    "BERGEPAINT.NS": "Berger Paints India Ltd.",
    "BOSCHLTD.NS": "Bosch Ltd.",
    "CUMMINSIND.NS": "Cummins India Ltd.",
    "DMART.NS": "Avenue Supermarts Ltd.",
    "GLENMARK.NS": "Glenmark Pharmaceuticals Ltd.",
    "GUJGASLTD.NS": "Gujarat Gas Ltd.",
    "HAL.NS": "Hindustan Aeronautics Ltd.",
    "LICI.NS": "Life Insurance Corporation of India",
    "LUXIND.NS": "Lux Industries Ltd.",
    "NAUKRI.NS": "Info Edge (India) Ltd.",
    "PHOENIXLTD.NS": "The Phoenix Mills Ltd.",
    "RAJESHEXPO.NS": "Rajesh Exports Ltd.",
    "SHREECEM.NS": "Shree Cement Ltd.",
    "TATACHEM.NS": "Tata Chemicals Ltd.",
    "THERMAX.NS": "Thermax Ltd.",
    "TTKPRESTIG.NS": "TTK Prestige Ltd.",
    "UJJIVANSFB.NS": "Ujjivan Small Finance Bank Ltd.",
    "VAKRANGEE.NS": "Vakrangee Ltd."
}


In [None]:
def generate_and_save_synthetic_data(generator, noise_dim, num_samples, stock_ticker, original_data, output_dir):
    """Generate synthetic OHLCV rows plus indicators for a stock and save them as CSV.

    Close prices are sampled from ``generator``; Open is the previous Close;
    High/Low come from ``derive_high_low_close`` and are widened where needed
    so that they also enclose Open; Volume is copied from ``original_data``.
    The result is written to
    ``<output_dir>/<stock_ticker>/<stock_ticker>_synthetic.csv``.

    Args:
        generator: Trained generator used to sample close prices.
        noise_dim: Length of each noise vector fed to the generator.
        num_samples: Number of synthetic rows to produce.
        stock_ticker: Ticker symbol; used for the sub-directory and file name.
        original_data: DataFrame with a 'Volume' column for the real stock.
        output_dir: Base directory under which the per-ticker folder is created.

    Returns:
        None. The data is written to disk and a confirmation line is printed.
    """
    # Reuse the shared sampling helper instead of duplicating its logic here.
    synthetic_close_prices = generate_synthetic_close_prices(generator, noise_dim, num_samples)

    # Set Open prices based on Close prices: each Open is the previous Close
    synthetic_open_prices = np.roll(synthetic_close_prices, 1)
    synthetic_open_prices[0] = synthetic_close_prices[0]  # First Open = First Close

    # Derive the band around Close, then make sure every bar is a valid OHLC
    # bar: consecutive samples can differ by more than the band width, so Open
    # may lie outside it and High/Low must be widened to contain Open as well.
    band_upper, band_lower = derive_high_low_close(synthetic_close_prices)
    synthetic_high_prices = np.maximum.reduce([band_upper, band_lower, synthetic_open_prices])
    synthetic_low_prices = np.minimum.reduce([band_upper, band_lower, synthetic_open_prices])

    # Retain the original volume data; pad with its mean when it is too short
    synthetic_volume = original_data['Volume'].values[:num_samples]
    missing = num_samples - len(synthetic_volume)
    if missing > 0:
        # np.mean of an empty slice is NaN; fall back to 0 when there is no volume at all
        avg_volume = np.mean(synthetic_volume) if len(synthetic_volume) else 0
        synthetic_volume = np.pad(synthetic_volume, (0, missing), 'constant', constant_values=avg_volume)

    synthetic_data = {
        'Open': synthetic_open_prices,
        'High': synthetic_high_prices,
        'Low': synthetic_low_prices,
        'Close': synthetic_close_prices,
        'Volume': synthetic_volume
    }

    final_synthetic_df = recalculate_indicators(synthetic_data)

    # Save the synthetic data in a per-ticker sub-directory
    ticker_dir = os.path.join(output_dir, stock_ticker)
    os.makedirs(ticker_dir, exist_ok=True)
    final_synthetic_df.to_csv(os.path.join(ticker_dir, f'{stock_ticker}_synthetic.csv'), index=False)

    print(f"Synthetic data for {stock_ticker} has been saved in {ticker_dir}/")



In [None]:
# Parameters
noise_dim = 100
num_samples = 1000
num_epochs = 1000
lr = 0.0002
output_dir = 'data/synthetic_data'
seed = 42

# Seed PyTorch so that weight initialisation and noise sampling are repeatable
# across Restart & Run All.
torch.manual_seed(seed)

# Generate synthetic data for each stock
for stock_ticker, company_name in ticker_to_company_name.items():
    print(f"Processing stock: {stock_ticker} ({company_name})")

    # Load the historical data for the stock
    stock_data_path = f'data/processed/{stock_ticker}_final.csv'
    stock_data = pd.read_csv(stock_data_path)

    # Column names of the loaded frame (not used below; kept in case a later
    # cell reads it — TODO confirm and remove if unused)
    columns = stock_data.columns

    # Train the GAN for this stock. Pass lr to BOTH optimizers by keyword: as a
    # bare positional argument it only reached lr_g, leaving the discriminator
    # on the function's default learning rate.
    generator = train_gan_for_stock(stock_data, num_epochs, noise_dim, lr_g=lr, lr_d=lr)

    # Generate and save synthetic data for this stock
    generate_and_save_synthetic_data(generator, noise_dim, num_samples, stock_ticker, stock_data, output_dir)
