## PART 1 - Processing the data into a usable format for our purposes and cleaning it up

In [None]:
%pip install pandas
%pip install os
%pip install datetime
%pip install torch
%pip install Dataset

In [30]:
import pandas as pd
import os
import datetime
import torch
from torch.utils.data import Dataset

def load_stock_data(ticker=None, data_dir='C:/archive/Stocks'):
    """Load one ticker's price file and return its OHLC values as a tensor.

    Args:
        ticker: Stock symbol (case-insensitive). When None, the symbol is
            read interactively from stdin, as the original function did.
        data_dir: Directory that holds the ``<ticker>.us.txt`` files.

    Returns:
        A float32 tensor with one row per data line of the file and four
        columns in the order Open, High, Low, Close.

    Raises:
        FileNotFoundError: Propagated from ``pd.read_csv`` when no file
            exists for the requested ticker.
    """
    if ticker is None:
        # Prompt the user to input a stock ticker
        ticker = input('Enter a stock ticker: ')
    # Surrounding whitespace would otherwise end up in the file name.
    ticker = ticker.strip().upper()

    # File names on disk use the lower-cased symbol.
    filename = f'{data_dir}/{ticker.lower()}.us.txt'

    # Skip the file's first row and supply our own column names instead.
    df = pd.read_csv(
        filename,
        skiprows=1,
        header=None,
        names=['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Interest'],
    )

    # Keep only the four price columns (Date, Interest and Volume are dropped).
    df = df.drop(columns=['Date', 'Interest', 'Volume'])

    # Convert the DataFrame to a PyTorch tensor
    tensor = torch.tensor(df.values, dtype=torch.float32)

    # Print a short summary including the first 5 rows of the tensor
    print(f'Stock ticker: {ticker}')
    print(f'Volume of Data (number of days): {tensor.shape[0]}')

    print(f'Opening 5: {tensor[:5]}')

    # Hand the data back so callers can actually use what was loaded.
    return tensor

# Function call for USER: runs the loader, which prompts on stdin for a ticker
# symbol and prints a summary of that ticker's data.
load_stock_data()


Stock ticker: AAPL
Volume of Data (number of days): 8364
Opening 5: tensor([[0.4239, 0.4290, 0.4187, 0.4239],
        [0.4239, 0.4252, 0.4137, 0.4213],
        [0.4252, 0.4367, 0.4252, 0.4290],
        [0.4290, 0.4316, 0.4162, 0.4162],
        [0.4393, 0.4405, 0.4393, 0.4393]])


## PART 2 - GAN

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
import os
import datetime
import torch
from torch.utils.data import Dataset

# Ask which ticker to load. The symbol is upper-cased here and lower-cased
# again when the file name is built.
ticker = input('Enter a stock ticker: ').upper()
filename = f'C:/archive/Stocks/{ticker.lower()}.us.txt'

# Parse the file with explicit column names; skiprows=1 discards the file's
# first row.
df = pd.read_csv(
    filename,
    skiprows=1,
    header=None,
    names=['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Interest'],
)

# Keep only the Open/High/Low/Close columns.
df = df.drop(columns=['Date', 'Interest', 'Volume'])

# Convert the remaining values to a float32 PyTorch tensor.
tensor = torch.tensor(df.to_numpy(), dtype=torch.float32)


# Define the generator model
class Generator(nn.Module):
    """Fully connected network with two ReLU layers and a linear output.

    Args:
        input_dim: Width of the input vectors.
        hidden_dim: Width of both hidden layers.
        output_dim: Width of the produced vectors (no final activation).
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        # The attribute names are also the state_dict key prefixes.
        self.input_layer = nn.Linear(in_features=input_dim, out_features=hidden_dim)
        self.hidden_layer = nn.Linear(in_features=hidden_dim, out_features=hidden_dim)
        self.output_layer = nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, x):
        """Map a batch ``x`` through the three layers and return the result."""
        first = self.input_layer(x).relu()
        second = self.hidden_layer(first).relu()
        return self.output_layer(second)

# Define the discriminator model
class Discriminator(nn.Module):
    """Fully connected classifier whose output is squashed by a sigmoid.

    Args:
        input_dim: Width of the samples being judged.
        hidden_dim: Width of both hidden layers.
        output_dim: Number of sigmoid outputs per sample.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.input_layer = nn.Linear(in_features=input_dim, out_features=hidden_dim)
        self.hidden_layer = nn.Linear(in_features=hidden_dim, out_features=hidden_dim)
        self.output_layer = nn.Linear(in_features=hidden_dim, out_features=output_dim)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the sigmoid score(s) for each row of ``x``."""
        first = self.input_layer(x).relu()
        second = self.hidden_layer(first).relu()
        logits = self.output_layer(second)
        return self.sigmoid(logits)

# Define the GAN model
class GAN(nn.Module):
    """Bundle a fully connected generator and discriminator in one module.

    Args:
        input_dim: Width of the noise vectors fed to the generator.
        hidden_dim: Width of the generator's hidden layers. Also used for
            the discriminator unless ``disc_hidden_dim`` is given.
        output_dim: Width of a generated sample, which is also the width of
            the discriminator's input.
        disc_hidden_dim: Optional separate width for the discriminator's
            hidden layers; defaults to ``hidden_dim``.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, disc_hidden_dim=None):
        super().__init__()
        if disc_hidden_dim is None:
            disc_hidden_dim = hidden_dim

        # Record the sizes from the arguments rather than from module-level
        # globals, so the class works wherever it is instantiated.
        self.input_dim = input_dim
        self.gen_hidden_dim = hidden_dim
        self.disc_hidden_dim = disc_hidden_dim
        self.output_dim = output_dim

        # Generator model: noise -> sample, Tanh keeps outputs in [-1, 1].
        self.generator = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, output_dim),
            nn.Tanh()
        )

        # Discriminator model: sample -> one sigmoid score.
        self.discriminator = nn.Sequential(
            nn.Linear(output_dim, disc_hidden_dim),
            nn.ReLU(),
            nn.Linear(disc_hidden_dim, disc_hidden_dim),
            nn.ReLU(),
            nn.Linear(disc_hidden_dim, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        """Generate one fake sample per row of ``x`` and score the fakes.

        Only the batch size (and device) of ``x`` is used; its values are not
        read.

        Returns:
            A ``(fake_data, disc_output)`` tuple.
        """
        # Generator forward pass (noise width comes from the instance, not
        # from a global).
        z = torch.randn(x.shape[0], self.input_dim, device=x.device)
        fake_data = self.generator(z)

        # Discriminator forward pass
        disc_output = self.discriminator(fake_data)

        return fake_data, disc_output


# Define the training function for the GAN
def train_gan(data, epochs, batch_size, input_dim, gen_hidden_dim, disc_hidden_dim, output_dim,
              num_fake_data=2000, save_path='generator.pth'):
    """Train a fresh GAN on ``data`` and return samples from its generator.

    Args:
        data: Real samples, one per row, with ``output_dim`` columns. The
            caller's object is not modified.
        epochs: Number of passes over ``data``.
        batch_size: Maximum number of rows per training step.
        input_dim: Width of the generator's noise input.
        gen_hidden_dim: Hidden width of the generator.
        disc_hidden_dim: Hidden width of the discriminator.
        output_dim: Width of real and generated samples.
        num_fake_data: Number of samples to generate after training.
        save_path: Where the generator's state_dict is written; pass None to
            skip saving.

    Returns:
        A float32 tensor of shape ``(num_fake_data, output_dim)`` produced by
        the trained generator.
    """
    # NOTE(review): the generator ends in Tanh, so it can only emit values in
    # [-1, 1]; `data` is used as given here -- confirm it is scaled to match.
    data = torch.as_tensor(data, dtype=torch.float32)

    # Initialize the GAN model and the loss function and optimizers
    gan_model = GAN(input_dim, gen_hidden_dim, output_dim, disc_hidden_dim=disc_hidden_dim)
    loss_fn = nn.BCELoss()
    disc_optimizer = optim.Adam(gan_model.discriminator.parameters(), lr=0.0002)
    gen_optimizer = optim.Adam(gan_model.generator.parameters(), lr=0.0002)

    gan_model.train()
    for epoch in range(epochs):
        # Shuffle by indexing with a random permutation. This builds a new
        # tensor, so the caller's data is left untouched (np.random.shuffle
        # would have shuffled the caller's tensor in place).
        shuffled = data[torch.randperm(data.shape[0])]

        for batch in torch.split(shuffled, batch_size):
            # The final batch may be shorter than batch_size, so size the
            # noise and labels from the batch itself.
            current = batch.shape[0]
            if current == 0:
                continue
            noise = torch.randn(current, input_dim)
            real_labels = torch.ones(current, 1)
            fake_labels = torch.zeros(current, 1)

            # Train the discriminator on both real and fake data. The fake
            # batch is detached so this step does not reach the generator.
            fake_data = gan_model.generator(noise)
            disc_optimizer.zero_grad()
            real_loss = loss_fn(gan_model.discriminator(batch), real_labels)
            fake_loss = loss_fn(gan_model.discriminator(fake_data.detach()), fake_labels)
            disc_loss = real_loss + fake_loss
            disc_loss.backward()
            disc_optimizer.step()

            # Train the generator to fool the discriminator: fakes are
            # scored against the "real" label.
            gen_optimizer.zero_grad()
            gen_loss = loss_fn(gan_model.discriminator(gan_model.generator(noise)), real_labels)
            gen_loss.backward()
            gen_optimizer.step()

    # Save the generator model
    if save_path is not None:
        torch.save(gan_model.generator.state_dict(), save_path)

    # Generate the requested number of samples once, after training.
    gan_model.eval()
    with torch.no_grad():
        generated = gan_model.generator(torch.randn(num_fake_data, input_dim))

    # Return the generated data
    return generated


import matplotlib.pyplot as plt
# Sizes used to build the demo model below.
input_dim = 4
latent_dim = 4
gen_hidden_dim = 128
disc_hidden_dim = 128
output_dim = 4
gan_model = GAN(input_dim, disc_hidden_dim, output_dim)
num_fake_data = 2000

# Sample from the generator. The module is called directly instead of through
# .forward() so that any registered hooks run, and under no_grad because no
# gradients are needed for plotting.
with torch.no_grad():
    fake_data = gan_model(torch.randn(num_fake_data, latent_dim))[0]
fake_data = fake_data.cpu().numpy()
df = pd.DataFrame(fake_data)

# NOTE(review): no training happens in this cell (train_gan is never called),
# so these rows come from a randomly initialised generator. They are appended
# after the real rows and therefore drawn as the tail of the "Real Data"
# curves below -- confirm that is intended.
tensor = torch.cat((tensor, torch.tensor(fake_data, dtype=torch.float32)), dim=0)

# Define the loss functions and optimizers
gen_loss_fn = nn.BCELoss()
disc_loss_fn = nn.BCELoss()
gen_optimizer = optim.Adam(gan_model.generator.parameters(), lr=0.0002, betas=(0.5, 0.999))
disc_optimizer = optim.Adam(gan_model.discriminator.parameters(), lr=0.0002, betas=(0.5, 0.999))

num_samples = 192
# Generate a second, shorter batch of samples from the (still untrained) generator
with torch.no_grad():
    generated_data = gan_model.generator(torch.randn((num_samples, latent_dim)))

# Plot the real and generated data over time
fig, ax = plt.subplots(figsize=(15, 5))

# Plot the real data, one line per column
real_colors = ['blue', 'green', 'red', 'black']
for column, color in enumerate(real_colors):
    ax.plot(tensor[:, column], label=f'Real Data{column + 1}', color=color)

# Plot the generated data, one line per column. Converting to NumPy once
# avoids repeating the conversion for every line, and each column now gets its
# own legend entry (the fourth was previously mislabelled 'Generated Data3').
generated_values = generated_data.detach().numpy()
generated_colors = ['orange', 'purple', 'brown', 'yellow']
for column, color in enumerate(generated_colors):
    ax.plot(generated_values[:, column], label=f'Generated Data{column + 1}', color=color)

# Set the axis labels and title
ax.set_xlabel('Time (Days)')
ax.set_ylabel('Data Value')
ax.set_title('Real and Generated Data')
ax.legend()

# Show the plot
plt.show()


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
import os
import datetime
import torch
from torch.utils.data import Dataset

# Read the ticker from stdin; it is stored upper-cased, while the data file
# name uses the lower-cased symbol.
ticker = input('Enter a stock ticker: ').upper()
filename = f'C:/archive/Stocks/{ticker.lower()}.us.txt'

# Load the file (first row skipped, column names supplied here) and keep only
# the four price columns by dropping Date, Interest and Volume.
column_labels = ['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Interest']
df = pd.read_csv(filename, skiprows=1, header=None, names=column_labels).drop(
    columns=['Date', 'Interest', 'Volume']
)

# Turn the price table into a float32 PyTorch tensor.
tensor = torch.tensor(df.values, dtype=torch.float32)


# Define the generator model
class Generator(nn.Module):
    """Three linear layers with ReLU after the first two and a raw output.

    Args:
        input_dim: Number of input features.
        hidden_dim: Number of units in each hidden layer.
        output_dim: Number of output features.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.input_layer = nn.Linear(input_dim, hidden_dim)
        self.hidden_layer = nn.Linear(hidden_dim, hidden_dim)
        self.output_layer = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Return the network output for the batch ``x``."""
        activations = x
        # Both hidden stages share the same linear-then-ReLU shape.
        for layer in (self.input_layer, self.hidden_layer):
            activations = torch.relu(layer(activations))
        return self.output_layer(activations)


# Define the discriminator model
class Discriminator(nn.Module):
    """Three linear layers with ReLU after the first two and a sigmoid output.

    Args:
        input_dim: Number of input features.
        hidden_dim: Number of units in each hidden layer.
        output_dim: Number of sigmoid outputs per sample.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.input_layer = nn.Linear(input_dim, hidden_dim)
        self.hidden_layer = nn.Linear(hidden_dim, hidden_dim)
        self.output_layer = nn.Linear(hidden_dim, output_dim)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the sigmoid-activated output for the batch ``x``."""
        activations = x
        # Both hidden stages share the same linear-then-ReLU shape.
        for layer in (self.input_layer, self.hidden_layer):
            activations = torch.relu(layer(activations))
        return self.sigmoid(self.output_layer(activations))


# Define the GAN model
class GAN(nn.Module):
    """Pair a fully connected generator with a fully connected discriminator.

    Args:
        input_dim: Width of the noise vectors given to the generator.
        gen_hidden_dim: Width of the generator's hidden layers.
        disc_hidden_dim: Width of the discriminator's hidden layers.
        output_dim: Width of generated samples and of discriminator inputs.
    """

    def __init__(self, input_dim, gen_hidden_dim, disc_hidden_dim, output_dim):
        super().__init__()
        self.input_dim = input_dim
        self.gen_hidden_dim = gen_hidden_dim
        self.disc_hidden_dim = disc_hidden_dim
        self.output_dim = output_dim

        # Generator: noise -> sample, with Tanh as the last stage.
        generator_stages = [
            nn.Linear(input_dim, gen_hidden_dim),
            nn.ReLU(),
            nn.Linear(gen_hidden_dim, gen_hidden_dim),
            nn.ReLU(),
            nn.Linear(gen_hidden_dim, output_dim),
            nn.Tanh(),
        ]
        self.generator = nn.Sequential(*generator_stages)

        # Discriminator: sample -> a single sigmoid score.
        discriminator_stages = [
            nn.Linear(output_dim, disc_hidden_dim),
            nn.ReLU(),
            nn.Linear(disc_hidden_dim, disc_hidden_dim),
            nn.ReLU(),
            nn.Linear(disc_hidden_dim, 1),
            nn.Sigmoid(),
        ]
        self.discriminator = nn.Sequential(*discriminator_stages)

    def forward(self, x):
        """Score the real batch ``x`` and an equally sized batch of fakes.

        Returns:
            A ``(real_output, fake_output)`` tuple of discriminator scores.
        """
        # One noise vector per row of the real batch.
        noise = torch.randn(x.shape[0], self.input_dim)
        real_output = self.discriminator(x)
        fake_output = self.discriminator(self.generator(noise))
        return real_output, fake_output
    

def train(self, dataloader, num_epochs):
    """Run alternating discriminator/generator updates on a GAN-like model.

    This is a module-level function that takes the model as ``self``; call it
    as ``train(model, dataloader, num_epochs)``.

    Args:
        self: Object exposing ``generator``, ``discriminator`` and
            ``input_dim``. If it also defines ``device``, ``loss_function``,
            ``discriminator_optimizer`` or ``generator_optimizer`` those are
            used; otherwise defaults are created (and the optimizers are
            stored on the model so repeated calls keep their state).
        dataloader: Sized iterable of batches. Only the first element of each
            item is used as the real batch (``data[0]``).
        num_epochs: Number of passes over ``dataloader``.

    Returns:
        None. Progress is printed every 100 steps.
    """
    # Fall back to sensible defaults for anything the model does not define.
    device = getattr(self, 'device', None)
    if device is None:
        device = next(self.generator.parameters()).device
    loss_function = getattr(self, 'loss_function', None)
    if loss_function is None:
        loss_function = nn.BCELoss()
    if getattr(self, 'discriminator_optimizer', None) is None:
        self.discriminator_optimizer = optim.Adam(
            self.discriminator.parameters(), lr=0.0002, betas=(0.5, 0.999))
    if getattr(self, 'generator_optimizer', None) is None:
        self.generator_optimizer = optim.Adam(
            self.generator.parameters(), lr=0.0002, betas=(0.5, 0.999))

    for epoch in range(num_epochs):
        for i, data in enumerate(dataloader):
            real_data = data[0].to(device)
            batch_size = real_data.size(0)

            # Train discriminator. The fake batch is detached so this step
            # does not backpropagate into the generator.
            self.discriminator_optimizer.zero_grad()
            z = torch.randn(batch_size, self.input_dim, device=device)
            fake_data = self.generator(z).detach()

            real_output = self.discriminator(real_data)
            fake_output = self.discriminator(fake_data)

            # Build the labels from the outputs so their shapes always match;
            # BCELoss rejects a target whose shape differs from its input.
            real_loss = loss_function(real_output, torch.ones_like(real_output))
            fake_loss = loss_function(fake_output, torch.zeros_like(fake_output))
            loss = real_loss + fake_loss

            loss.backward()
            self.discriminator_optimizer.step()

            # Train generator: fresh fakes are scored against the "real"
            # label so the generator is rewarded for fooling the discriminator.
            self.generator_optimizer.zero_grad()
            z = torch.randn(batch_size, self.input_dim, device=device)
            fake_output = self.discriminator(self.generator(z))

            generator_loss = loss_function(fake_output, torch.ones_like(fake_output))

            generator_loss.backward()
            self.generator_optimizer.step()

            if i % 100 == 0:
                print('[Epoch {}/{}], [Step {}/{}], Discriminator Loss: {:.4f}, Generator Loss: {:.4f}'
                      .format(epoch, num_epochs, i, len(dataloader), loss.item(), generator_loss.item()))
                

