**Load Libraries**

In [16]:
import sys
import os
import pandas as pd
import tensorflow as tf
import numpy as np
from tensorflow.keras import layers, models

**Load Data**

In [None]:
# Load every CSV in a folder and split each one into income / spending records
def process_financial_data(folder_path):
    """Read all ``.csv`` files in `folder_path` and split them by transaction type.

    Each file is expected to provide 'Date', 'Transaction_Type' and 'Amount'
    columns. 'Date' is parsed with ``pd.to_datetime``.

    Returns a dict keyed by file name. Each value is a dict with:
      * 'income'   - 'Date'/'Amount' rows whose Transaction_Type is 'Deposit'
      * 'spending' - 'Date'/'Amount' rows whose Transaction_Type is 'Withdrawal'
    """
    def _rows_of_type(frame, transaction_type):
        # Independent copy of the Date/Amount columns for one transaction type
        matches = frame['Transaction_Type'] == transaction_type
        return frame.loc[matches, ['Date', 'Amount']].copy()

    people_data = {}
    for entry in os.listdir(folder_path):
        # Anything that is not a CSV file is skipped
        if not entry.endswith(".csv"):
            continue

        transactions = pd.read_csv(os.path.join(folder_path, entry))
        transactions['Date'] = pd.to_datetime(transactions['Date'])

        # One record per person, keyed by the file the data came from
        people_data[entry] = {
            'income': _rows_of_type(transactions, 'Deposit'),
            'spending': _rows_of_type(transactions, 'Withdrawal'),
        }

    return people_data

**GAN**
1) Generator (neural net that creates what it thinks is replicated data)
2) Discriminator (neural net that is fed both the real and the fake data and judges which samples are real)
3) Real/Fake (the discriminator's verdict is backpropagated to both neural nets to update them depending on the outcome)

**Helper Functions**

In [24]:
# Size of the noise vector that seeds the generator
def latent_dim():
    """Return the length of the generator's noise vector.

    TODO: derive this value instead of hard-coding it.
    """
    noise_vector_size = 10
    return noise_vector_size

# Slope parameter handed to every LeakyReLU layer
def alpha():
    """Return the LeakyReLU alpha value.

    TODO: derive this value (to limit dying neurons) instead of hard-coding it.
    """
    leaky_slope = 0.2
    return leaky_slope

# Feature count for the discriminator input
def inputShape(dataFrame):
    """Return the number of columns in `dataFrame` minus one (an int)."""
    column_count = dataFrame.shape[1]
    return column_count - 1

# Dropout rate used by the discriminator's Dropout layers
def dropout():
    """Return the fraction of units dropped by each Dropout layer."""
    drop_rate = 0.3
    return drop_rate

# Number of samples per training batch
def batch():
    """Return the training batch size."""
    samples_per_batch = 32
    return samples_per_batch

# Summary statistics of the real data
def meanIncome(dataFrame):
    """Return the mean of the 'Deposits' column of `dataFrame`."""
    deposits = dataFrame['Deposits']
    return deposits.mean()

def stddevIncome(dataFrame):
    """Return the standard deviation (pandas ``Series.std``) of the 'Deposits' column."""
    deposits = dataFrame['Deposits']
    return deposits.std()

def meanExpense(dataFrame):
    """Return the mean of the 'Withdrawl' column of `dataFrame`."""
    withdrawals = dataFrame['Withdrawl']
    return withdrawals.mean()

def stddevExpense(dataFrame):
    """Return the standard deviation (pandas ``Series.std``) of the 'Withdrawl' column."""
    withdrawals = dataFrame['Withdrawl']
    return withdrawals.std()

**Generator**

In [8]:
#Generator
def generator(noise):
    """Build the (uncompiled) generator network.

    Architecture: two Dense -> BatchNormalization -> LeakyReLU stages
    (128 units, then 256 units) followed by a 2-unit tanh output layer.

    Parameters
    ----------
    noise : int
        Length of the latent noise vector fed to the network. Despite the
        name, this is a dimension, not the noise array itself.

    Returns
    -------
    A Keras ``Sequential`` model mapping ``(batch, noise)`` inputs to
    ``(batch, 2)`` outputs.
    """
    model = models.Sequential() #Sequential NN
    model.add(layers.Input(shape=(noise,)))  #Input shape is declared once, up front

    # The Dense layers are left linear on purpose: applying ReLU here would zero
    # out negative activations before LeakyReLU ever sees them, so neurons could
    # still die. LeakyReLU is the only non-linearity of each stage.
    model.add(layers.Dense(128))  #Layer of 128 Neurons
    model.add(layers.BatchNormalization()) #Normalization of previous output layer
    model.add(layers.LeakyReLU(alpha=alpha()))  #Keeps a small gradient for negative inputs

    model.add(layers.Dense(256))  #Layer of 256 Neurons
    model.add(layers.BatchNormalization()) #Normalization of previous output layer
    model.add(layers.LeakyReLU(alpha=alpha()))  #Keeps a small gradient for negative inputs

    # NOTE(review): tanh bounds both outputs to [-1, 1]; the real samples shown to
    # the discriminator presumably need to be scaled to that range - confirm.
    model.add(layers.Dense(2, activation = 'tanh')) #Output for income and expense

    return model


**Discriminator**

In [12]:
#Discriminator
def discriminator(inputShape):
    """Build the (uncompiled) discriminator network.

    Architecture: two Dense -> LeakyReLU -> Dropout stages (256 units, then
    128 units) followed by a single sigmoid unit.

    Parameters
    ----------
    inputShape : int or sequence of int
        Shape of one input sample, excluding the batch axis. A bare integer
        feature count is accepted and converted to a 1-tuple. Note that this
        parameter shadows the module-level ``inputShape`` helper inside this
        function.

    Returns
    -------
    A Keras ``Sequential`` model that outputs one value in [0, 1] per sample,
    where 1 means "real".
    """
    # Keras needs a shape tuple; also accept a plain feature count
    if isinstance(inputShape, (int, np.integer)):
        inputShape = (int(inputShape),)

    model = models.Sequential()
    model.add(layers.Input(shape=tuple(inputShape)))

    # The Dense layers are left linear: a LeakyReLU placed directly after a ReLU
    # only ever receives non-negative values and would therefore do nothing.
    model.add(layers.Dense(256))
    model.add(layers.LeakyReLU(alpha=alpha()))
    model.add(layers.Dropout(dropout()))    #Dropout layer meaning dropout% of neurons will be ignored
    model.add(layers.Dense(128))
    model.add(layers.LeakyReLU(alpha=alpha()))
    model.add(layers.Dropout(dropout()))
    model.add(layers.Dense(1, activation='sigmoid'))    #sigmoid activation function gives output of 0->1, 1 means real
    return model
    

**Defining GAN Model**

In [None]:
# Build the two networks.
# NOTE: these assignments rebind the names `generator` and `discriminator` from
# the builder functions to the built models, so this cell can only be re-run
# after re-running the cells that define the builders.
generator = generator(latent_dim())  # the builder needs the noise-vector length
# The discriminator consumes exactly what the generator emits (batch axis dropped)
discriminator = discriminator(generator.output_shape[1:])

discriminator.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

#GAN Model
# Freeze the discriminator inside the stacked model so that training `gan`
# only updates the generator's weights.
discriminator.trainable = False #Freeze
gan_input = layers.Input(shape=(latent_dim(),))  # trailing comma: shape must be a tuple
generated_data = generator(gan_input)
gan_output = discriminator(generated_data)
gan = models.Model(gan_input, gan_output)
gan.compile(optimizer='adam', loss='binary_crossentropy')

**Generate Real Data**

In [None]:
def generateRealData(batch, dataFrame=None):
    """Sample a batch of "real" (income, expense) pairs.

    Income and expense are drawn independently from normal distributions whose
    mean and standard deviation are computed from `dataFrame` via
    ``meanIncome``/``stddevIncome`` and ``meanExpense``/``stddevExpense``.

    Parameters
    ----------
    batch : int
        Number of samples to draw. This parameter shadows the module-level
        ``batch()`` helper inside this function.
    dataFrame : pandas.DataFrame
        Source of the statistics; must contain the 'Deposits' and 'Withdrawl'
        columns read by the helper functions. Optional only for signature
        compatibility - it has to be supplied.

    Returns
    -------
    numpy.ndarray of shape ``(batch, 2)``; column 0 is income, column 1 expense.

    Raises
    ------
    ValueError
        If `dataFrame` is not provided.
    """
    # The statistics helpers all need the data frame; fail early with a clear message
    if dataFrame is None:
        raise ValueError(
            "generateRealData needs a dataFrame with 'Deposits' and 'Withdrawl' columns"
        )

    income = np.random.normal(loc = meanIncome(dataFrame), scale = stddevIncome(dataFrame), size = batch)
    expense = np.random.normal(loc = meanExpense(dataFrame), scale = stddevExpense(dataFrame), size = batch)
    return np.stack((income, expense), axis=1)

**Train GAN**

In [20]:
def train_gan(generator, discriminator, gan, latent_dim, epochs, batch_size, real_data_fn=None):
    """Alternately train the discriminator and the generator of a GAN.

    Every epoch processes a single batch: the discriminator is trained on one
    real batch (labelled 1) and one generated batch (labelled 0), then the
    generator is trained through `gan` on fresh noise labelled 1.

    Parameters
    ----------
    generator : model exposing ``predict(x, verbose=...)``
    discriminator : model exposing ``train_on_batch(x, y)``
    gan : stacked generator+discriminator model exposing ``train_on_batch(x, y)``
    latent_dim : int
        Length of each noise vector (the integer value, not the helper function).
    epochs : int
        Number of training iterations.
    batch_size : int
        Number of samples per batch.
    real_data_fn : callable, optional
        ``real_data_fn(batch_size)`` must return a batch of real samples.
        Defaults to the module-level ``generateRealData``.

    Returns
    -------
    None. Progress is printed every 100 epochs.
    """
    if real_data_fn is None:
        real_data_fn = generateRealData

    #Training loop
    for epoch in range(epochs):
        #Generate 'Real Data'
        real_data = real_data_fn(batch_size)
        real_labels = np.ones((batch_size, 1))

        #Generate Fake Data
        noise = np.random.normal(0, 1, (batch_size, latent_dim))
        # verbose=0: otherwise predict prints a progress bar on every iteration
        fake_data = generator.predict(noise, verbose=0)
        fake_labels = np.zeros((batch_size, 1))

        # Train discriminator
        discriminator_loss_real = discriminator.train_on_batch(real_data, real_labels)
        discriminator_loss_fake = discriminator.train_on_batch(fake_data, fake_labels)
        # Element-wise average of whatever train_on_batch returned for the two batches
        discriminator_loss = 0.5 * np.add(discriminator_loss_real, discriminator_loss_fake)

        # Train generator
        noise = np.random.normal(0, 1, (batch_size, latent_dim))
        # Label the generated samples as real so the generator is pushed to fool the discriminator
        misleading_labels = np.ones((batch_size, 1))  # Labels for generator training
        generator_loss = gan.train_on_batch(noise, misleading_labels)

        # Print progress
        if epoch % 100 == 0:
            print(f"Epoch {epoch}, Discriminator Loss: {discriminator_loss}, Generator Loss: {generator_loss}")

#Run training
# train_gan needs the integer latent size (call latent_dim(), do not pass the
# function itself) plus an epoch count and a batch size.
EPOCHS = 1000  # placeholder number of training iterations - tune as needed
train_gan(generator, discriminator, gan, latent_dim(), epochs=EPOCHS, batch_size=batch())