# Simple Neural Network

## 1. Data Procedure

- We can create a `DataLoader` instance to see what the loaded data looks like:

In [None]:
# import the DataLoader
from src.data.DataLoader import DataLoader

# Relative path to the CSV file.
# Assuming 'data.csv' is in a folder named 'dataset/data' in the src directory
# (presumably resolved against the notebook's working directory — confirm).
file_path = 'dataset/data/data.csv'

# Create an instance of DataLoader
data_loader_0 = DataLoader(file_path, shuffle=True)  # shuffle is switched on for this check

# Load the data
data = data_loader_0.load_data()

# Print the first few rows of the data to verify loading
print(data.head())

# Print what type of data is returned
print(type(data))

- Now that the data loader is created, we initialize a `DataProcessor` to process the loaded data

In [None]:
# Import necessary classes
from src.data.DataProcessor import DataProcessor

# Load the data through the loader again (this call is unconditional).
# Note: the processor below is handed the loader itself, not this object.
data = data_loader_0.load_data()

# Build the processor with normalization switched on; split proportions are
# left at whatever DataProcessor uses by default.
data_processor_0 = DataProcessor(data_loader_0, normalize=True)

# Produce the train, validation, and test sets
train_data, validate_data, test_data = data_processor_0.process_data()

# Sanity check: report the shape of each split
for split_label, split_frame in (
    ("Train Data Shape:", train_data),
    ("Validation Data Shape:", validate_data),
    ("Test Data Shape:", test_data),
):
    print(split_label, split_frame.shape)


- Using `BaseDataset` to directly create split and normalized datasets

In [None]:
from src.data.BaseDataset import BaseDataset

# Build the dataset straight from the CSV path (relative to where this code
# runs), with shuffling and normalization switched on.
dataset = BaseDataset('dataset/data/data.csv', shuffle=True, normalize=True)
train_data, validate_data, test_data = dataset.prepare_data()

# The splits are also available as attributes on the dataset object;
# preview the first rows of each one in turn.
for split_attr in ("train_data", "validate_data", "test_data"):
    print(getattr(dataset, split_attr).head())


- Using matplotlib to plot SOME of the data features

In [None]:
from src.data.Utils_Data import plot_all_histograms
from src.data.Utils_Data import plot_2d_data

# Plot the training split using features 'x1' and 'x2', with 'target' as the label column
# (the exact plot type is decided inside plot_2d_data, which is not shown here)
plot_2d_data(dataset.train_data, feature_columns=['x1', 'x2'], label_column='target')

# Plot histograms of the training split, passing 'target' as a column to exclude
plot_all_histograms(dataset.train_data, exclude_columns=['target'])


## 2. Modeling Procedure
### 2.1 Forward passing
- After implementing BaseNetwork and the inherited ClassifierNetwork, we can test its forward pass

In [None]:
from src.models.ClassifierNetwork import Classifier
# Every column except 'target' counts as an input feature
num_features = dataset.train_data.shape[1] - 1
classifier = Classifier(num_features)

# Separate each prepared split into a feature array (X) and a label array (y)
X_train, y_train = train_data.drop('target', axis=1).values, train_data['target'].values
X_val, y_val = validate_data.drop('target', axis=1).values, validate_data['target'].values
X_test, y_test = test_data.drop('target', axis=1).values, test_data['target'].values

# Run the training features through the freshly created classifier once
train_preds = classifier.forward(X_train)
print("Training Predictions Shape:\n", train_preds.shape)
print("Some Training Predictions:\n", train_preds[:5])


### 2.2 Calculating Loss
- Since the forward pass works correctly, we perform the loss calculation

In [None]:
from src.models.LossFunction import BCE
# Instantiate the BCE loss (presumably binary cross-entropy — confirm in LossFunction)
loss_func = BCE()

# Calculate loss for training data, using the predictions and labels
# produced in the forward-pass section
train_loss = loss_func.forward(train_preds, y_train)
print(f"Training Loss: {train_loss:.4f}")

### 2.3 Calculating Gradients
- Now we go back through the network and calculate the gradients for all weights

In [None]:

from src.models.Utils_Model import plot_decision_boundary
# Step 1: Forward Pass (already done in section 2.1; train_preds holds its output)
# Step 2: Compute Loss Gradient
dloss = loss_func.backward(train_preds, y_train)

# Step 3: Backward Pass
gradients = classifier.backward(dloss)

# Step 4: Print Gradients
print("Gradients of the weights:\n", gradients)
# Plot the classifier's decision boundary on the validation split
# (no optimizer step has been taken yet at this point in the notebook)
plot_decision_boundary(dataset.validate_data, classifier=classifier)

### 2.4 Batch Gradient Descent
- After computing the gradients over the whole training set and averaging them into a mean gradient, we can now update the weights and watch how the training loss changes

In [None]:
from src.models.Optimizer import Optimizer

# Batch gradient descent: every update uses gradients that are recomputed on
# the full training set with the *current* weights. Reusing one fixed gradient
# for every step would keep pushing the weights in a stale direction.
NUM_ROUNDS = 4          # one decision-boundary plot and loss report per round
STEPS_PER_ROUND = 190   # weight updates performed within each round

# The optimizer's arguments never change, so it is built once up front
optimizer = Optimizer(classifier, learning_rate=0.01)

for _round in range(NUM_ROUNDS):
    for _step in range(STEPS_PER_ROUND):
        # Forward pass with the current weights
        train_preds_updated = classifier.forward(X_train)

        # Backpropagate the loss to get fresh gradients for these weights
        dloss = loss_func.backward(train_preds_updated, y_train)
        gradients = classifier.backward(dloss)

        # Use the optimizer to update the model's weights based on the gradients
        optimizer.step(gradients)

    # Visualize the boundary on the validation split after this round
    plot_decision_boundary(dataset.validate_data, classifier=classifier)

    # Re-evaluate the training loss after the round's final update
    updated_train_loss = loss_func.forward(classifier.forward(X_train), y_train)
    print(f"Updated Training Loss: {updated_train_loss:.4f}")


### 2.5 Mini-Batch Gradient Descent

In [None]:
from src.Solver import Solver

# Hand the model, the train/validation arrays, and the loss to the Solver,
# with a learning rate of 0.001 and a batch size of 32.
# Note: `classifier` is the same object updated in the previous section, so
# training continues from its current weights.
solver = Solver(model=classifier, data={'X_train': X_train, 'y_train': y_train,
                                        'X_val': X_val, 'y_val': y_val},
                loss_func=loss_func, learning_rate=0.001, batch_size=32)
# Train for 200 epochs
solver.train(epochs=200)
# Plot the decision boundary on the validation split after training
plot_decision_boundary(dataset.validate_data, classifier=classifier)