# STEP 1: Load Preprocessed Data

In [1]:
import pandas as pd
import numpy as np

artists_df = pd.read_csv("/Users/hoon/Desktop/4060J_DataScience_Project/artists.csv")

'''
# Load the preprocessed data 
1) parse & organize -> 2) feature extraction (pretrained ResNet50) -> 3) PCA to reduce dimentionality into 100
'''
image_metadata_reduced_df = pd.read_pickle('/Users/hoon/Desktop/image_metadata_reduced.pkl')


# Normalize artist names in both DataFrames (Albrecht Dürer)
import unicodedata

def normalize_name(name):
    return unicodedata.normalize('NFKD', name).encode('ascii', 'ignore').decode('utf-8').strip()

image_metadata_reduced_df['artist_name'] = image_metadata_reduced_df['artist_name'].apply(normalize_name)
artists_df['name'] = artists_df['name'].apply(normalize_name)

In [3]:
display(artists_df.head())
display(image_metadata_reduced_df)

Unnamed: 0,id,name,years,genre,nationality,bio,wikipedia,paintings
0,0,Amedeo Modigliani,1884 - 1920,Expressionism,Italian,Amedeo Clemente Modigliani (Italian pronunciat...,http://en.wikipedia.org/wiki/Amedeo_Modigliani,193
1,1,Vasiliy Kandinskiy,1866 - 1944,"Expressionism,Abstractionism",Russian,Wassily Wassilyevich Kandinsky (Russian: Васи́...,http://en.wikipedia.org/wiki/Wassily_Kandinsky,88
2,2,Diego Rivera,1886 - 1957,"Social Realism,Muralism",Mexican,Diego María de la Concepción Juan Nepomuceno E...,http://en.wikipedia.org/wiki/Diego_Rivera,70
3,3,Claude Monet,1840 - 1926,Impressionism,French,Oscar-Claude Monet (; French: [klod mɔnɛ]; 14 ...,http://en.wikipedia.org/wiki/Claude_Monet,73
4,4,Rene Magritte,1898 - 1967,"Surrealism,Impressionism",Belgian,René François Ghislain Magritte (French: [ʁəne...,http://en.wikipedia.org/wiki/René_Magritte,194


Unnamed: 0,file_name,artist_name,features,artist_label
0,Gustav_Klimt_113.jpg,Gustav Klimt,"[-104.051254, -30.065239, 40.834503, -27.93637...",19
1,Vincent_van_Gogh_388.jpg,Vincent van Gogh,"[-42.690506, 50.457077, -31.054712, 49.034294,...",48
2,Amedeo_Modigliani_24.jpg,Amedeo Modigliani,"[85.80638, 74.72588, 21.94299, -97.160614, 6.6...",2
3,Edgar_Degas_455.jpg,Edgar Degas,"[110.57151, -27.310974, -106.109825, -26.60741...",10
4,Edgar_Degas_333.jpg,Edgar Degas,"[128.26991, -22.02461, -65.37852, -10.970957, ...",10
...,...,...,...,...
8350,Mikhail_Vrubel_116.jpg,Mikhail Vrubel,"[-112.82036, -11.718504, -2.0028427, -15.04778...",32
8351,Joan_Miro_51.jpg,Joan Miro,"[-69.65716, 75.916565, -17.81109, -51.636993, ...",27
8352,Frida_Kahlo_10.jpg,Frida Kahlo,"[53.062565, 15.8918295, -43.91319, -41.854576,...",16
8353,Vincent_van_Gogh_391.jpg,Vincent van Gogh,"[-88.60199, -56.594482, -37.02463, 22.391777, ...",48


In [5]:
# Merge the genre/style information into the image metadata DataFrame
image_metadata_reduced_df = image_metadata_reduced_df.merge(
    artists_df[['name', 'genre']],  # Use the genre or style column from artists.csv
    left_on='artist_name',
    right_on='name',
    how='left'
)

# Drop the duplicate 'name' column
image_metadata_reduced_df.drop(columns=['name'], inplace=True)

# Verify the merged DataFrame
display(image_metadata_reduced_df)

Unnamed: 0,file_name,artist_name,features,artist_label,genre
0,Gustav_Klimt_113.jpg,Gustav Klimt,"[-104.051254, -30.065239, 40.834503, -27.93637...",19,"Symbolism,Art Nouveau"
1,Vincent_van_Gogh_388.jpg,Vincent van Gogh,"[-42.690506, 50.457077, -31.054712, 49.034294,...",48,Post-Impressionism
2,Amedeo_Modigliani_24.jpg,Amedeo Modigliani,"[85.80638, 74.72588, 21.94299, -97.160614, 6.6...",2,Expressionism
3,Edgar_Degas_455.jpg,Edgar Degas,"[110.57151, -27.310974, -106.109825, -26.60741...",10,Impressionism
4,Edgar_Degas_333.jpg,Edgar Degas,"[128.26991, -22.02461, -65.37852, -10.970957, ...",10,Impressionism
...,...,...,...,...,...
8350,Mikhail_Vrubel_116.jpg,Mikhail Vrubel,"[-112.82036, -11.718504, -2.0028427, -15.04778...",32,Symbolism
8351,Joan_Miro_51.jpg,Joan Miro,"[-69.65716, 75.916565, -17.81109, -51.636993, ...",27,Surrealism
8352,Frida_Kahlo_10.jpg,Frida Kahlo,"[53.062565, 15.8918295, -43.91319, -41.854576,...",16,"Primitivism,Surrealism"
8353,Vincent_van_Gogh_391.jpg,Vincent van Gogh,"[-88.60199, -56.594482, -37.02463, 22.391777, ...",48,Post-Impressionism


In [7]:
# Ensure the 'genre' column exists and has no NaN values
if 'genre' in image_metadata_reduced_df.columns:
    image_metadata_reduced_df['genre'] = image_metadata_reduced_df['genre'].fillna("")
else:
    raise KeyError("The 'genre' column is missing from the DataFrame.")

# Create the 'genre_list' column
image_metadata_reduced_df['genre_list'] = image_metadata_reduced_df['genre'].apply(lambda x: x.split(","))

In [9]:
# Check the first few rows of the DataFrame
#display(image_metadata_reduced_df[['genre', 'genre_list']])

In [11]:
from sklearn.preprocessing import MultiLabelBinarizer

# One-hot encode the genres
mlb = MultiLabelBinarizer()
one_hot_genres = mlb.fit_transform(image_metadata_reduced_df['genre_list'])

# Add the one-hot encoded genres as columns
genre_columns = mlb.classes_
for idx, genre in enumerate(genre_columns):
    image_metadata_reduced_df[genre] = one_hot_genres[:, idx]

# Example output
display(image_metadata_reduced_df)

Unnamed: 0,file_name,artist_name,features,artist_label,genre,genre_list,Abstract Expressionism,Abstractionism,Art Nouveau,Baroque,...,Pop Art,Post-Impressionism,Primitivism,Proto Renaissance,Realism,Romanticism,Social Realism,Suprematism,Surrealism,Symbolism
0,Gustav_Klimt_113.jpg,Gustav Klimt,"[-104.051254, -30.065239, 40.834503, -27.93637...",19,"Symbolism,Art Nouveau","[Symbolism, Art Nouveau]",0,0,1,0,...,0,0,0,0,0,0,0,0,0,1
1,Vincent_van_Gogh_388.jpg,Vincent van Gogh,"[-42.690506, 50.457077, -31.054712, 49.034294,...",48,Post-Impressionism,[Post-Impressionism],0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
2,Amedeo_Modigliani_24.jpg,Amedeo Modigliani,"[85.80638, 74.72588, 21.94299, -97.160614, 6.6...",2,Expressionism,[Expressionism],0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Edgar_Degas_455.jpg,Edgar Degas,"[110.57151, -27.310974, -106.109825, -26.60741...",10,Impressionism,[Impressionism],0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Edgar_Degas_333.jpg,Edgar Degas,"[128.26991, -22.02461, -65.37852, -10.970957, ...",10,Impressionism,[Impressionism],0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8350,Mikhail_Vrubel_116.jpg,Mikhail Vrubel,"[-112.82036, -11.718504, -2.0028427, -15.04778...",32,Symbolism,[Symbolism],0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
8351,Joan_Miro_51.jpg,Joan Miro,"[-69.65716, 75.916565, -17.81109, -51.636993, ...",27,Surrealism,[Surrealism],0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
8352,Frida_Kahlo_10.jpg,Frida Kahlo,"[53.062565, 15.8918295, -43.91319, -41.854576,...",16,"Primitivism,Surrealism","[Primitivism, Surrealism]",0,0,0,0,...,0,0,1,0,0,0,0,0,1,0
8353,Vincent_van_Gogh_391.jpg,Vincent van Gogh,"[-88.60199, -56.594482, -37.02463, 22.391777, ...",48,Post-Impressionism,[Post-Impressionism],0,0,0,0,...,0,1,0,0,0,0,0,0,0,0


In [13]:
from sklearn.model_selection import train_test_split

# Extract features and genres (as one-hot encoded labels)
X = np.stack(image_metadata_reduced_df['features'].values)  # Feature matrix
y_genre = image_metadata_reduced_df[genre_columns].values  # One-hot encoded genres

# Add indices to track the split
indices = np.arange(len(X))  # Create indices for the full dataset

# Split the data into training and testing sets, including the indices
X_train, X_test, y_train, y_test, train_indices, test_indices = train_test_split(
    X, y_genre, indices, test_size=0.2, random_state=42
)

# STEP 2: Implementation

# MODEL 1: Simple Neural Networks <1>
Two hidden layers

In [17]:
class SimpleNeuralNetwork:
    def __init__(self, input_size, hidden_sizes, output_size, learning_rate=0.01):
        self.learning_rate = learning_rate

        # Initialize weights and biases
        self.weights = {
            "W1": np.random.randn(input_size, hidden_sizes[0]) * 0.01,
            "W2": np.random.randn(hidden_sizes[0], hidden_sizes[1]) * 0.01,
            "W3": np.random.randn(hidden_sizes[1], output_size) * 0.01,
        }
        self.biases = {
            "b1": np.zeros((1, hidden_sizes[0])),
            "b2": np.zeros((1, hidden_sizes[1])),
            "b3": np.zeros((1, output_size)),
        }

    def relu(self, Z):
        return np.maximum(0, Z)

    def relu_derivative(self, Z):
        return Z > 0

    def sigmoid(self, Z):
        return 1 / (1 + np.exp(-Z))

    def binary_cross_entropy_loss(self, y_pred, y_true):
        n_samples = y_true.shape[0]
        return -np.sum(y_true * np.log(y_pred + 1e-10) + (1 - y_true) * np.log(1 - y_pred + 1e-10)) / n_samples
  
    def forward(self, X):
        # Forward propagation
        self.Z1 = np.dot(X, self.weights["W1"]) + self.biases["b1"]
        self.A1 = self.relu(self.Z1)

        self.Z2 = np.dot(self.A1, self.weights["W2"]) + self.biases["b2"]
        self.A2 = self.relu(self.Z2)

        self.Z3 = np.dot(self.A2, self.weights["W3"]) + self.biases["b3"]
        self.A3 = self.sigmoid(self.Z3)  # Sigmoid activation for multi-label classification

        return self.A3

    def backward(self, X, y_true, y_pred):
        # Backward propagation
        n_samples = y_true.shape[0]

        # Gradients for the output layer
        dZ3 = y_pred - y_true  # Binary cross-entropy gradient
        dW3 = np.dot(self.A2.T, dZ3) / n_samples
        db3 = np.sum(dZ3, axis=0, keepdims=True) / n_samples

        # Gradients for the second hidden layer
        dA2 = np.dot(dZ3, self.weights["W3"].T)
        dZ2 = dA2 * self.relu_derivative(self.Z2)
        dW2 = np.dot(self.A1.T, dZ2) / n_samples
        db2 = np.sum(dZ2, axis=0, keepdims=True) / n_samples

        # Gradients for the first hidden layer
        dA1 = np.dot(dZ2, self.weights["W2"].T)
        dZ1 = dA1 * self.relu_derivative(self.Z1)
        dW1 = np.dot(X.T, dZ1) / n_samples
        db1 = np.sum(dZ1, axis=0, keepdims=True) / n_samples

        # Update weights and biases
        self.weights["W1"] -= self.learning_rate * dW1
        self.biases["b1"] -= self.learning_rate * db1

        self.weights["W2"] -= self.learning_rate * dW2
        self.biases["b2"] -= self.learning_rate * db2

        self.weights["W3"] -= self.learning_rate * dW3
        self.biases["b3"] -= self.learning_rate * db3

    def train(self, X, y, epochs=20, batch_size=32):
        n_samples = X.shape[0]

        for epoch in range(epochs):
            # Shuffle the data
            indices = np.arange(n_samples)
            np.random.shuffle(indices)
            X = X[indices]
            y = y[indices]

            # Mini-batch gradient descent
            for i in range(0, n_samples, batch_size):
                X_batch = X[i:i + batch_size]
                y_batch = y[i:i + batch_size]

                # Forward and backward propagation
                y_pred = self.forward(X_batch)
                self.backward(X_batch, y_batch, y_pred)

            # Compute loss
            y_pred_full = self.forward(X)
            loss = self.binary_cross_entropy_loss(y_pred_full, y)
            print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss:.4f}")

    def predict(self, X, threshold=0.5):
        y_pred = self.forward(X)
        max_probs = np.max(y_pred, axis=1) # Highest probability for each sample
        max_labels = np.argmax(y_pred, axis=1) # Index of the highest probability
        return max_probs, max_labels

In [144]:
##Train and Evaluate the Neural Network for Genre Classification
input_size = X_train.shape[1]
hidden_sizes = [512, 256]  # Two hidden layers with 512 and 256 neurons
output_size = y_train.shape[1]  # Number of genres

# Initialize and train the neural network
nn1 = SimpleNeuralNetwork(input_size, hidden_sizes, output_size, learning_rate=0.01)
nn1.train(X_train, y_train, epochs=30, batch_size=32)

# Evaluate on the test set
y_pred = nn1.predict(X_test)

Epoch 1/30, Loss: 2.8653
Epoch 2/30, Loss: 2.2607
Epoch 3/30, Loss: 1.8960
Epoch 4/30, Loss: 1.5747
Epoch 5/30, Loss: 1.4347
Epoch 6/30, Loss: 1.2104
Epoch 7/30, Loss: 1.0262
Epoch 8/30, Loss: 0.8937
Epoch 9/30, Loss: 0.7468
Epoch 10/30, Loss: 0.6658
Epoch 11/30, Loss: 0.5940
Epoch 12/30, Loss: 0.4278
Epoch 13/30, Loss: 0.4969
Epoch 14/30, Loss: 0.2265
Epoch 15/30, Loss: 0.2230
Epoch 16/30, Loss: 0.1500
Epoch 17/30, Loss: 0.1247
Epoch 18/30, Loss: 0.0649
Epoch 19/30, Loss: 0.0434
Epoch 20/30, Loss: 0.0341
Epoch 21/30, Loss: 0.0286
Epoch 22/30, Loss: 0.0220
Epoch 23/30, Loss: 0.0193
Epoch 24/30, Loss: 0.0174
Epoch 25/30, Loss: 0.0155
Epoch 26/30, Loss: 0.0143
Epoch 27/30, Loss: 0.0133
Epoch 28/30, Loss: 0.0121
Epoch 29/30, Loss: 0.0113
Epoch 30/30, Loss: 0.0105


In [146]:
# Evaluate on the test set
max_probs, predicted_genres_indices = nn1.predict(X_test)

# Map predicted genre indices to genre names
predicted_genres = [genre_columns[idx] for idx in predicted_genres_indices]

# Initialize counters
correct_predictions = 0
total_predictions = len(y_test)

# Iterate through predictions, actual genres, and test indices
for i, (prob, genre, actual_genre_list, test_idx) in enumerate(zip(max_probs, predicted_genres, y_test, test_indices)):
    # Map one-hot encoded actual genres to their names
    actual_genres = [genre_columns[idx] for idx, val in enumerate(actual_genre_list) if val == 1]
    
    # Extract the painter's name for the test sample
    painter = image_metadata_reduced_df.iloc[test_idx]['artist_name']
    
    # Check if the predicted genre is in the actual genres
    if genre in actual_genres:
        correct_predictions += 1

    # Display results for each test sample
    print(f"Test Sample {i + 1}:")
    print(f"Test Data Painter: {painter}")
    print(f"Predicted Highest Probability: {prob:.9f}")
    print(f"Predicted Genre: {genre}")
    print(f"Actual Genres: {actual_genres}")
    print(f"Correct Prediction: {'Yes' if genre in actual_genres else 'No'}")
    print("-" * 30)

Test Sample 1:
Test Data Painter: Albrecht Durer
Predicted Highest Probability: 0.999999999
Predicted Genre: Northern Renaissance
Actual Genres: ['Northern Renaissance']
Correct Prediction: Yes
------------------------------
Test Sample 2:
Test Data Painter: Francisco Goya
Predicted Highest Probability: 0.279380321
Predicted Genre: Cubism
Actual Genres: ['Romanticism']
Correct Prediction: No
------------------------------
Test Sample 3:
Test Data Painter: Pierre-Auguste Renoir
Predicted Highest Probability: 0.494538312
Predicted Genre: Cubism
Actual Genres: ['Impressionism']
Correct Prediction: No
------------------------------
Test Sample 4:
Test Data Painter: Gustave Courbet
Predicted Highest Probability: 0.998438396
Predicted Genre: High Renaissance
Actual Genres: ['Realism']
Correct Prediction: No
------------------------------
Test Sample 5:
Test Data Painter: Francisco Goya
Predicted Highest Probability: 0.999998690
Predicted Genre: Romanticism
Actual Genres: ['Romanticism']
Corr

In [148]:
# Calculate and display accuracy
accuracy = correct_predictions / total_predictions * 100
print(f"Simple Neural Network Accuracy: {accuracy:.2f}%")

Simple Neural Network Accuracy: 73.19%


### TEST with increased size (8 times larger) of hidden layers

In [151]:
hidden_sizes = [4096, 2048]  # Two hidden layers with 4096 and 2048 neurons

# Initialize and train the neural network
nn2 = SimpleNeuralNetwork(input_size, hidden_sizes, output_size, learning_rate=0.01)
nn2.train(X_train, y_train, epochs=30, batch_size=32)

# Evaluate on the test set
y_pred = nn2.predict(X_test)

# Evaluate on the test set
max_probs, predicted_genres_indices = nn2.predict(X_test)

# Map predicted genre indices to genre names
predicted_genres = [genre_columns[idx] for idx in predicted_genres_indices]

# Initialize counters
correct_predictions = 0
total_predictions = len(y_test)

# Iterate through predictions, actual genres, and test indices
for i, (prob, genre, actual_genre_list, test_idx) in enumerate(zip(max_probs, predicted_genres, y_test, test_indices)):
    # Map one-hot encoded actual genres to their names
    actual_genres = [genre_columns[idx] for idx, val in enumerate(actual_genre_list) if val == 1]
    
    # Extract the painter's name for the test sample
    painter = image_metadata_reduced_df.iloc[test_idx]['artist_name']
    
    # Check if the predicted genre is in the actual genres
    if genre in actual_genres:
        correct_predictions += 1
'''
    # Display results for each test sample
    print(f"Test Sample {i + 1}:")
    print(f"Test Data Painter: {painter}")
    print(f"Predicted Highest Probability: {prob:.9f}")
    print(f"Predicted Genre: {genre}")
    print(f"Actual Genres: {actual_genres}")
    print(f"Correct Prediction: {'Yes' if genre in actual_genres else 'No'}")
    print("-" * 30)
'''

Epoch 1/30, Loss: 1.8434
Epoch 2/30, Loss: 1.2590
Epoch 3/30, Loss: 0.9196
Epoch 4/30, Loss: 0.7269
Epoch 5/30, Loss: 0.4738
Epoch 6/30, Loss: 0.3624
Epoch 7/30, Loss: 0.1903
Epoch 8/30, Loss: 0.1197
Epoch 9/30, Loss: 0.0767
Epoch 10/30, Loss: 0.0573
Epoch 11/30, Loss: 0.0442
Epoch 12/30, Loss: 0.0368
Epoch 13/30, Loss: 0.0308
Epoch 14/30, Loss: 0.0270
Epoch 15/30, Loss: 0.0240
Epoch 16/30, Loss: 0.0211
Epoch 17/30, Loss: 0.0192
Epoch 18/30, Loss: 0.0172
Epoch 19/30, Loss: 0.0159
Epoch 20/30, Loss: 0.0146
Epoch 21/30, Loss: 0.0135
Epoch 22/30, Loss: 0.0127
Epoch 23/30, Loss: 0.0118
Epoch 24/30, Loss: 0.0111
Epoch 25/30, Loss: 0.0105
Epoch 26/30, Loss: 0.0099
Epoch 27/30, Loss: 0.0094
Epoch 28/30, Loss: 0.0088
Epoch 29/30, Loss: 0.0084
Epoch 30/30, Loss: 0.0080


'\n    # Display results for each test sample\n    print(f"Test Sample {i + 1}:")\n    print(f"Test Data Painter: {painter}")\n    print(f"Predicted Highest Probability: {prob:.9f}")\n    print(f"Predicted Genre: {genre}")\n    print(f"Actual Genres: {actual_genres}")\n    print(f"Correct Prediction: {\'Yes\' if genre in actual_genres else \'No\'}")\n    print("-" * 30)\n'

In [153]:
# Calculate and display accuracy
accuracy = correct_predictions / total_predictions * 100
print(f"Simple Neural Network Accuracy (increased size of hidden layers): {accuracy:.2f}%")

Simple Neural Network Accuracy (increased size of hidden layers): 75.40%


## Model 2: Boosted Neural Network (Ensemble Learning) 
Boosting trains multiple neural networks sequentially, focusing on correcting the errors of the previous network.

In [156]:
class BoostedNeuralNetwork:
    def __init__(self, base_model_class, n_models, input_size, hidden_sizes, output_size, learning_rate, gradient_clip_value=0.5):
        self.models = []
        self.gradient_clip_value = gradient_clip_value

        for _ in range(n_models):
            model = base_model_class(input_size, hidden_sizes, output_size, learning_rate)
            self.models.append(model)

    def train(self, X, y, epochs=10, batch_size=32, residual_threshold=1e-3):
        residuals = y
        for model_idx, model in enumerate(self.models):
            print(f"Training model {model_idx + 1}/{len(self.models)}")
            
            # Train the current model
            model.train(X, residuals, epochs, batch_size, self.gradient_clip_value)
            
            # Compute predictions
            predictions = model.forward(X)
            
            # Update residuals
            residuals = residuals - predictions
            
            # Scale residuals to prevent numerical instability
            residual_norm = np.linalg.norm(residuals, axis=0, keepdims=True)
            if np.all(residual_norm < residual_threshold):
                print("Residuals below threshold. Stopping further training.")
                break
            residuals = residuals / residual_norm

    def predict(self, X):
        # Aggregate predictions from all models
        ensemble_predictions = np.zeros_like(self.models[0].forward(X))
        for model in self.models:
            ensemble_predictions += model.forward(X)
        return ensemble_predictions / len(self.models)  # Average predictions


# Gradient Clipping is added
class SimpleNeuralNetwork2:
    def __init__(self, input_size, hidden_sizes, output_size, learning_rate):
        self.learning_rate = learning_rate
        self.weights = {
            "W1": np.random.randn(input_size, hidden_sizes[0]) * 0.01,
            "W2": np.random.randn(hidden_sizes[0], hidden_sizes[1]) * 0.01,
            "W3": np.random.randn(hidden_sizes[1], output_size) * 0.01,
        }
        self.biases = {
            "b1": np.zeros((1, hidden_sizes[0])),
            "b2": np.zeros((1, hidden_sizes[1])),
            "b3": np.zeros((1, output_size)),
        }

    def relu(self, Z):
        return np.maximum(0, Z)

    def relu_derivative(self, Z):
        return Z > 0

    def sigmoid(self, Z):
        return 1 / (1 + np.exp(-Z))

    def binary_cross_entropy_loss(self, y_pred, y_true):
        n_samples = y_true.shape[0]
        return -np.sum(y_true * np.log(y_pred + 1e-10) + (1 - y_true) * np.log(1 - y_pred + 1e-10)) / n_samples

    def forward(self, X):
        # Forward propagation
        self.Z1 = np.dot(X, self.weights["W1"]) + self.biases["b1"]
        self.A1 = self.relu(self.Z1)
        self.Z2 = np.dot(self.A1, self.weights["W2"]) + self.biases["b2"]
        self.A2 = self.relu(self.Z2)
        self.Z3 = np.dot(self.A2, self.weights["W3"]) + self.biases["b3"]
        self.A3 = self.sigmoid(self.Z3)
        return self.A3

    def backward(self, X, y_true, y_pred, gradient_clip_value):
        # Backward propagation
        n_samples = y_true.shape[0]

        dZ3 = y_pred - y_true
        dW3 = np.dot(self.A2.T, dZ3) / n_samples
        db3 = np.sum(dZ3, axis=0, keepdims=True) / n_samples

        dA2 = np.dot(dZ3, self.weights["W3"].T)
        dZ2 = dA2 * self.relu_derivative(self.Z2)
        dW2 = np.dot(self.A1.T, dZ2) / n_samples
        db2 = np.sum(dZ2, axis=0, keepdims=True) / n_samples

        dA1 = np.dot(dZ2, self.weights["W2"].T)
        dZ1 = dA1 * self.relu_derivative(self.Z1)
        dW1 = np.dot(X.T, dZ1) / n_samples
        db1 = np.sum(dZ1, axis=0, keepdims=True) / n_samples

        # Gradient clipping
        dW3 = np.clip(dW3, -gradient_clip_value, gradient_clip_value)
        dW2 = np.clip(dW2, -gradient_clip_value, gradient_clip_value)
        dW1 = np.clip(dW1, -gradient_clip_value, gradient_clip_value)

        # Update weights and biases
        self.weights["W1"] -= self.learning_rate * dW1
        self.biases["b1"] -= self.learning_rate * db1
        self.weights["W2"] -= self.learning_rate * dW2
        self.biases["b2"] -= self.learning_rate * db2
        self.weights["W3"] -= self.learning_rate * dW3
        self.biases["b3"] -= self.learning_rate * db3

    def train(self, X, y, epochs, batch_size, gradient_clip_value):
        n_samples = X.shape[0]
        for epoch in range(epochs):
            indices = np.arange(n_samples)
            np.random.shuffle(indices)
            X = X[indices]
            y = y[indices]
            for i in range(0, n_samples, batch_size):
                X_batch = X[i:i + batch_size]
                y_batch = y[i:i + batch_size]
                y_pred = self.forward(X_batch)
                self.backward(X_batch, y_batch, y_pred, gradient_clip_value)
            y_pred_full = self.forward(X)
            loss = self.binary_cross_entropy_loss(y_pred_full, y)
            print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss:.4f}")

### Test with different number of models

In [203]:
## TEST 1
# Create and train boosted neural networks
boosted_nn1 = BoostedNeuralNetwork(SimpleNeuralNetwork2, n_models=2, input_size=X_train.shape[1], hidden_sizes=[512, 256], output_size=y_train.shape[1], learning_rate=0.01)
## MUST Mention why we lower the learning rate (higher learning rate -> may have overflow & doesnt guarantee higher accuary)
## Overflow: 0.005 (47%) & 0.01 (72%)
boosted_nn1.train(X_train, y_train, epochs=30, batch_size=32)

Training model 1/2
Epoch 1/30, Loss: 2.8546
Epoch 2/30, Loss: 2.2740
Epoch 3/30, Loss: 1.8764
Epoch 4/30, Loss: 1.6576
Epoch 5/30, Loss: 1.3712
Epoch 6/30, Loss: 1.2116
Epoch 7/30, Loss: 1.0278
Epoch 8/30, Loss: 0.8690
Epoch 9/30, Loss: 0.7699
Epoch 10/30, Loss: 0.6511
Epoch 11/30, Loss: 0.5191
Epoch 12/30, Loss: 0.4079
Epoch 13/30, Loss: 0.3355
Epoch 14/30, Loss: 0.2788
Epoch 15/30, Loss: 0.2548
Epoch 16/30, Loss: 0.1863
Epoch 17/30, Loss: 0.1161
Epoch 18/30, Loss: 0.0767
Epoch 19/30, Loss: 0.0509
Epoch 20/30, Loss: 0.0343
Epoch 21/30, Loss: 0.0352
Epoch 22/30, Loss: 0.0227
Epoch 23/30, Loss: 0.0193
Epoch 24/30, Loss: 0.0171
Epoch 25/30, Loss: 0.0153
Epoch 26/30, Loss: 0.0138
Epoch 27/30, Loss: 0.0127
Epoch 28/30, Loss: 0.0116
Epoch 29/30, Loss: 0.0109
Epoch 30/30, Loss: 0.0102
Training model 2/2
Epoch 1/30, Loss: -0.0183
Epoch 2/30, Loss: -0.0363
Epoch 3/30, Loss: -0.0519
Epoch 4/30, Loss: -0.0576
Epoch 5/30, Loss: -0.0558
Epoch 6/30, Loss: -0.0455


  return 1 / (1 + np.exp(-Z))


Epoch 7/30, Loss: -0.0341
Epoch 8/30, Loss: -0.0321
Epoch 9/30, Loss: -0.0345
Epoch 10/30, Loss: -0.0328
Epoch 11/30, Loss: -0.0274
Epoch 12/30, Loss: -0.0316
Epoch 13/30, Loss: -0.0334
Epoch 14/30, Loss: -0.0339
Epoch 15/30, Loss: -0.0337
Epoch 16/30, Loss: -0.0338
Epoch 17/30, Loss: -0.0337
Epoch 18/30, Loss: -0.0336
Epoch 19/30, Loss: -0.0336
Epoch 20/30, Loss: -0.0336
Epoch 21/30, Loss: -0.0336
Epoch 22/30, Loss: -0.0336
Epoch 23/30, Loss: -0.0336
Epoch 24/30, Loss: -0.0336
Epoch 25/30, Loss: -0.0336
Epoch 26/30, Loss: -0.0336
Epoch 27/30, Loss: -0.0336
Epoch 28/30, Loss: -0.0336
Epoch 29/30, Loss: -0.0336
Epoch 30/30, Loss: -0.0336


In [205]:
# Predict probabilities for the test set
boosted_predictions = boosted_nn1.predict(X_test)  # Shape: (n_samples, num_classes)

# Get the highest probability and corresponding genres
max_probs = np.max(boosted_predictions, axis=1)  # Highest probability for each sample
predicted_genres_indices = np.argmax(boosted_predictions, axis=1)  # Index of the highest probability

# Map predicted genre indices to genre names
predicted_genres = [genre_columns[idx] for idx in predicted_genres_indices]

# Initialize counters
correct_predictions = 0
total_predictions = len(y_test)

# Iterate through predictions, actual genres, and test indices
for i, (prob, genre, actual_genre_list, test_idx) in enumerate(zip(max_probs, predicted_genres, y_test, test_indices)):
    # Map one-hot encoded actual genres to their names
    actual_genres = [genre_columns[idx] for idx, val in enumerate(actual_genre_list) if val == 1]
    
    # Extract the painter's name for the test sample
    painter = image_metadata_reduced_df.iloc[test_idx]['artist_name']
    
    # Check if the predicted genre is in the actual genres
    if genre in actual_genres:
        correct_predictions += 1

    # Display results for each test sample
    print(f"Test Sample {i + 1}:")
    print(f"Test Data Painter: {painter}")
    print(f"Predicted Highest Probability: {prob:.9f}")
    print(f"Predicted Genre: {genre}")
    print(f"Actual Genres: {actual_genres}")
    print(f"Correct Prediction: {'Yes' if genre in actual_genres else 'No'}")
    print("-" * 30)

Test Sample 1:
Test Data Painter: Albrecht Durer
Predicted Highest Probability: 0.499999268
Predicted Genre: Northern Renaissance
Actual Genres: ['Northern Renaissance']
Correct Prediction: Yes
------------------------------
Test Sample 2:
Test Data Painter: Francisco Goya
Predicted Highest Probability: 0.494882341
Predicted Genre: Cubism
Actual Genres: ['Romanticism']
Correct Prediction: No
------------------------------
Test Sample 3:
Test Data Painter: Pierre-Auguste Renoir
Predicted Highest Probability: 0.473419596
Predicted Genre: Cubism
Actual Genres: ['Impressionism']
Correct Prediction: No
------------------------------
Test Sample 4:
Test Data Painter: Gustave Courbet
Predicted Highest Probability: 0.230768766
Predicted Genre: Mannerism
Actual Genres: ['Realism']
Correct Prediction: No
------------------------------
Test Sample 5:
Test Data Painter: Francisco Goya
Predicted Highest Probability: 0.499947556
Predicted Genre: Romanticism
Actual Genres: ['Romanticism']
Correct Pre

  return 1 / (1 + np.exp(-Z))


In [207]:
# Calculate and display overall accuracy
accuracy = correct_predictions / total_predictions * 100
print(f"Boosted Neural Network Accuracy (2 models): {accuracy:.2f}%")

Boosted Neural Network Accuracy (2 models): 72.59%


In [171]:
## TEST 2
# Create and train boosted neural networks (5 models)
boosted_nn2 = BoostedNeuralNetwork(SimpleNeuralNetwork2, n_models=5, input_size=X_train.shape[1], hidden_sizes=[512, 256], output_size=y_train.shape[1], learning_rate=0.001)
boosted_nn2.train(X_train, y_train, epochs=30, batch_size=32)

# Predict probabilities for the test set
boosted_predictions = boosted_nn2.predict(X_test)  # Shape: (n_samples, num_classes)

# Get the highest probability and corresponding genres
max_probs = np.max(boosted_predictions, axis=1)  # Highest probability for each sample
predicted_genres_indices = np.argmax(boosted_predictions, axis=1)  # Index of the highest probability

# Map predicted genre indices to genre names
predicted_genres = [genre_columns[idx] for idx in predicted_genres_indices]

# Initialize counters
correct_predictions = 0
total_predictions = len(y_test)

# Iterate through predictions, actual genres, and test indices
for i, (prob, genre, actual_genre_list, test_idx) in enumerate(zip(max_probs, predicted_genres, y_test, test_indices)):
    # Map one-hot encoded actual genres to their names
    actual_genres = [genre_columns[idx] for idx, val in enumerate(actual_genre_list) if val == 1]
    
    # Extract the painter's name for the test sample
    painter = image_metadata_reduced_df.iloc[test_idx]['artist_name']
    
    # Check if the predicted genre is in the actual genres
    if genre in actual_genres:
        correct_predictions += 1

Training model 1/5
Epoch 1/30, Loss: 4.1969
Epoch 2/30, Loss: 4.0157
Epoch 3/30, Loss: 3.8089
Epoch 4/30, Loss: 3.5793
Epoch 5/30, Loss: 3.3718
Epoch 6/30, Loss: 3.1980
Epoch 7/30, Loss: 3.0608
Epoch 8/30, Loss: 2.9458
Epoch 9/30, Loss: 2.8380
Epoch 10/30, Loss: 2.7405
Epoch 11/30, Loss: 2.6492
Epoch 12/30, Loss: 2.5599
Epoch 13/30, Loss: 2.4830
Epoch 14/30, Loss: 2.4036
Epoch 15/30, Loss: 2.3352
Epoch 16/30, Loss: 2.2680
Epoch 17/30, Loss: 2.2076
Epoch 18/30, Loss: 2.1473
Epoch 19/30, Loss: 2.0889
Epoch 20/30, Loss: 2.0359
Epoch 21/30, Loss: 1.9782
Epoch 22/30, Loss: 1.9322
Epoch 23/30, Loss: 1.8865
Epoch 24/30, Loss: 1.8320
Epoch 25/30, Loss: 1.7982
Epoch 26/30, Loss: 1.7397
Epoch 27/30, Loss: 1.6937
Epoch 28/30, Loss: 1.6521
Epoch 29/30, Loss: 1.6070
Epoch 30/30, Loss: 1.5733
Training model 2/5
Epoch 1/30, Loss: 0.1141
Epoch 2/30, Loss: 0.0499
Epoch 3/30, Loss: 0.0350
Epoch 4/30, Loss: 0.0286
Epoch 5/30, Loss: 0.0248
Epoch 6/30, Loss: 0.0223
Epoch 7/30, Loss: 0.0203
Epoch 8/30, Loss

In [173]:
# Calculate and display overall accuracy
accuracy = correct_predictions / total_predictions * 100
print(f"Boosted Neural Network Accuracy (5 models): {accuracy:.2f}%")

Boosted Neural Network Accuracy (5 models): 66.97%


In [175]:
# Create and train boosted neural networks (10 models)
boosted_nn3 = BoostedNeuralNetwork(SimpleNeuralNetwork2, n_models=10, input_size=X_train.shape[1], hidden_sizes=[512, 256], output_size=y_train.shape[1], learning_rate=0.001)
## MUST Mention why we lower the learning rate
boosted_nn3.train(X_train, y_train, epochs=30, batch_size=32)

# Predict probabilities for the test set
boosted_predictions = boosted_nn3.predict(X_test)  # Shape: (n_samples, num_classes)

# Get the highest probability and corresponding genres
max_probs = np.max(boosted_predictions, axis=1)  # Highest probability for each sample
predicted_genres_indices = np.argmax(boosted_predictions, axis=1)  # Index of the highest probability

# Map predicted genre indices to genre names
predicted_genres = [genre_columns[idx] for idx in predicted_genres_indices]

# Initialize counters
correct_predictions = 0
total_predictions = len(y_test)

# Iterate through predictions, actual genres, and test indices
for i, (prob, genre, actual_genre_list, test_idx) in enumerate(zip(max_probs, predicted_genres, y_test, test_indices)):
    # Map one-hot encoded actual genres to their names
    actual_genres = [genre_columns[idx] for idx, val in enumerate(actual_genre_list) if val == 1]
    
    # Extract the painter's name for the test sample
    painter = image_metadata_reduced_df.iloc[test_idx]['artist_name']
    
    # Check if the predicted genre is in the actual genres
    if genre in actual_genres:
        correct_predictions += 1

Training model 1/10
Epoch 1/30, Loss: 4.1778
Epoch 2/30, Loss: 3.9892
Epoch 3/30, Loss: 3.7776
Epoch 4/30, Loss: 3.5522
Epoch 5/30, Loss: 3.3465
Epoch 6/30, Loss: 3.1765
Epoch 7/30, Loss: 3.0382
Epoch 8/30, Loss: 2.9171
Epoch 9/30, Loss: 2.8063
Epoch 10/30, Loss: 2.7054
Epoch 11/30, Loss: 2.6142
Epoch 12/30, Loss: 2.5247
Epoch 13/30, Loss: 2.4459
Epoch 14/30, Loss: 2.3727
Epoch 15/30, Loss: 2.3037
Epoch 16/30, Loss: 2.2381
Epoch 17/30, Loss: 2.1800
Epoch 18/30, Loss: 2.1393
Epoch 19/30, Loss: 2.0688
Epoch 20/30, Loss: 2.0169
Epoch 21/30, Loss: 1.9648
Epoch 22/30, Loss: 1.9149
Epoch 23/30, Loss: 1.8649
Epoch 24/30, Loss: 1.8241
Epoch 25/30, Loss: 1.7813
Epoch 26/30, Loss: 1.7325
Epoch 27/30, Loss: 1.6859
Epoch 28/30, Loss: 1.6482
Epoch 29/30, Loss: 1.6059
Epoch 30/30, Loss: 1.5658
Training model 2/10
Epoch 1/30, Loss: 0.0999
Epoch 2/30, Loss: 0.0315
Epoch 3/30, Loss: 0.0150
Epoch 4/30, Loss: 0.0075
Epoch 5/30, Loss: 0.0029
Epoch 6/30, Loss: -0.0004
Epoch 7/30, Loss: -0.0030
Epoch 8/30, 

In [177]:
# Calculate and display overall accuracy
accuracy = correct_predictions / total_predictions * 100
print(f"Boosted Neural Network Accuracy (10 models): {accuracy:.2f}%")

Boosted Neural Network Accuracy (10 models): 68.58%


## Model X: Neural Network with Droupout (We don't use this)

In [100]:
class SimpleNeuralNetworkWithDropout:
    def __init__(self, input_size, hidden_sizes, output_size, learning_rate=0.01, dropout_rate=0.5):
        self.learning_rate = learning_rate
        self.dropout_rate = dropout_rate

        # Initialize weights and biases
        self.weights = {
            "W1": np.random.randn(input_size, hidden_sizes[0]) * 0.01,
            "W2": np.random.randn(hidden_sizes[0], hidden_sizes[1]) * 0.01,
            "W3": np.random.randn(hidden_sizes[1], output_size) * 0.01,
        }
        self.biases = {
            "b1": np.zeros((1, hidden_sizes[0])),
            "b2": np.zeros((1, hidden_sizes[1])),
            "b3": np.zeros((1, output_size)),
        }

    def relu(self, Z):
        return np.maximum(0, Z)

    def relu_derivative(self, Z):
        return Z > 0

    def sigmoid(self, Z):
        return 1 / (1 + np.exp(-Z))

    def binary_cross_entropy_loss(self, y_pred, y_true):
        n_samples = y_true.shape[0]
        return -np.sum(y_true * np.log(y_pred + 1e-10) + (1 - y_true) * np.log(1 - y_pred + 1e-10)) / n_samples

    def apply_dropout(self, A):
        mask = (np.random.rand(*A.shape) > self.dropout_rate) / (1 - self.dropout_rate)
        return A * mask

    def forward(self, X, training=True):
        self.Z1 = np.dot(X, self.weights["W1"]) + self.biases["b1"]
        self.A1 = self.relu(self.Z1)
        if training:
            self.A1 = self.apply_dropout(self.A1)

        self.Z2 = np.dot(self.A1, self.weights["W2"]) + self.biases["b2"]
        self.A2 = self.relu(self.Z2)
        if training:
            self.A2 = self.apply_dropout(self.A2)

        self.Z3 = np.dot(self.A2, self.weights["W3"]) + self.biases["b3"]
        self.A3 = self.sigmoid(self.Z3)

        return self.A3

    def backward(self, X, y_true, y_pred):
        n_samples = y_true.shape[0]

        dZ3 = y_pred - y_true
        dW3 = np.dot(self.A2.T, dZ3) / n_samples
        db3 = np.sum(dZ3, axis=0, keepdims=True) / n_samples

        dA2 = np.dot(dZ3, self.weights["W3"].T)
        dZ2 = dA2 * self.relu_derivative(self.Z2)
        dW2 = np.dot(self.A1.T, dZ2) / n_samples
        db2 = np.sum(dZ2, axis=0, keepdims=True) / n_samples

        dA1 = np.dot(dZ2, self.weights["W2"].T)
        dZ1 = dA1 * self.relu_derivative(self.Z1)
        dW1 = np.dot(X.T, dZ1) / n_samples
        db1 = np.sum(dZ1, axis=0, keepdims=True) / n_samples

        self.weights["W1"] -= self.learning_rate * dW1
        self.biases["b1"] -= self.learning_rate * db1
        self.weights["W2"] -= self.learning_rate * dW2
        self.biases["b2"] -= self.learning_rate * db2
        self.weights["W3"] -= self.learning_rate * dW3
        self.biases["b3"] -= self.learning_rate * db3

    def train(self, X, y, epochs=20, batch_size=32):
        n_samples = X.shape[0]
        for epoch in range(epochs):
            indices = np.arange(n_samples)
            np.random.shuffle(indices)
            X = X[indices]
            y = y[indices]

            for i in range(0, n_samples, batch_size):
                X_batch = X[i:i + batch_size]
                y_batch = y[i:i + batch_size]
                y_pred = self.forward(X_batch)
                self.backward(X_batch, y_batch, y_pred)

            y_pred_full = self.forward(X, training=False)
            loss = self.binary_cross_entropy_loss(y_pred_full, y)
            print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss:.4f}")

    def predict(self, X):
        y_pred = self.forward(X, training=False)
        max_probs = np.max(y_pred, axis=1)
        max_labels = np.argmax(y_pred, axis=1)
        return max_probs, max_labels
    

In [37]:
# Define model parameters
input_size = X_train.shape[1]  # Number of input features
hidden_sizes = [512, 256]      # Two hidden layers
output_size = y_train.shape[1]  # Number of output genres
learning_rate = 0.01           # Learning rate
dropout_rate = 0.2            # Dropout rate

# Initialize the neural network with dropout
nn_with_dropout = SimpleNeuralNetworkWithDropout(input_size, hidden_sizes, output_size, learning_rate, dropout_rate)

# Train the neural network
nn_with_dropout.train(X_train, y_train, epochs=30, batch_size=32)

Epoch 1/30, Loss: 2.9636
Epoch 2/30, Loss: 2.3003
Epoch 3/30, Loss: 1.9535
Epoch 4/30, Loss: 1.7726
Epoch 5/30, Loss: 1.5297
Epoch 6/30, Loss: 1.3660
Epoch 7/30, Loss: 1.1766
Epoch 8/30, Loss: 1.1005
Epoch 9/30, Loss: 1.0347
Epoch 10/30, Loss: 0.8907
Epoch 11/30, Loss: 0.7863
Epoch 12/30, Loss: 0.6892
Epoch 13/30, Loss: 0.6217
Epoch 14/30, Loss: 0.6101
Epoch 15/30, Loss: 0.5124
Epoch 16/30, Loss: 0.5326
Epoch 17/30, Loss: 0.4445
Epoch 18/30, Loss: 0.3953
Epoch 19/30, Loss: 0.3212
Epoch 20/30, Loss: 0.3227
Epoch 21/30, Loss: 0.2894
Epoch 22/30, Loss: 0.2623
Epoch 23/30, Loss: 0.2668
Epoch 24/30, Loss: 0.2305
Epoch 25/30, Loss: 0.1915
Epoch 26/30, Loss: 0.1908
Epoch 27/30, Loss: 0.2087
Epoch 28/30, Loss: 0.2002
Epoch 29/30, Loss: 0.2077
Epoch 30/30, Loss: 0.1335


In [39]:
# Get predictions on the test set
max_probs, predicted_genres_indices = nn_with_dropout.predict(X_test)

# Map predicted genre indices to genre names
predicted_genres = [genre_columns[idx] for idx in predicted_genres_indices]

# Initialize counters for evaluation
correct_predictions = 0
total_predictions = len(y_test)

# Iterate through predictions and compare with actual genres
for i, (prob, genre_idx, actual_genre) in enumerate(zip(max_probs, predicted_genres_indices, y_test)):
    predicted_genre = genre_columns[genre_idx]
    actual_genres = [genre_columns[idx] for idx, val in enumerate(actual_genre) if val == 1]
    
    # Check if the predicted genre is in the actual genres
    is_correct = predicted_genre in actual_genres
    if is_correct:
        correct_predictions += 1

    # Display the results
    print(f"Sample {i + 1}:")
    print(f"Predicted Genre: {predicted_genre} (Prob: {prob:.9f})")
    print(f"Actual Genres: {actual_genres}")
    print(f"Correct: {'Yes' if is_correct else 'No'}")
    print("-" * 30)

Sample 1:
Predicted Genre: Northern Renaissance (Prob: 0.999999991)
Actual Genres: ['Northern Renaissance']
Correct: Yes
------------------------------
Sample 2:
Predicted Genre: Post-Impressionism (Prob: 0.314340543)
Actual Genres: ['Romanticism']
Correct: No
------------------------------
Sample 3:
Predicted Genre: Cubism (Prob: 0.858752883)
Actual Genres: ['Impressionism']
Correct: No
------------------------------
Sample 4:
Predicted Genre: High Renaissance (Prob: 0.998770804)
Actual Genres: ['Realism']
Correct: No
------------------------------
Sample 5:
Predicted Genre: Romanticism (Prob: 0.999783567)
Actual Genres: ['Romanticism']
Correct: Yes
------------------------------
Sample 6:
Predicted Genre: Post-Impressionism (Prob: 0.995638185)
Actual Genres: ['Post-Impressionism', 'Symbolism']
Correct: Yes
------------------------------
Sample 7:
Predicted Genre: Baroque (Prob: 0.999999996)
Actual Genres: ['Baroque']
Correct: Yes
------------------------------
Sample 8:
Predicted Gen

In [41]:
# Calculate and display accuracy
accuracy = correct_predictions / total_predictions * 100
print(f"Dropout Dural Network Accuracy: {accuracy:.2f}%")

Dropout Dural Network Accuracy: 73.07%


# MODEL Y: With He Inititialization (we don't use this)
This can help the network converge faster and reduce sensitivity to initial weights, making training more stable.

In [126]:
class NeuralNetworkWithHeInit:
    def __init__(self, input_size, hidden_sizes, output_size, learning_rate=0.01):
        self.learning_rate = learning_rate

        # Initialize weights and biases using He Initialization
        self.weights = {
            "W1": np.random.randn(input_size, hidden_sizes[0]) * np.sqrt(2 / input_size),
            "W2": np.random.randn(hidden_sizes[0], hidden_sizes[1]) * np.sqrt(2 / hidden_sizes[0]),
            "W3": np.random.randn(hidden_sizes[1], output_size) * np.sqrt(2 / hidden_sizes[1]),
        }
        self.biases = {
            "b1": np.zeros((1, hidden_sizes[0])),
            "b2": np.zeros((1, hidden_sizes[1])),
            "b3": np.zeros((1, output_size)),
        }

    def relu(self, Z):
        return np.maximum(0, Z)

    def relu_derivative(self, Z):
        return Z > 0


    def sigmoid(self, Z):
        Z = np.clip(Z, -500, 500)  # Prevent overflow
        return 1 / (1 + np.exp(-Z))

    def binary_cross_entropy_loss(self, y_pred, y_true):
        n_samples = y_true.shape[0]
        return -np.sum(y_true * np.log(y_pred + 1e-10) + (1 - y_true) * np.log(1 - y_pred + 1e-10)) / n_samples

    def forward(self, X):
        # Forward propagation
        self.Z1 = np.dot(X, self.weights["W1"]) + self.biases["b1"]
        self.A1 = self.relu(self.Z1)

        self.Z2 = np.dot(self.A1, self.weights["W2"]) + self.biases["b2"]
        self.A2 = self.relu(self.Z2)

        self.Z3 = np.dot(self.A2, self.weights["W3"]) + self.biases["b3"]
        self.A3 = self.sigmoid(self.Z3)  # Sigmoid activation for multi-label classification

        return self.A3

    def backward(self, X, y_true, y_pred):
        # Backward propagation
        n_samples = y_true.shape[0]

        dZ3 = y_pred - y_true  # Binary cross-entropy gradient
        dW3 = np.dot(self.A2.T, dZ3) / n_samples
        db3 = np.sum(dZ3, axis=0, keepdims=True) / n_samples

        dA2 = np.dot(dZ3, self.weights["W3"].T)
        dZ2 = dA2 * self.relu_derivative(self.Z2)
        dW2 = np.dot(self.A1.T, dZ2) / n_samples
        db2 = np.sum(dZ2, axis=0, keepdims=True) / n_samples

        dA1 = np.dot(dZ2, self.weights["W2"].T)
        dZ1 = dA1 * self.relu_derivative(self.Z1)
        dW1 = np.dot(X.T, dZ1) / n_samples
        db1 = np.sum(dZ1, axis=0, keepdims=True) / n_samples

        # Update weights and biases
        self.weights["W1"] -= self.learning_rate * dW1
        self.biases["b1"] -= self.learning_rate * db1
        self.weights["W2"] -= self.learning_rate * dW2
        self.biases["b2"] -= self.learning_rate * db2
        self.weights["W3"] -= self.learning_rate * dW3
        self.biases["b3"] -= self.learning_rate * db3

    def train(self, X, y, epochs=20, batch_size=32):
        n_samples = X.shape[0]

        for epoch in range(epochs):
            indices = np.arange(n_samples)
            np.random.shuffle(indices)
            X = X[indices]
            y = y[indices]

            for i in range(0, n_samples, batch_size):
                X_batch = X[i:i + batch_size]
                y_batch = y[i:i + batch_size]
                y_pred = self.forward(X_batch)
                self.backward(X_batch, y_batch, y_pred)

            y_pred_full = self.forward(X)
            loss = self.binary_cross_entropy_loss(y_pred_full, y)
            print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss:.4f}")

    def predict(self, X):
        y_pred = self.forward(X)
        max_probs = np.max(y_pred, axis=1)  # Highest probability for each sample
        max_labels = np.argmax(y_pred, axis=1)  # Index of the highest probability
        return max_probs, max_labels

In [128]:
# Define model parameters
input_size = X_train.shape[1]  # Number of input features
hidden_sizes = [512, 256]      # Two hidden layers
output_size = y_train.shape[1]  # Number of output genres
learning_rate = 0.01           # Learning rate

# Initialize and train the neural network
nn_he_init = NeuralNetworkWithHeInit(input_size, hidden_sizes, output_size, learning_rate)
nn_he_init.train(X_train, y_train, epochs=30, batch_size=32)

Epoch 1/30, Loss: 5.7375
Epoch 2/30, Loss: 4.1165
Epoch 3/30, Loss: 3.5347
Epoch 4/30, Loss: 3.2511
Epoch 5/30, Loss: 2.9116
Epoch 6/30, Loss: 2.7572
Epoch 7/30, Loss: 2.3976
Epoch 8/30, Loss: 2.2455
Epoch 9/30, Loss: 2.1879
Epoch 10/30, Loss: 2.3863
Epoch 11/30, Loss: 2.4007
Epoch 12/30, Loss: 1.7477
Epoch 13/30, Loss: 1.8637
Epoch 14/30, Loss: 1.9179
Epoch 15/30, Loss: 1.4622
Epoch 16/30, Loss: 1.6017
Epoch 17/30, Loss: 1.3597
Epoch 18/30, Loss: 1.2826
Epoch 19/30, Loss: 1.2866
Epoch 20/30, Loss: 1.2425
Epoch 21/30, Loss: 1.1326
Epoch 22/30, Loss: 1.1239
Epoch 23/30, Loss: 1.0162
Epoch 24/30, Loss: 1.0386
Epoch 25/30, Loss: 0.8726
Epoch 26/30, Loss: 0.9218
Epoch 27/30, Loss: 0.7937
Epoch 28/30, Loss: 0.7102
Epoch 29/30, Loss: 0.7072
Epoch 30/30, Loss: 0.7832


In [130]:
# Predict on test data
max_probs, predicted_genres_indices = nn_he_init.predict(X_test)

# Map predicted genre indices to genre names
predicted_genres = [genre_columns[idx] for idx in predicted_genres_indices]

# Initialize counters for evaluation
correct_predictions = 0
total_predictions = len(y_test)

# Iterate through predictions and compare with actual genres
for i, (prob, genre_idx, actual_genre) in enumerate(zip(max_probs, predicted_genres_indices, y_test)):
    predicted_genre = genre_columns[genre_idx]
    actual_genres = [genre_columns[idx] for idx, val in enumerate(actual_genre) if val == 1]
    
    # Check if the predicted genre is in the actual genres
    is_correct = predicted_genre in actual_genres
    if is_correct:
        correct_predictions += 1

    # Display the results
    print(f"Sample {i + 1}:")
    print(f"Predicted Genre: {predicted_genre} (Prob: {prob:.9f})")
    print(f"Actual Genres: {actual_genres}")
    print(f"Correct: {'Yes' if is_correct else 'No'}")
    print("-" * 30)

Sample 1:
Predicted Genre: Northern Renaissance (Prob: 0.999931252)
Actual Genres: ['Northern Renaissance']
Correct: Yes
------------------------------
Sample 2:
Predicted Genre: Post-Impressionism (Prob: 0.697912954)
Actual Genres: ['Romanticism']
Correct: No
------------------------------
Sample 3:
Predicted Genre: Cubism (Prob: 0.747783394)
Actual Genres: ['Impressionism']
Correct: No
------------------------------
Sample 4:
Predicted Genre: High Renaissance (Prob: 0.900545432)
Actual Genres: ['Realism']
Correct: No
------------------------------
Sample 5:
Predicted Genre: Romanticism (Prob: 0.978447900)
Actual Genres: ['Romanticism']
Correct: Yes
------------------------------
Sample 6:
Predicted Genre: Impressionism (Prob: 0.993154509)
Actual Genres: ['Post-Impressionism', 'Symbolism']
Correct: No
------------------------------
Sample 7:
Predicted Genre: Baroque (Prob: 0.904265886)
Actual Genres: ['Baroque']
Correct: Yes
------------------------------
Sample 8:
Predicted Genre: Ma

In [132]:
# Calculate and display accuracy
accuracy = correct_predictions / total_predictions * 100
print(f"With He Initialization Accuracy: {accuracy:.2f}%")

With He Initialization Accuracy: 63.20%


# MODEL 3: Neural Network with Adam Optimizer
Here’s the key idea:  
- Adam combines the benefits of Momentum Optimization and RMSProp.
- It uses exponential moving averages of the gradients and the squared gradients to adaptively adjust the learning rate for each weight.
- This approach can handle noisy gradients and sparse data, which might be beneficial in your case.

In [110]:
class NeuralNetworkWithAdam:
    def __init__(self, input_size, hidden_sizes, output_size, learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8):
        self.learning_rate = learning_rate
        self.beta1 = beta1  # Exponential decay rate for the first moment estimates
        self.beta2 = beta2  # Exponential decay rate for the second moment estimates
        self.epsilon = epsilon  # Small value to prevent division by zero

        # Initialize weights and biases
        self.weights = {
            "W1": np.random.randn(input_size, hidden_sizes[0]) * np.sqrt(2 / input_size),
            "W2": np.random.randn(hidden_sizes[0], hidden_sizes[1]) * np.sqrt(2 / hidden_sizes[0]),
            "W3": np.random.randn(hidden_sizes[1], output_size) * np.sqrt(2 / hidden_sizes[1]),
        }
        self.biases = {
            "b1": np.zeros((1, hidden_sizes[0])),
            "b2": np.zeros((1, hidden_sizes[1])),
            "b3": np.zeros((1, output_size)),
        }

        # Initialize Adam parameters
        self.m_weights = {key: np.zeros_like(value) for key, value in self.weights.items()}
        self.v_weights = {key: np.zeros_like(value) for key, value in self.weights.items()}
        self.m_biases = {key: np.zeros_like(value) for key, value in self.biases.items()}
        self.v_biases = {key: np.zeros_like(value) for key, value in self.biases.items()}
        self.t = 0  # Time step for Adam

    def relu(self, Z):
        return np.maximum(0, Z)

    def relu_derivative(self, Z):
        return Z > 0

    def sigmoid(self, Z):
        Z = np.clip(Z, -100, 100)  # Prevent overflow
        return 1 / (1 + np.exp(-Z))

    def binary_cross_entropy_loss(self, y_pred, y_true):
        n_samples = y_true.shape[0]
        return -np.sum(y_true * np.log(y_pred + 1e-10) + (1 - y_true) * np.log(1 - y_pred + 1e-10)) / n_samples

    def forward(self, X):
        # Forward propagation
        self.Z1 = np.dot(X, self.weights["W1"]) + self.biases["b1"]
        self.A1 = self.relu(self.Z1)

        self.Z2 = np.dot(self.A1, self.weights["W2"]) + self.biases["b2"]
        self.A2 = self.relu(self.Z2)

        self.Z3 = np.dot(self.A2, self.weights["W3"]) + self.biases["b3"]
        self.A3 = self.sigmoid(self.Z3)  # Sigmoid activation for multi-label classification

        return self.A3

    def adam_update(self, gradients, params, m_params, v_params):
        self.t += 1
        updated_params = {}
        for key in params.keys():
            # Update biased first moment estimate
            m_params[key] = self.beta1 * m_params[key] + (1 - self.beta1) * gradients[key]

            # Update biased second raw moment estimate
            v_params[key] = self.beta2 * v_params[key] + (1 - self.beta2) * (gradients[key] ** 2)

            # Correct bias in first moment
            m_hat = m_params[key] / (1 - self.beta1 ** self.t)

            # Correct bias in second raw moment
            v_hat = v_params[key] / (1 - self.beta2 ** self.t)

            # Update parameters
            updated_params[key] = params[key] - self.learning_rate * m_hat / (np.sqrt(v_hat) + self.epsilon)

        return updated_params, m_params, v_params

    def backward(self, X, y_true, y_pred):
        # Backward propagation
        n_samples = y_true.shape[0]

        dZ3 = y_pred - y_true  # Binary cross-entropy gradient
        dW3 = np.dot(self.A2.T, dZ3) / n_samples
        db3 = np.sum(dZ3, axis=0, keepdims=True) / n_samples

        dA2 = np.dot(dZ3, self.weights["W3"].T)
        dZ2 = dA2 * self.relu_derivative(self.Z2)
        dW2 = np.dot(self.A1.T, dZ2) / n_samples
        db2 = np.sum(dZ2, axis=0, keepdims=True) / n_samples

        dA1 = np.dot(dZ2, self.weights["W2"].T)
        dZ1 = dA1 * self.relu_derivative(self.Z1)
        dW1 = np.dot(X.T, dZ1) / n_samples
        db1 = np.sum(dZ1, axis=0, keepdims=True) / n_samples

        # Collect gradients
        gradients = {
            "W1": dW1,
            "W2": dW2,
            "W3": dW3,
            "b1": db1,
            "b2": db2,
            "b3": db3,
        }

        # Update weights and biases using Adam
        self.weights, self.m_weights, self.v_weights = self.adam_update(
            {key: gradients[key] for key in self.weights.keys()},
            self.weights,
            self.m_weights,
            self.v_weights,
        )
        self.biases, self.m_biases, self.v_biases = self.adam_update(
            {key: gradients[key] for key in self.biases.keys()},
            self.biases,
            self.m_biases,
            self.v_biases,
        )

    def train(self, X, y, epochs=20, batch_size=32):
        n_samples = X.shape[0]

        for epoch in range(epochs):
            indices = np.arange(n_samples)
            np.random.shuffle(indices)
            X = X[indices]
            y = y[indices]

            for i in range(0, n_samples, batch_size):
                X_batch = X[i:i + batch_size]
                y_batch = y[i:i + batch_size]
                y_pred = self.forward(X_batch)
                self.backward(X_batch, y_batch, y_pred)

            y_pred_full = self.forward(X)
            loss = self.binary_cross_entropy_loss(y_pred_full, y)
            print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss:.4f}")

    def predict(self, X):
        y_pred = self.forward(X)
        max_probs = np.max(y_pred, axis=1)  # Highest probability for each sample
        max_labels = np.argmax(y_pred, axis=1)  # Index of the highest probability
        return max_probs, max_labels

In [211]:
# TEST 1: Define model parameters
input_size = X_train.shape[1]  # Number of input features
hidden_sizes = [512, 256]      # Two hidden layers test 1
output_size = y_train.shape[1]  # Number of output genres
learning_rate = 0.001           # Lower learning rate for Adam (MUST INCLUDE IN REPORT)

# Initialize and train the neural network
nn_adam = NeuralNetworkWithAdam(input_size, hidden_sizes, output_size, learning_rate)
nn_adam.train(X_train, y_train, epochs=30, batch_size=32)

Epoch 1/30, Loss: 4.7105
Epoch 2/30, Loss: 2.7131
Epoch 3/30, Loss: 2.0021
Epoch 4/30, Loss: 1.5885
Epoch 5/30, Loss: 1.3460
Epoch 6/30, Loss: 1.0801
Epoch 7/30, Loss: 0.8849
Epoch 8/30, Loss: 0.7242
Epoch 9/30, Loss: 0.5641
Epoch 10/30, Loss: 0.5509
Epoch 11/30, Loss: 0.4739
Epoch 12/30, Loss: 0.3780
Epoch 13/30, Loss: 0.3182
Epoch 14/30, Loss: 0.2539
Epoch 15/30, Loss: 0.2318
Epoch 16/30, Loss: 0.3594
Epoch 17/30, Loss: 0.4879
Epoch 18/30, Loss: 0.4673
Epoch 19/30, Loss: 0.3261
Epoch 20/30, Loss: 0.3320
Epoch 21/30, Loss: 0.1895
Epoch 22/30, Loss: 0.1404
Epoch 23/30, Loss: 0.1482
Epoch 24/30, Loss: 0.1530
Epoch 25/30, Loss: 0.1857
Epoch 26/30, Loss: 0.4675
Epoch 27/30, Loss: 0.4691
Epoch 28/30, Loss: 0.3468
Epoch 29/30, Loss: 0.2133
Epoch 30/30, Loss: 0.2020


In [213]:
# Predict on test data
max_probs, predicted_genres_indices = nn_adam.predict(X_test)

# Map predicted genre indices to genre names
predicted_genres = [genre_columns[idx] for idx in predicted_genres_indices]

# Initialize counters for evaluation
correct_predictions = 0
total_predictions = len(y_test)

# Iterate through predictions and compare with actual genres
for i, (prob, genre_idx, actual_genre) in enumerate(zip(max_probs, predicted_genres_indices, y_test)):
    predicted_genre = genre_columns[genre_idx]
    actual_genres = [genre_columns[idx] for idx, val in enumerate(actual_genre) if val == 1]
    
    # Check if the predicted genre is in the actual genres
    is_correct = predicted_genre in actual_genres
    if is_correct:
        correct_predictions += 1
        
    # Display the results
    print(f"Sample {i + 1}:")
    print(f"Predicted Genre: {predicted_genre} (Prob: {prob:.9f})")
    print(f"Actual Genres: {actual_genres}")
    print(f"Correct: {'Yes' if is_correct else 'No'}")
    print("-" * 30)

Sample 1:
Predicted Genre: Northern Renaissance (Prob: 1.000000000)
Actual Genres: ['Northern Renaissance']
Correct: Yes
------------------------------
Sample 2:
Predicted Genre: Post-Impressionism (Prob: 0.026843557)
Actual Genres: ['Romanticism']
Correct: No
------------------------------
Sample 3:
Predicted Genre: Cubism (Prob: 0.988881082)
Actual Genres: ['Impressionism']
Correct: No
------------------------------
Sample 4:
Predicted Genre: High Renaissance (Prob: 0.999999753)
Actual Genres: ['Realism']
Correct: No
------------------------------
Sample 5:
Predicted Genre: Romanticism (Prob: 1.000000000)
Actual Genres: ['Romanticism']
Correct: Yes
------------------------------
Sample 6:
Predicted Genre: Post-Impressionism (Prob: 0.999981173)
Actual Genres: ['Post-Impressionism', 'Symbolism']
Correct: Yes
------------------------------
Sample 7:
Predicted Genre: Baroque (Prob: 1.000000000)
Actual Genres: ['Baroque']
Correct: Yes
------------------------------
Sample 8:
Predicted Gen

In [215]:
# Calculate and display accuracy
accuracy = correct_predictions / total_predictions * 100
print(f"With Adam Optimizer Accuracy: {accuracy:.2f}%")

With Adam Optimizer Accuracy: 67.27%


In [118]:
# TEST 2 increase the size of hidden layers [4096, 2048]: Define model parameters
input_size = X_train.shape[1]  # Number of input features
hidden_sizes = [4096, 2048]      # Two hidden layers test 3 [4096, 2048]
output_size = y_train.shape[1]  # Number of output genres
learning_rate = 0.001           # Lower learning rate for Adam

# Initialize and train the neural network
nn_adam = NeuralNetworkWithAdam(input_size, hidden_sizes, output_size, learning_rate)
nn_adam.train(X_train, y_train, epochs=30, batch_size=32)

Epoch 1/30, Loss: 3.1751
Epoch 2/30, Loss: 2.2241
Epoch 3/30, Loss: 1.8944
Epoch 4/30, Loss: 1.4790
Epoch 5/30, Loss: 1.1267
Epoch 6/30, Loss: 0.9905
Epoch 7/30, Loss: 1.0335
Epoch 8/30, Loss: 0.8697
Epoch 9/30, Loss: 0.6316
Epoch 10/30, Loss: 0.6557
Epoch 11/30, Loss: 0.5302
Epoch 12/30, Loss: 0.4554
Epoch 13/30, Loss: 0.6030
Epoch 14/30, Loss: 0.6351
Epoch 15/30, Loss: 0.4627
Epoch 16/30, Loss: 0.6748
Epoch 17/30, Loss: 0.5555
Epoch 18/30, Loss: 0.7177
Epoch 19/30, Loss: 0.8166
Epoch 20/30, Loss: 0.8330
Epoch 21/30, Loss: 0.5338
Epoch 22/30, Loss: 0.4784
Epoch 23/30, Loss: 0.6168
Epoch 24/30, Loss: 0.6518
Epoch 25/30, Loss: 0.4732
Epoch 26/30, Loss: 0.4228
Epoch 27/30, Loss: 0.4166
Epoch 28/30, Loss: 0.4412
Epoch 29/30, Loss: 0.5996
Epoch 30/30, Loss: 0.5100


In [120]:
# Predict on test data (TEST 3 Layer Parameter [4096, 2048])
max_probs, predicted_genres_indices = nn_adam.predict(X_test)

# Map predicted genre indices to genre names
predicted_genres = [genre_columns[idx] for idx in predicted_genres_indices]

# Initialize counters for evaluation
correct_predictions = 0
total_predictions = len(y_test)

# Iterate through predictions and compare with actual genres
for i, (prob, genre_idx, actual_genre) in enumerate(zip(max_probs, predicted_genres_indices, y_test)):
    predicted_genre = genre_columns[genre_idx]
    actual_genres = [genre_columns[idx] for idx, val in enumerate(actual_genre) if val == 1]
    
    # Check if the predicted genre is in the actual genres
    is_correct = predicted_genre in actual_genres
    if is_correct:
        correct_predictions += 1

    # Display the results
    print(f"Sample {i + 1}:")
    print(f"Predicted Genre: {predicted_genre} (Prob: {prob:.9f})")
    print(f"Actual Genres: {actual_genres}")
    print(f"Correct: {'Yes' if is_correct else 'No'}")
    print("-" * 30)

Sample 1:
Predicted Genre: Northern Renaissance (Prob: 1.000000000)
Actual Genres: ['Northern Renaissance']
Correct: Yes
------------------------------
Sample 2:
Predicted Genre: Symbolism (Prob: 0.023085172)
Actual Genres: ['Romanticism']
Correct: No
------------------------------
Sample 3:
Predicted Genre: Impressionism (Prob: 0.943204958)
Actual Genres: ['Impressionism']
Correct: Yes
------------------------------
Sample 4:
Predicted Genre: High Renaissance (Prob: 0.958752083)
Actual Genres: ['Realism']
Correct: No
------------------------------
Sample 5:
Predicted Genre: Romanticism (Prob: 0.999999996)
Actual Genres: ['Romanticism']
Correct: Yes
------------------------------
Sample 6:
Predicted Genre: Post-Impressionism (Prob: 0.999994854)
Actual Genres: ['Post-Impressionism', 'Symbolism']
Correct: Yes
------------------------------
Sample 7:
Predicted Genre: Baroque (Prob: 1.000000000)
Actual Genres: ['Baroque']
Correct: Yes
------------------------------
Sample 8:
Predicted Genr

In [122]:
# Calculate and display accuracy
accuracy = correct_predictions / total_predictions * 100
print(f"With Adam Optimizer Accuracy: {accuracy:.2f}%")

With Adam Optimizer Accuracy: 67.62%
