# Step 1: Organize Image Data  
1. Parse the filenames in the resized/ folder to extract the artist name.
2. Match each artist name to its genre using the metadata file artists.csv.

In [72]:
import pandas as pd

# Load artists.csv (Change to your path)
artists_df = pd.read_csv("/Users/hoon/Desktop/4060J_DataScience_Project/artists.csv")
artists_df.head()

Unnamed: 0,id,name,years,genre,nationality,bio,wikipedia,paintings
0,0,Amedeo Modigliani,1884 - 1920,Expressionism,Italian,Amedeo Clemente Modigliani (Italian pronunciat...,http://en.wikipedia.org/wiki/Amedeo_Modigliani,193
1,1,Vasiliy Kandinskiy,1866 - 1944,"Expressionism,Abstractionism",Russian,Wassily Wassilyevich Kandinsky (Russian: Васи́...,http://en.wikipedia.org/wiki/Wassily_Kandinsky,88
2,2,Diego Rivera,1886 - 1957,"Social Realism,Muralism",Mexican,Diego María de la Concepción Juan Nepomuceno E...,http://en.wikipedia.org/wiki/Diego_Rivera,70
3,3,Claude Monet,1840 - 1926,Impressionism,French,Oscar-Claude Monet (; French: [klod mɔnɛ]; 14 ...,http://en.wikipedia.org/wiki/Claude_Monet,73
4,4,Rene Magritte,1898 - 1967,"Surrealism,Impressionism",Belgian,René François Ghislain Magritte (French: [ʁəne...,http://en.wikipedia.org/wiki/René_Magritte,194


In [74]:
## Parse Filenames in the resized Folder
import os

# Path to the resized folder (Change to your path)
image_folder = "/Users/hoon/Desktop/4060J_DataScience_Project/resized"

# List to store artist names and filenames
image_metadata = []

# Parse filenames
for file_name in os.listdir(image_folder):
    if file_name.endswith(".jpg"):
        # Extract artist name from filename
        artist_name = "_".join(file_name.split("_")[:-1]).replace("_", " ")
        image_metadata.append({'file_name': file_name, 'artist_name': artist_name})

# Create a DataFrame
image_metadata_df = pd.DataFrame(image_metadata)

In [76]:
## Verify Missing or Mismatched Data

# Artists in the resized folder
parsed_artists = image_metadata_df['artist_name'].unique()

# Artists in the CSV file
csv_artists = artists_df['name'].unique()

# Find missing artists
missing_artists = set(csv_artists) - set(parsed_artists)
print("Missing artists:")
print(missing_artists)

Missing artists:
{'Albrecht Dürer'}


In [78]:
import unicodedata

# Normalize artist names
def normalize_name(name):
    return unicodedata.normalize('NFKD', name).encode('ascii', 'ignore').decode('utf-8')

# Normalize names in both datasets
image_metadata_df['artist_name'] = image_metadata_df['artist_name'].apply(normalize_name)
artists_df['name'] = artists_df['name'].apply(normalize_name)

In [80]:
## Sample 20 paintings per artist
sampled_metadata_df = image_metadata_df.groupby('artist_name').apply(
    lambda x: x.sample(n=min(20, len(x)), random_state=42)
).reset_index(drop=True)

  sampled_metadata_df = image_metadata_df.groupby('artist_name').apply(


In [82]:
image_metadata_df

Unnamed: 0,file_name,artist_name
0,Gustav_Klimt_113.jpg,Gustav Klimt
1,Vincent_van_Gogh_388.jpg,Vincent van Gogh
2,Amedeo_Modigliani_24.jpg,Amedeo Modigliani
3,Edgar_Degas_455.jpg,Edgar Degas
4,Edgar_Degas_333.jpg,Edgar Degas
...,...,...
8350,Mikhail_Vrubel_116.jpg,Mikhail Vrubel
8351,Joan_Miro_51.jpg,Joan Miro
8352,Frida_Kahlo_10.jpg,Frida Kahlo
8353,Vincent_van_Gogh_391.jpg,Vincent van Gogh


In [84]:
sampled_metadata_df

Unnamed: 0,file_name,artist_name
0,Albrecht_Dürer_5.jpg,Albrecht Durer
1,Albrecht_Dürer_9.jpg,Albrecht Durer
2,Albrecht_Dürer_252.jpg,Albrecht Durer
3,Albrecht_Dürer_201.jpg,Albrecht Durer
4,Albrecht_Dürer_328.jpg,Albrecht Durer
...,...,...
995,William_Turner_48.jpg,William Turner
996,William_Turner_39.jpg,William Turner
997,William_Turner_2.jpg,William Turner
998,William_Turner_42.jpg,William Turner


# STEP 2: Exract Visual Features  
Use **ResNet50**, a pretrained Convolutional Neural Network (CNN), to extract features from the images. These features capture the visual aspects of the artwork (e.g., colors, shapes, brushstrokes).

In [16]:
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import numpy as np

# Load the pretrained ResNet50 model (without the top classification layer)
model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Function to extract features from an image
def extract_features(image_path):
    img = load_img(image_path, target_size=(224, 224))  # Resize image to 224x224
    img_array = img_to_array(img)
    img_array = preprocess_input(img_array)  # Preprocess for ResNet50
    img_array = np.expand_dims(img_array, axis=0)  # Add batch dimension
    features = model.predict(img_array)
    return features.flatten()  # Flatten the feature map to a 1D vector

## For 1000 paintinngs (20 paintings/artist)

In [21]:
# Add features to the DataFrame
sampled_metadata_df['features'] = sampled_metadata_df['file_name'].apply(
    lambda x: extract_features(os.path.join(image_folder, x))
)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 529ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4

In [23]:
sampled_metadata_df

Unnamed: 0,file_name,artist_name,features
0,Albrecht_Dürer_5.jpg,Albrecht Durer,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
1,Albrecht_Dürer_9.jpg,Albrecht Durer,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.3402357, 0.0,..."
2,Albrecht_Dürer_252.jpg,Albrecht Durer,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.2304628, 0.0,..."
3,Albrecht_Dürer_201.jpg,Albrecht Durer,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
4,Albrecht_Dürer_328.jpg,Albrecht Durer,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
...,...,...,...
995,William_Turner_48.jpg,William Turner,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
996,William_Turner_39.jpg,William Turner,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
997,William_Turner_2.jpg,William Turner,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
998,William_Turner_42.jpg,William Turner,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."


In [40]:
image_metadata_df['features'] = image_metadata_df['file_name'].apply(
    lambda x: extract_features(os.path.join(image_folder, x))
)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58

In [42]:
image_metadata_df

Unnamed: 0,file_name,artist_name,features
0,Gustav_Klimt_113.jpg,Gustav Klimt,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
1,Vincent_van_Gogh_388.jpg,Vincent van Gogh,"[0.0, 2.0485382, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
2,Amedeo_Modigliani_24.jpg,Amedeo Modigliani,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
3,Edgar_Degas_455.jpg,Edgar Degas,"[2.004991, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
4,Edgar_Degas_333.jpg,Edgar Degas,"[0.0, 0.0, 0.0, 5.8787217, 0.0, 0.0, 0.0, 0.0,..."
...,...,...,...
8350,Mikhail_Vrubel_116.jpg,Mikhail Vrubel,"[0.0, 1.0881025, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
8351,Joan_Miro_51.jpg,Joan Miro,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
8352,Frida_Kahlo_10.jpg,Frida Kahlo,"[5.9169617, 0.0, 0.7193576, 0.0, 0.0, 0.0, 0.0..."
8353,Vincent_van_Gogh_391.jpg,Vincent van Gogh,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."


## Artists Names Encoding  
Encode the artist names as numerical labels for classification

In [47]:
from sklearn.preprocessing import LabelEncoder

# Encode artist names
artist_encoder = LabelEncoder()
image_metadata_df['artist_label'] = artist_encoder.fit_transform(image_metadata_df['artist_name']) #All paintings
sampled_metadata_df['artist_label'] = artist_encoder.fit_transform(sampled_metadata_df['artist_name']) # Sample 1000 paintings

In [94]:
image_metadata_df

Unnamed: 0,file_name,artist_name
0,Gustav_Klimt_113.jpg,Gustav Klimt
1,Vincent_van_Gogh_388.jpg,Vincent van Gogh
2,Amedeo_Modigliani_24.jpg,Amedeo Modigliani
3,Edgar_Degas_455.jpg,Edgar Degas
4,Edgar_Degas_333.jpg,Edgar Degas
...,...,...
8350,Mikhail_Vrubel_116.jpg,Mikhail Vrubel
8351,Joan_Miro_51.jpg,Joan Miro
8352,Frida_Kahlo_10.jpg,Frida Kahlo
8353,Vincent_van_Gogh_391.jpg,Vincent van Gogh


In [92]:
sampled_metadata_df

Unnamed: 0,file_name,artist_name
0,Albrecht_Dürer_5.jpg,Albrecht Durer
1,Albrecht_Dürer_9.jpg,Albrecht Durer
2,Albrecht_Dürer_252.jpg,Albrecht Durer
3,Albrecht_Dürer_201.jpg,Albrecht Durer
4,Albrecht_Dürer_328.jpg,Albrecht Durer
...,...,...
995,William_Turner_48.jpg,William Turner
996,William_Turner_39.jpg,William Turner
997,William_Turner_2.jpg,William Turner
998,William_Turner_42.jpg,William Turner


# Preprocessing Done (Export as PICKLE) 
PICKLE, instead of csv format, is used because the features column contains a list or array, which is not easily serializable into a CSV.

In [68]:
# Export as a pickle file
image_metadata_df.to_pickle("/Users/hoon/Desktop/image_metadata.pkl")
sampled_metadata_df.to_pickle("/Users/hoon/Desktop/sampled_metadata.pkl")

In [100]:
# Load the data
image_metadata_df = pd.read_pickle('/Users/hoon/Desktop/image_metadata.pkl')
sampled_metadata_df = pd.read_pickle(("/Users/hoon/Desktop/sampled_metadata.pkl"))

## Apply PCA to redce dimentionality to 100 components

In [210]:
from sklearn.decomposition import PCA

# Step 1: Extract feature matrix from image_metadata_df
X = np.stack(image_metadata_df['features'].values)  # Shape: [n_samples, n_features]

# Step 2: Apply PCA to reduce dimensionality to 100 components
pca = PCA(n_components=100, random_state=42)
X_reduced = pca.fit_transform(X)  # Shape: [n_samples, 100]

# Step 3: Replace the 'features' column with the reduced features
image_metadata_df['features'] = list(X_reduced)  # Store reduced features as lists

In [212]:
image_metadata_df

Unnamed: 0,file_name,artist_name,features,artist_label
0,Gustav_Klimt_113.jpg,Gustav Klimt,"[-104.051445, -30.065264, 40.834404, -27.93678...",19
1,Vincent_van_Gogh_388.jpg,Vincent van Gogh,"[-42.690514, 50.45694, -31.054245, 49.034355, ...",48
2,Amedeo_Modigliani_24.jpg,Amedeo Modigliani,"[85.80638, 74.72561, 21.941647, -97.16099, 6.6...",2
3,Edgar_Degas_455.jpg,Edgar Degas,"[110.57151, -27.311632, -106.11015, -26.606182...",10
4,Edgar_Degas_333.jpg,Edgar Degas,"[128.26991, -22.025055, -65.37834, -10.970392,...",10
...,...,...,...,...
8350,Mikhail_Vrubel_116.jpg,Mikhail Vrubel,"[-112.82037, -11.718549, -2.0029588, -15.04779...",32
8351,Joan_Miro_51.jpg,Joan Miro,"[-69.65717, 75.91622, -17.81189, -51.637184, 2...",27
8352,Frida_Kahlo_10.jpg,Frida Kahlo,"[53.062565, 15.8913965, -43.91363, -41.85438, ...",16
8353,Vincent_van_Gogh_391.jpg,Vincent van Gogh,"[-88.602, -56.594547, -37.02433, 22.392206, -5...",48


In [208]:
## Export to pickle again
image_metadata_df.to_pickle("/Users/hoon/Desktop/image_metadata_reduced.pkl")

# Step 3: K-NN Implementation & Training From the Scratch

Why K-NN? Since it does not make any underlying assumptions about the distribution of data (as opposed to other algorithms such as GMM, which assume a Gaussian distribution of the given data).

In [214]:
image_metadata_reduced_df = pd.read_pickle('/Users/hoon/Desktop/image_metadata_reduced.pkl')

## DATA SPLIT

In [462]:
# Create feature matrix (X) and labels (y) for all 8355 paintings
X = np.stack(image_metadata_reduced_df['features'].values)  # Feature matrix
y_artist = image_metadata_reduced_df['artist_label'].values  # Artist labels

from sklearn.model_selection import train_test_split
# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y_artist, test_size=0.2, random_state=42)

## Time to Implement & Train

Concept
- K-NN is a distance-based algorithm that classifies a data point by looking at its k nearest neighbors in the training set.
- The label (or class) is assigned based on the majority vote of the nearest neighbors.  
  
Steps  
1. Compute the distance between the query point and all points in the training set.
2. Sort the distances to find the k nearest neighbors.
3. Count the votes of the k neighbors and assign the label with the highest votes.  

**Supporting Functions**

In [416]:
from collections import defaultdict

## 1 Euclidean distance.
def euclidean_distance(x1, x2):
    """
    Compute the Euclidean distance between two points.
    """
    return np.sqrt(np.sum((x1 - x2) ** 2))

## 2 cosine_smilarity
def cosine_similarity(x1, x2):
    """
    Compute the Cosine Similarity between two points.
    """
    dot_product = np.dot(x1, x2)
    norm_x1 = np.linalg.norm(x1)
    norm_x2 = np.linalg.norm(x2)
    return dot_product / (norm_x1 * norm_x2 + 1e-5)

##---------------------------------------------------------------------

## 2-a.
def get_k_nearest_neighbors1(X_train, y_train, query_point, k):
    """
    Find the k nearest neighbors of a query point.
    Arguments:
    - X_train: Training feature matrix (NumPy array of shape [n_samples, n_features]).
    - y_train: Training labels (NumPy array of shape [n_samples]).
    - query_point: The test point to classify (NumPy array of shape [n_features]).
    - k: Number of neighbors to consider.

    Returns:
    - neighbors: A list of the k nearest neighbors' labels.
    """
    distances = []
    
    # Compute distances from the query point to all training points
    for i in range(len(X_train)):
        distance = euclidean_distance(X_train[i], query_point)
        distances.append((distance, y_train[i]))  # Store (distance, label)
    
    # Sort distances by the first element (distance)
    distances.sort(key=lambda x: x[0])
    
    # Select the labels of the k nearest neighbors
    neighbors = [distances[i][1] for i in range(k)]
    return neighbors

## 2-b. 
def get_k_nearest_neighbors2(X_train, y_train, query_point, k):
    """
    Find the k nearest neighbors of a query point.
    Arguments:
    - X_train: Training feature matrix (NumPy array of shape [n_samples, n_features]).
    - y_train: Training labels (NumPy array of shape [n_samples]).
    - query_point: The test point to classify (NumPy array of shape [n_features]).
    - k: Number of neighbors to consider.

    Returns:
    - neighbors: A list of tuples [(distance, label), ...] for the k nearest neighbors.
    """
    distances = []
    
    # Compute distances from the query point to all training points
    for i in range(len(X_train)):
        distance = euclidean_distance(X_train[i], query_point)
        distances.append((distance, y_train[i]))  # Store (distance, label)
    
    # Sort distances by the first element (distance) and select k nearest
    distances.sort(key=lambda x: x[0])
    return distances[:k]  # Return the top-k nearest neighbors

## 2-c.
def get_k_nearest_neighbors_cosine(X_train, y_train, query_point, k):
    """
    Find the k nearest neighbors using cosine similarity.
    Returns a list of tuples (similarity, label) for the k most similar neighbors.
    """
    similarities = []
    
    # Compute cosine similarity between the query point and all training points
    for i in range(len(X_train)):
        similarity = cosine_similarity(X_train[i], query_point)
        similarities.append((similarity, y_train[i]))  # Store (similarity, label)
    
    # Sort by similarity in descending order (higher similarity is better)
    similarities.sort(key=lambda x: x[0], reverse=True)
    
    # Return the k most similar neighbors (including similarity values)
    return similarities[:k]

##---------------------------------------------------------------------
## 3-a. Majority Voting
from collections import Counter
def majority_vote(neighbors):
    """
    Perform majority voting to classify a test point.
    Arguments:
    - neighbors: A list of labels of the k nearest neighbors.

    Returns:
    - The predicted label for the query point.
    """
    # Count occurrences of each label
    label_counts = Counter(neighbors)
    
    # Return the label with the highest count
    return label_counts.most_common(1)[0][0]

## 3-b. Weighted Majority Voting
def weighted_vote(neighbors):
    """
    Perform weighted majority voting based on distances.
    Arguments:
    - neighbors: A list of tuples [(distance, label), ...]

    Returns:
    - The predicted label.
    """
    weights = defaultdict(float)
    
    for distance, label in neighbors:
        # Add a small constant to avoid division by zero
        weights[label] += 1 / (distance + 1e-5)  # Inverse of distance as the weight
    
    # Return the label with the highest total weight
    return max(weights, key=weights.get)


**Different K-NNs**

In [419]:
## 1. Default
def knn_predict1(X_train, y_train, X_test, k):
    """
    Predict the labels for the test set using K-NN from scratch.
    Arguments:
    - X_train: Training feature matrix (NumPy array of shape [n_samples, n_features]).
    - y_train: Training labels (NumPy array of shape [n_samples]).
    - X_test: Test feature matrix (NumPy array of shape [n_test_samples, n_features]).
    - k: Number of neighbors to consider.

    Returns:
    - predictions: A list of predicted labels for the test set.
    """
    predictions = []
    
    for query_point in X_test:
        # Get the k nearest neighbors
        neighbors = get_k_nearest_neighbors1(X_train, y_train, query_point, k)
        
        # Perform majority voting
        predicted_label = majority_vote(neighbors)
        #predicted_label = weighted_vote(neighbors)

        # Append the prediction
        predictions.append(predicted_label)
    
    return predictions


# 2. 
def knn_predict2(X_train, y_train, X_test, k):
    """
    Predict the labels for the test set using K-NN from scratch.
    Arguments:
    - X_train: Training feature matrix (NumPy array of shape [n_samples, n_features]).
    - y_train: Training labels (NumPy array of shape [n_samples]).
    - X_test: Test feature matrix (NumPy array of shape [n_test_samples, n_features]).
    - k: Number of neighbors to consider.

    Returns:
    - predictions: A list of predicted labels for the test set.
    """
    predictions = []
    
    for query_point in X_test:
        # Get the k nearest neighbors
        neighbors = get_k_nearest_neighbors2(X_train, y_train, query_point, k)
        
        # Perform majority voting
        predicted_label = weighted_vote(neighbors)

        # Append the prediction
        predictions.append(predicted_label)
    
    return predictions

    
# 3.
def knn_predict3(X_train, y_train, X_test, k):
    """
    Predict the labels for the test set using K-NN from scratch.
    Arguments:
    - X_train: Training feature matrix (NumPy array of shape [n_samples, n_features]).
    - y_train: Training labels (NumPy array of shape [n_samples]).
    - X_test: Test feature matrix (NumPy array of shape [n_test_samples, n_features]).
    - k: Number of neighbors to consider.

    Returns:
    - predictions: A list of predicted labels for the test set.
    """
    predictions = []
    
    for query_point in X_test:
        # Get the k nearest neighbors
        neighbors = get_k_nearest_neighbors_cosine(X_train, y_train, query_point, k)
        
        # Perform majority voting
        predicted_label = weighted_vote(neighbors)

        # Append the prediction
        predictions.append(predicted_label)
    
    return predictions

In [400]:
## Test model 1: knn_predict1

# k=5
y_pred = knn_predict1(X_train, y_train, X_test, 5)
accuracy = np.sum(y_pred == y_test) / len(y_test)
print(f"K-NN Accuracy with distance: {accuracy * 100:.2f}%")
print("done")

K-NN Accuracy with distance: 58.47%
done


In [401]:
## Test model 2: knn_predict2

# k=5
y_pred = knn_predict2(X_train, y_train, X_test, 5)
accuracy = np.sum(y_pred == y_test) / len(y_test)
print(f"K-NN Accuracy with distance: {accuracy * 100:.2f}%")
print("done")

K-NN Accuracy with distance: 58.77%
done


In [421]:
## Test model 3: knn_predict3
# k=5
y_pred = knn_predict3(X_train, y_train, X_test, 5)
accuracy = np.sum(y_pred == y_test) / len(y_test)
print(f"K-NN Accuracy with distance: {accuracy * 100:.2f}%")
print("done")

K-NN Accuracy with distance: 55.00%
done


In [377]:
# Test with viT feature extraction

#load pkl
image_metadata_vit_df = pd.read_pickle('/Users/hoon/Desktop/image_metadata_vit.pkl')

'''
##-------------------------Worse accuracy
from sklearn.decomposition import PCA

# Step 1: Extract feature matrix from image_metadata_df
X = np.stack(image_metadata_vit_df['features'].values)  # Shape: [n_samples, n_features]

# Step 2: Apply PCA to reduce dimensionality to 100 components
pca = PCA(n_components=100, random_state=42)
X_reduced_vit = pca.fit_transform(X)  # Shape: [n_samples, 100]

# Step 3: Replace the 'features' column with the reduced features
image_metadata_vit_df['features'] = list(X_reduced_vit)  # Store reduced features as lists
##---------------------------------
'''

# Create feature matrix (X) and labels (y) for all 8355 paintings
X_vit = np.stack(image_metadata_vit_df['features'].values)  # Feature matrix
y_artist_vit = image_metadata_vit_df['artist_label'].values  # Artist labels
print("matrices created")

from sklearn.model_selection import train_test_split
# Split data
X_train_vit, X_test_vit, y_train_vit, y_test_vit = train_test_split(X_vit, y_artist_vit, test_size=0.2, random_state=42)
print("data splitted")

# K-NN (k=5)
y_pred = knn_predict2(X_train_vit, y_train_vit, X_test_vit, 5)
accuracy = np.sum(y_pred == y_test_vit) / len(y_test_vit)
print(f"K-NN Accuracy with distance: {accuracy * 100:.2f}%")
print("done")

matrices created
data splitted
K-NN Accuracy with distance: 58.47%
done


In [434]:
## Probabilistic K-NN with Softmax Scaling:
## Instead of relying on hard labels, compute class probabilities using a softmax over K-NN distances.

def probabilistic_knn(X_train, y_train, X_test, k=5, temperature=0.1):
    """
    Perform probabilistic K-NN classification with softmax scaling.
    """
    from collections import defaultdict
    import numpy as np
    predictions = []

    for query_point in X_test:
        distances = [np.linalg.norm(query_point - x) for x in X_train]
        sorted_indices = np.argsort(distances)[:k]
        top_k_labels = y_train[sorted_indices]
        top_k_distances = np.array(distances)[sorted_indices]

        # Compute softmax-scaled weights
        weights = np.exp(-top_k_distances / temperature)
        weights /= np.sum(weights)

        # Compute class probabilities
        class_probs = defaultdict(float)
        for label, weight in zip(top_k_labels, weights):
            class_probs[label] += weight

        # Predict the class with the highest probability
        predictions.append(max(class_probs, key=class_probs.get))

    return np.array(predictions)

# Example Usage
y_pred = probabilistic_knn(X_train, y_train, X_test, k=5, temperature=0.1)
accuracy = np.mean(y_pred == y_test)
print(f"Probabilistic K-NN Accuracy: {accuracy * 100:.2f}%")

  weights /= np.sum(weights)


Probabilistic K-NN Accuracy: 55.83%


# Try with Prototype-Based Classification

- It summarizes the dataset into class prototypes (mean feature vectors per class), making it less sensitive to noisy or outlier samples.

In [428]:
import numpy as np
from collections import Counter

# Compute class prototypes (mean embeddings)
def compute_class_prototypes(X_train, y_train):
    """
    Compute prototypes (mean feature vector) for each class.
    Arguments:
    - X_train: Training feature matrix (NumPy array of shape [n_samples, n_features]).
    - y_train: Training labels (NumPy array of shape [n_samples]).

    Returns:
    - prototypes: A dictionary where keys are class labels and values are mean feature vectors.
    """
    prototypes = {}
    unique_classes = np.unique(y_train)
    for cls in unique_classes:
        class_features = X_train[y_train == cls]
        prototypes[cls] = np.mean(class_features, axis=0)
    return prototypes

# K-NN to find top-k neighbors and confidence
def knn_with_confidence(X_train, y_train, query_point, k):
    """
    Perform K-NN and return the top-k labels with confidence.
    Arguments:
    - X_train: Training feature matrix (NumPy array of shape [n_samples, n_features]).
    - y_train: Training labels (NumPy array of shape [n_samples]).
    - query_point: The feature vector for the test sample.
    - k: Number of neighbors to consider.

    Returns:
    - knn_pred: The predicted label based on majority vote.
    - knn_confidence: Confidence score for the K-NN prediction (fraction of majority class in top-k).
    """
    # Compute Euclidean distances
    distances = [np.linalg.norm(query_point - x) for x in X_train]
    
    # Sort by distance and get top-k
    sorted_indices = np.argsort(distances)[:k]
    top_k_labels = y_train[sorted_indices]
    
    # Perform majority voting
    label_counts = Counter(top_k_labels)
    knn_pred = label_counts.most_common(1)[0][0]
    
    # Compute confidence as the fraction of the majority class
    knn_confidence = label_counts.most_common(1)[0][1] / k
    return knn_pred, knn_confidence

# Prototype-based classification
def classify_using_prototypes(prototypes, query_point):
    """
    Classify a test point using prototypes.
    Arguments:
    - prototypes: A dictionary of class prototypes.
    - query_point: The feature vector for the test sample.

    Returns:
    - predicted_label: The label of the closest prototype.
    """
    distances = {cls: np.linalg.norm(query_point - proto) for cls, proto in prototypes.items()}
    return min(distances, key=distances.get)  # Class with the smallest distance

# Hybrid classification (combine prototypes and K-NN)
def hybrid_classification(X_train, y_train, X_test, prototypes, k=5, threshold=0.6):
    """
    Hybrid classification using K-NN and prototypes.
    Arguments:
    - X_train: Training feature matrix.
    - y_train: Training labels.
    - X_test: Test feature matrix.
    - prototypes: Precomputed class prototypes.
    - k: Number of neighbors to consider for K-NN.
    - threshold: Confidence threshold for using K-NN.

    Returns:
    - predictions: Predicted labels for the test set.
    """
    predictions = []
    for query_point in X_test:
        # Step 1: Perform K-NN
        knn_pred, knn_confidence = knn_with_confidence(X_train, y_train, query_point, k)
        
        # Step 2: Use K-NN if confidence is high, otherwise use prototypes
        if knn_confidence > threshold:
            predictions.append(knn_pred)
        else:
            proto_pred = classify_using_prototypes(prototypes, query_point)
            predictions.append(proto_pred)
    
    return predictions

In [430]:
# Compute prototypes
prototypes = compute_class_prototypes(X_train, y_train)

# Hybrid classification
k = 5  # Number of neighbors for K-NN
threshold = 0.6  # Confidence threshold for switching to prototypes
y_pred = hybrid_classification(X_train, y_train, X_test, prototypes, k=k, threshold=threshold)

# Evaluate accuracy
accuracy = np.mean(y_pred == y_test)
print(f"Hybrid Classification Accuracy: {accuracy * 100:.2f}%")

Hybrid Classification Accuracy: 49.73%


# Implement Newtral Networks
dataset size permits, train a simple feedforward neural network using the extracted features.

In [526]:
'''
Model 1: SIMPLE NEURAL NETWORKS (Two Hidden Layers)
'''
import numpy as np

# Step 1: Initialize the Neural Network
class SimpleNeuralNetwork:
    def __init__(self, input_size, hidden_sizes, output_size, learning_rate=0.01):
        self.learning_rate = learning_rate

        # Initialize weights and biases
        self.weights = {
            "W1": np.random.randn(input_size, hidden_sizes[0]) * 0.01,
            "W2": np.random.randn(hidden_sizes[0], hidden_sizes[1]) * 0.01,
            "W3": np.random.randn(hidden_sizes[1], output_size) * 0.01,
        }
        self.biases = {
            "b1": np.zeros((1, hidden_sizes[0])),
            "b2": np.zeros((1, hidden_sizes[1])),
            "b3": np.zeros((1, output_size)),
        }

    def relu(self, Z):
        return np.maximum(0, Z)

    def relu_derivative(self, Z):
        return Z > 0

    def softmax(self, Z):
        exp_Z = np.exp(Z - np.max(Z, axis=1, keepdims=True))
        return exp_Z / np.sum(exp_Z, axis=1, keepdims=True)

    def cross_entropy_loss(self, y_pred, y_true):
        n_samples = y_true.shape[0]
        log_probs = -np.log(y_pred[range(n_samples), y_true])
        return np.sum(log_probs) / n_samples

    def forward(self, X):
        # Forward propagation
        self.Z1 = np.dot(X, self.weights["W1"]) + self.biases["b1"]
        self.A1 = self.relu(self.Z1)

        self.Z2 = np.dot(self.A1, self.weights["W2"]) + self.biases["b2"]
        self.A2 = self.relu(self.Z2)

        self.Z3 = np.dot(self.A2, self.weights["W3"]) + self.biases["b3"]
        self.A3 = self.softmax(self.Z3)

        return self.A3

    def backward(self, X, y_true, y_pred):
        # Backward propagation
        n_samples = y_true.shape[0]
        y_true_one_hot = np.zeros_like(y_pred)
        y_true_one_hot[range(n_samples), y_true] = 1

        # Gradients for the output layer
        dZ3 = y_pred - y_true_one_hot
        dW3 = np.dot(self.A2.T, dZ3) / n_samples
        db3 = np.sum(dZ3, axis=0, keepdims=True) / n_samples

        # Gradients for the second hidden layer
        dA2 = np.dot(dZ3, self.weights["W3"].T)
        dZ2 = dA2 * self.relu_derivative(self.Z2)
        dW2 = np.dot(self.A1.T, dZ2) / n_samples
        db2 = np.sum(dZ2, axis=0, keepdims=True) / n_samples

        # Gradients for the first hidden layer
        dA1 = np.dot(dZ2, self.weights["W2"].T)
        dZ1 = dA1 * self.relu_derivative(self.Z1)
        dW1 = np.dot(X.T, dZ1) / n_samples
        db1 = np.sum(dZ1, axis=0, keepdims=True) / n_samples

        # Update weights and biases
        self.weights["W1"] -= self.learning_rate * dW1
        self.biases["b1"] -= self.learning_rate * db1

        self.weights["W2"] -= self.learning_rate * dW2
        self.biases["b2"] -= self.learning_rate * db2

        self.weights["W3"] -= self.learning_rate * dW3
        self.biases["b3"] -= self.learning_rate * db3

    def train(self, X, y, epochs=20, batch_size=32):
        n_samples = X.shape[0]

        for epoch in range(epochs):
            # Shuffle the data
            indices = np.arange(n_samples)
            np.random.shuffle(indices)
            X = X[indices]
            y = y[indices]

            # Mini-batch gradient descent
            for i in range(0, n_samples, batch_size):
                X_batch = X[i:i + batch_size]
                y_batch = y[i:i + batch_size]

                # Forward and backward propagation
                y_pred = self.forward(X_batch)
                self.backward(X_batch, y_batch, y_pred)

            # Compute loss
            y_pred_full = self.forward(X)
            loss = self.cross_entropy_loss(y_pred_full, y)
            print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss:.4f}")

    def predict(self, X):
        y_pred = self.forward(X)
        return np.argmax(y_pred, axis=1)

# Step 7: Train and Evaluate the Neural Network
input_size = X_train.shape[1]
hidden_sizes = [512, 256]  # Two hidden layers with 512 and 256 neurons
output_size = len(np.unique(y_train))

# Initialize the neural network
nn = SimpleNeuralNetwork(input_size, hidden_sizes, output_size, learning_rate=0.01)

# Train the neural network
nn.train(X_train, y_train, epochs=20, batch_size=32)

# Evaluate on the test set
y_pred = nn.predict(X_test)
accuracy = np.mean(y_pred == y_test)
print(f"Neural Network Accuracy: {accuracy * 100:.2f}%")

Epoch 1/20, Loss: 2.4693
Epoch 2/20, Loss: 1.6802
Epoch 3/20, Loss: 1.3016
Epoch 4/20, Loss: 1.0449
Epoch 5/20, Loss: 0.8632
Epoch 6/20, Loss: 0.7401
Epoch 7/20, Loss: 0.6211
Epoch 8/20, Loss: 0.4898
Epoch 9/20, Loss: 0.3948
Epoch 10/20, Loss: 0.3490
Epoch 11/20, Loss: 0.2655
Epoch 12/20, Loss: 0.2220
Epoch 13/20, Loss: 0.1595
Epoch 14/20, Loss: 0.1549
Epoch 15/20, Loss: 0.0927
Epoch 16/20, Loss: 0.0618
Epoch 17/20, Loss: 0.0503
Epoch 18/20, Loss: 0.0483
Epoch 19/20, Loss: 0.0308
Epoch 20/20, Loss: 0.0259
Neural Network Accuracy: 67.62%


In [516]:
'''
Model 2: SIMPLE NEURAL NETWORKS (Three Hidden Layers)
'''

import numpy as np

# Step 1: Initialize the Neural Network
class AdvancedNeuralNetwork:
    def __init__(self, input_size, hidden_sizes, output_size, learning_rate=0.001):
        self.learning_rate = learning_rate

        # Initialize weights and biases for all layers
        self.weights = {
            "W1": np.random.randn(input_size, hidden_sizes[0]) * 0.01,
            "W2": np.random.randn(hidden_sizes[0], hidden_sizes[1]) * 0.01,
            "W3": np.random.randn(hidden_sizes[1], hidden_sizes[2]) * 0.01,
            "W4": np.random.randn(hidden_sizes[2], output_size) * 0.01,
        }
        self.biases = {
            "b1": np.zeros((1, hidden_sizes[0])),
            "b2": np.zeros((1, hidden_sizes[1])),
            "b3": np.zeros((1, hidden_sizes[2])),
            "b4": np.zeros((1, output_size)),
        }

    def relu(self, Z):
        """ReLU activation function."""
        return np.maximum(0, Z)

    def relu_derivative(self, Z):
        """Derivative of ReLU."""
        return Z > 0

    def softmax(self, Z):
        """Softmax activation function."""
        exp_Z = np.exp(Z - np.max(Z, axis=1, keepdims=True))
        return exp_Z / np.sum(exp_Z, axis=1, keepdims=True)

    def cross_entropy_loss(self, y_pred, y_true):
        """Cross-entropy loss."""
        n_samples = y_true.shape[0]
        log_probs = -np.log(y_pred[range(n_samples), y_true])
        return np.sum(log_probs) / n_samples

    def forward(self, X):
        """Forward propagation."""
        # Layer 1
        self.Z1 = np.dot(X, self.weights["W1"]) + self.biases["b1"]
        self.A1 = self.relu(self.Z1)

        # Layer 2
        self.Z2 = np.dot(self.A1, self.weights["W2"]) + self.biases["b2"]
        self.A2 = self.relu(self.Z2)

        # Layer 3
        self.Z3 = np.dot(self.A2, self.weights["W3"]) + self.biases["b3"]
        self.A3 = self.relu(self.Z3)

        # Output Layer
        self.Z4 = np.dot(self.A3, self.weights["W4"]) + self.biases["b4"]
        self.A4 = self.softmax(self.Z4)

        return self.A4

    def backward(self, X, y_true, y_pred):
        """Backward propagation."""
        n_samples = y_true.shape[0]
        y_true_one_hot = np.zeros_like(y_pred)
        y_true_one_hot[range(n_samples), y_true] = 1

        # Gradients for the output layer
        dZ4 = y_pred - y_true_one_hot
        dW4 = np.dot(self.A3.T, dZ4) / n_samples
        db4 = np.sum(dZ4, axis=0, keepdims=True) / n_samples

        # Gradients for Layer 3
        dA3 = np.dot(dZ4, self.weights["W4"].T)
        dZ3 = dA3 * self.relu_derivative(self.Z3)
        dW3 = np.dot(self.A2.T, dZ3) / n_samples
        db3 = np.sum(dZ3, axis=0, keepdims=True) / n_samples

        # Gradients for Layer 2
        dA2 = np.dot(dZ3, self.weights["W3"].T)
        dZ2 = dA2 * self.relu_derivative(self.Z2)
        dW2 = np.dot(self.A1.T, dZ2) / n_samples
        db2 = np.sum(dZ2, axis=0, keepdims=True) / n_samples

        # Gradients for Layer 1
        dA1 = np.dot(dZ2, self.weights["W2"].T)
        dZ1 = dA1 * self.relu_derivative(self.Z1)
        dW1 = np.dot(X.T, dZ1) / n_samples
        db1 = np.sum(dZ1, axis=0, keepdims=True) / n_samples

        # Update weights and biases
        self.weights["W1"] -= self.learning_rate * dW1
        self.biases["b1"] -= self.learning_rate * db1

        self.weights["W2"] -= self.learning_rate * dW2
        self.biases["b2"] -= self.learning_rate * db2

        self.weights["W3"] -= self.learning_rate * dW3
        self.biases["b3"] -= self.learning_rate * db3

        self.weights["W4"] -= self.learning_rate * dW4
        self.biases["b4"] -= self.learning_rate * db4

    def train(self, X, y, epochs=20, batch_size=32):
        """Train the neural network."""
        n_samples = X.shape[0]

        for epoch in range(epochs):
            # Shuffle the data
            indices = np.arange(n_samples)
            np.random.shuffle(indices)
            X = X[indices]
            y = y[indices]

            # Mini-batch gradient descent
            for i in range(0, n_samples, batch_size):
                X_batch = X[i:i + batch_size]
                y_batch = y[i:i + batch_size]

                # Forward and backward propagation
                y_pred = self.forward(X_batch)
                self.backward(X_batch, y_batch, y_pred)

            # Compute loss
            y_pred_full = self.forward(X)
            loss = self.cross_entropy_loss(y_pred_full, y)
            print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss:.4f}")

    def predict(self, X):
        """Predict labels for the input data."""
        y_pred = self.forward(X)
        return np.argmax(y_pred, axis=1)


# Step 2: Train and Evaluate the Neural Network
input_size = X_train.shape[1]  # Number of features (100 after PCA)
hidden_sizes = [1024, 512, 256]  # Three hidden layers
output_size = len(np.unique(y_train))  # Number of unique artists

# Initialize the neural network
advanced_nn = AdvancedNeuralNetwork(input_size, hidden_sizes, output_size, learning_rate=0.001)

# Train the neural network
advanced_nn.train(X_train, y_train, epochs=100, batch_size=32)

# Evaluate on the test set
y_pred = advanced_nn.predict(X_test)
accuracy = np.mean(y_pred == y_test)
print(f"Advanced Neural Network Accuracy: {accuracy * 100:.2f}%")

Epoch 1/100, Loss: 3.9022
Epoch 2/100, Loss: 3.8902
Epoch 3/100, Loss: 3.8738
Epoch 4/100, Loss: 3.8475
Epoch 5/100, Loss: 3.7983
Epoch 6/100, Loss: 3.7046
Epoch 7/100, Loss: 3.6032
Epoch 8/100, Loss: 3.5522
Epoch 9/100, Loss: 3.5164
Epoch 10/100, Loss: 3.4860
Epoch 11/100, Loss: 3.4565
Epoch 12/100, Loss: 3.4245
Epoch 13/100, Loss: 3.3877
Epoch 14/100, Loss: 3.3445
Epoch 15/100, Loss: 3.2957
Epoch 16/100, Loss: 3.2412
Epoch 17/100, Loss: 3.1817
Epoch 18/100, Loss: 3.1189
Epoch 19/100, Loss: 3.0545
Epoch 20/100, Loss: 2.9895
Epoch 21/100, Loss: 2.9240
Epoch 22/100, Loss: 2.8571
Epoch 23/100, Loss: 2.7886
Epoch 24/100, Loss: 2.7201
Epoch 25/100, Loss: 2.6526
Epoch 26/100, Loss: 2.5871
Epoch 27/100, Loss: 2.5248
Epoch 28/100, Loss: 2.4640
Epoch 29/100, Loss: 2.4080
Epoch 30/100, Loss: 2.3529
Epoch 31/100, Loss: 2.3040
Epoch 32/100, Loss: 2.2517
Epoch 33/100, Loss: 2.2051
Epoch 34/100, Loss: 2.1605
Epoch 35/100, Loss: 2.1143
Epoch 36/100, Loss: 2.0677
Epoch 37/100, Loss: 2.0265
Epoch 38/1

In [523]:
'''
Ensemble of Neural Networks:
An ensemble approach can combine multiple neural networks to improve performance.
Instead of training a single neural network, train multiple models and aggregate their predictions.
'''

# Step 1: Initialize and Train Multiple Neural Networks
ensemble_models = []
num_ensembles = 3  # Number of models in the ensemble

for i in range(num_ensembles):
    print(f"Training model {i + 1}/{num_ensembles}")
    model = SimpleNeuralNetwork(input_size, hidden_sizes, output_size, learning_rate=0.01)
    model.train(X_train, y_train, epochs=20, batch_size=32)
    ensemble_models.append(model)

# Step 2: Predict with Ensemble
def ensemble_predict(models, X):
    """
    Aggregate predictions from multiple models.
    """
    ensemble_probs = np.zeros((X.shape[0], output_size))  # Store probabilities

    for model in models:
        y_pred = model.forward(X)  # Get probabilities from each model
        ensemble_probs += y_pred  # Aggregate probabilities

    # Average the probabilities
    ensemble_probs /= len(models)
    return np.argmax(ensemble_probs, axis=1)  # Final prediction

# Step 3: Evaluate the Ensemble
y_pred_ensemble = ensemble_predict(ensemble_models, X_test)
ensemble_accuracy = np.mean(y_pred_ensemble == y_test)
print(f"Ensemble Neural Network Accuracy: {ensemble_accuracy * 100:.2f}%")

Training model 1/3
Epoch 1/20, Loss: 1.8988
Epoch 2/20, Loss: 1.2811
Epoch 3/20, Loss: 0.9383
Epoch 4/20, Loss: 0.7332
Epoch 5/20, Loss: 0.5804
Epoch 6/20, Loss: 0.4733
Epoch 7/20, Loss: 0.3491
Epoch 8/20, Loss: 0.2662
Epoch 9/20, Loss: 0.2150
Epoch 10/20, Loss: 0.1735
Epoch 11/20, Loss: 0.1113
Epoch 12/20, Loss: 0.0824
Epoch 13/20, Loss: 0.0592
Epoch 14/20, Loss: 0.0460
Epoch 15/20, Loss: 0.0430
Epoch 16/20, Loss: 0.0277
Epoch 17/20, Loss: 0.0237
Epoch 18/20, Loss: 0.0206
Epoch 19/20, Loss: 0.0174
Epoch 20/20, Loss: 0.0150
Training model 2/3
Epoch 1/20, Loss: 1.9029
Epoch 2/20, Loss: 1.2896
Epoch 3/20, Loss: 0.9712
Epoch 4/20, Loss: 0.7964
Epoch 5/20, Loss: 0.5893
Epoch 6/20, Loss: 0.4650
Epoch 7/20, Loss: 0.4082
Epoch 8/20, Loss: 0.2992
Epoch 9/20, Loss: 0.2135
Epoch 10/20, Loss: 0.1581
Epoch 11/20, Loss: 0.1251
Epoch 12/20, Loss: 0.0828
Epoch 13/20, Loss: 0.0639
Epoch 14/20, Loss: 0.0441
Epoch 15/20, Loss: 0.0346
Epoch 16/20, Loss: 0.0290
Epoch 17/20, Loss: 0.0252
Epoch 18/20, Loss:

In [528]:
'''
Gradient Boosted Neural Networks:
This technique combines the power of gradient boosting with a neural network.
Each subsequent neural network learns to correct the errors made by the previous ones.
'''

from sklearn.metrics import log_loss

# Step 1: Initialize Parameters
n_boosting_rounds = 5  # Number of boosting rounds
learning_rate = 0.1  # Boosting learning rate
boosted_predictions = np.zeros((X_train.shape[0], output_size))

# Step 2: Train Boosted Neural Networks
for round_idx in range(n_boosting_rounds):
    print(f"Training boosting round {round_idx + 1}/{n_boosting_rounds}")

    # Train a neural network on residuals
    model = SimpleNeuralNetwork(input_size, hidden_sizes, output_size, learning_rate=0.01)
    model.train(X_train, y_train, epochs=20, batch_size=32)

    # Get predictions
    y_pred_train = model.forward(X_train)
    boosted_predictions += learning_rate * (y_pred_train - boosted_predictions)

    # Store the model for later use
    if round_idx == n_boosting_rounds - 1:
        final_model = model

# Step 3: Evaluate the Final Model
y_pred = np.argmax(boosted_predictions, axis=1)
boosting_accuracy = np.mean(y_pred == y_train)
print(f"Boosted Neural Network Accuracy: {boosting_accuracy * 100:.2f}%")

Training boosting round 1/5
Epoch 1/20, Loss: 2.4520
Epoch 2/20, Loss: 1.6524
Epoch 3/20, Loss: 1.2772
Epoch 4/20, Loss: 1.0270
Epoch 5/20, Loss: 0.8936
Epoch 6/20, Loss: 0.7271
Epoch 7/20, Loss: 0.6061
Epoch 8/20, Loss: 0.4662
Epoch 9/20, Loss: 0.3997
Epoch 10/20, Loss: 0.3173
Epoch 11/20, Loss: 0.2484
Epoch 12/20, Loss: 0.1999
Epoch 13/20, Loss: 0.1541
Epoch 14/20, Loss: 0.1185
Epoch 15/20, Loss: 0.0897
Epoch 16/20, Loss: 0.0712
Epoch 17/20, Loss: 0.0575
Epoch 18/20, Loss: 0.0374
Epoch 19/20, Loss: 0.0316
Epoch 20/20, Loss: 0.0255
Training boosting round 2/5
Epoch 1/20, Loss: 2.4616
Epoch 2/20, Loss: 1.7350
Epoch 3/20, Loss: 1.3171
Epoch 4/20, Loss: 1.0488
Epoch 5/20, Loss: 0.8663
Epoch 6/20, Loss: 0.7114
Epoch 7/20, Loss: 0.5947
Epoch 8/20, Loss: 0.5301
Epoch 9/20, Loss: 0.4157
Epoch 10/20, Loss: 0.3107
Epoch 11/20, Loss: 0.2795
Epoch 12/20, Loss: 0.2236
Epoch 13/20, Loss: 0.1444
Epoch 14/20, Loss: 0.1129
Epoch 15/20, Loss: 0.0866
Epoch 16/20, Loss: 0.0712
Epoch 17/20, Loss: 0.0493
