# This notebook implements our baseline model
The model always predicts the majority class found in our data.


In [1]:
import os
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from collections import Counter

# Load the class distribution (number of images per folder).
def load_class_distribution(data_path, extensions=(".jpg", ".jpeg", ".png")):
    """Count the image files inside each class sub-folder of ``data_path``.

    Every immediate sub-directory of ``data_path`` is treated as one class and
    its folder name is used as the class name. Entries directly under
    ``data_path`` that are not directories are ignored.

    Args:
        data_path: Directory that contains one sub-folder per class.
        extensions: File-name suffix (or iterable of suffixes) that counts as
            an image. Matching is case-insensitive.

    Returns:
        dict mapping folder name -> number of entries in that folder whose
        name ends with one of ``extensions``. Keys are inserted in sorted
        order so that anything derived from the key order (e.g. integer
        labels) is reproducible across runs and machines.

    Raises:
        OSError: Propagated from ``os.listdir`` when a directory cannot be
            listed (e.g. ``data_path`` does not exist).
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(extensions, str):
        extensions = (extensions,)
    suffixes = tuple(ext.lower() for ext in extensions)

    class_counts = {}
    # os.listdir gives no ordering guarantee; sort to make the result deterministic.
    for folder_name in sorted(os.listdir(data_path)):
        folder_path = os.path.join(data_path, folder_name)
        if not os.path.isdir(folder_path):
            continue
        class_counts[folder_name] = sum(
            1 for entry in os.listdir(folder_path) if entry.lower().endswith(suffixes)
        )
    return class_counts

# Predict the majority class for all samples.
def majority_class_prediction(y_true):
    """Build a constant prediction equal to the most frequent label.

    Args:
        y_true: Sequence of hashable ground-truth labels.

    Returns:
        Tuple ``(predictions, top_label)`` where ``predictions`` is a list of
        ``len(y_true)`` copies of ``top_label``. When several labels share the
        highest count, the one ``Counter.most_common`` reports first is used.
    """
    label_frequencies = Counter(y_true)
    top_label, _occurrences = label_frequencies.most_common(1)[0]
    predictions = [top_label for _position in range(len(y_true))]
    return predictions, top_label

# Evaluate a baseline model that always predicts the majority class.
def evaluate_baseline_model(data_path):
    """Print the metrics of a baseline that always predicts the majority class.

    The ground truth is synthesised from the per-folder image counts returned
    by ``load_class_distribution``: each class contributes as many labels as it
    has images. The prediction for every sample is the most frequent label.
    Accuracy, weighted precision / recall / F1 and a per-class classification
    report are printed to stdout.

    Args:
        data_path: Directory containing one sub-folder of images per class.

    Raises:
        ValueError: If no images are found under ``data_path``, since there is
            nothing to evaluate.
    """
    # Load class distribution
    class_counts = load_class_distribution(data_path)
    print("Class Distribution:", class_counts)

    # Freeze the class order once: a class's integer label is its position in
    # this list, and the same list names the rows of the report.
    class_names = list(class_counts)
    labels = list(range(len(class_names)))

    # Generate ground truth (labels) for evaluation
    y_true = []
    for label, cls in enumerate(class_names):
        y_true.extend([label] * class_counts[cls])

    if not y_true:
        raise ValueError(
            f"No images found under {data_path!r}; cannot evaluate the baseline model."
        )

    # Predict the majority class
    y_pred, majority_class = majority_class_prediction(y_true)
    print(f"Majority Class (Predicted): {class_names[majority_class]}")

    # Calculate metrics; zero_division=0 scores never-predicted classes as 0
    # instead of emitting a warning.
    acc = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average='weighted', zero_division=0)
    recall = recall_score(y_true, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_true, y_pred, average='weighted', zero_division=0)

    # Print metrics
    print("\nBaseline Model Performance:")
    print(f"Accuracy: {acc:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")

    # Print classification report. `labels` is passed explicitly so that a
    # class folder with zero images still lines up with its entry in
    # `target_names` instead of causing a length mismatch.
    print("\nClassification Report:")
    print(
        classification_report(
            y_true,
            y_pred,
            labels=labels,
            target_names=class_names,
            zero_division=0,
        )
    )

if __name__ == "__main__":
    # Path to the filtered dataset. Set the ART_DATA_PATH environment variable
    # to run on another machine; the original author's local path remains the
    # default so existing behavior is unchanged when the variable is unset.
    data_path = os.environ.get(
        "ART_DATA_PATH", r"C:\Users\yozev\OneDrive\Desktop\artFiltered"
    )

    # Evaluate the baseline model
    evaluate_baseline_model(data_path)


Class Distribution: {'Abstract_Expressionism': 4954, 'Art_Nouveau_Modern': 4998, 'Baroque': 4994, 'Cubism': 5006, 'Expressionism': 5000, 'Impressionism': 5000, 'Naive_Art_Primitivism': 4936, 'Northern_Renaissance': 4952, 'Post_Impressionism': 5000, 'Realism': 5000, 'Rococo': 4909, 'Romanticism': 5000, 'Symbolism': 5002}
Majority Class (Predicted): Cubism

Baseline Model Performance:
Accuracy: 0.0773
Precision: 0.0060
Recall: 0.0773
F1 Score: 0.0111

Classification Report:
                        precision    recall  f1-score   support

Abstract_Expressionism       0.00      0.00      0.00      4954
    Art_Nouveau_Modern       0.00      0.00      0.00      4998
               Baroque       0.00      0.00      0.00      4994
                Cubism       0.08      1.00      0.14      5006
         Expressionism       0.00      0.00      0.00      5000
         Impressionism       0.00      0.00      0.00      5000
 Naive_Art_Primitivism       0.00      0.00      0.00      4936
  Northern