In [None]:
import os
import numpy as np
import tkinter as tk
from tkinter import filedialog
from PIL import Image, ImageTk
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [4]:
import zipfile
import os

# Archive to unpack and the directory that receives its contents
zip_file_path = 'fma_img.zip'
extract_folder_path = 'images'

try:
    # Ensure the destination directory is present, reporting which case applied
    if os.path.exists(extract_folder_path):
        print(f"Folder {extract_folder_path} already exists.")
    else:
        os.makedirs(extract_folder_path)
        print(f"Folder {extract_folder_path} created successfully.")

    # Unpack every member of the archive into the destination directory
    with zipfile.ZipFile(zip_file_path, 'r') as archive:
        archive.extractall(extract_folder_path)
    print(f"Files extracted to {extract_folder_path} successfully.")
except FileNotFoundError:
    # Raised when the archive itself is missing
    print(f"Error: The file {zip_file_path} was not found.")
except PermissionError:
    # Raised when the folder cannot be created or written to
    print(f"Error: You don't have permission to create the folder {extract_folder_path} or extract the ZIP file.")
except Exception as e:
    # Anything else (e.g. a corrupt archive) is reported rather than raised
    print(f"An error occurred: {e}")

Folder images already exists.
Files extracted to images successfully.


In [2]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Reshape, LayerNormalization, MultiHeadAttention, Dense, GlobalAveragePooling1D, Dropout, Activation
from tensorflow.keras.models import Model
from tensorflow.keras.activations import gelu
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Main directory containing one sub-directory of images per class.
# The location can be overridden with the FMA_IMG_DIR environment variable so
# the notebook runs on other machines; when it is unset, the original local
# path is used unchanged.
main_dir = os.environ.get(
    'FMA_IMG_DIR',
    r'C:\Users\Plaksha\3D Objects\DL\fma_img\fma_img',
)




In [3]:
# Size every image is resized to, and the number of images per batch
img_height, img_width = 100, 100
batch_size = 16

# A single generator serves both subsets: pixel values are scaled by 1/255 and
# 20% of the images are held out for validation.
train_datagen = ImageDataGenerator(rescale=1.0 / 255, validation_split=0.2)

# Options shared by the training and validation iterators
_flow_options = {
    'target_size': (img_height, img_width),
    'batch_size': batch_size,
    'class_mode': 'categorical',
}

# The two iterators read from the same directory and differ only in `subset`
train_generator = train_datagen.flow_from_directory(
    main_dir, subset='training', **_flow_options
)
validation_generator = train_datagen.flow_from_directory(
    main_dir, subset='validation', **_flow_options
)



Found 230 images belonging to 13 classes.
Found 50 images belonging to 13 classes.


In [5]:
# Hyperparameters of the attention stage
num_heads = 2        # attention heads
head_size = 32       # key dimension of each head
ff_dim = 64          # width of the dense layers that follow attention
dropout_rate = 0.1   # shared by the attention layer and the Dropout layers

# --- Convolutional stem -----------------------------------------------------
inputs = Input(shape=(img_height, img_width, 3))
conv_features = Conv2D(32, (3, 3), activation='relu')(inputs)
pooled_features = MaxPooling2D(pool_size=(2, 2))(conv_features)

# --- Self-attention over spatial positions ----------------------------------
# Collapse the two spatial axes into one sequence axis while keeping the
# channel axis, so each spatial position becomes one token.
tokens = Reshape(target_shape=(-1, pooled_features.shape[-1]))(pooled_features)
tokens = LayerNormalization(epsilon=1e-6)(tokens)
# The same tensor is passed as query and value, i.e. self-attention
hidden = MultiHeadAttention(
    key_dim=head_size, num_heads=num_heads, dropout=dropout_rate
)(tokens, tokens)

# --- Two identical Dropout -> Dense -> GELU stages --------------------------
for _ in range(2):
    hidden = Dropout(dropout_rate)(hidden)
    hidden = Dense(ff_dim)(hidden)
    hidden = Activation(gelu)(hidden)

# --- Classification head ----------------------------------------------------
# Average over the token axis, then emit one softmax unit per class folder
pooled_tokens = GlobalAveragePooling1D()(hidden)
num_classes = len(train_generator.class_indices)
outputs = Dense(num_classes, activation='softmax')(pooled_tokens)

model = Model(inputs, outputs)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Print the layer-by-layer summary
model.summary()

# Fit for 40 epochs, evaluating on the validation subset after each epoch
history = model.fit(train_generator, epochs=40, validation_data=validation_generator)

# Report the metrics recorded for the last epoch
for label, metric_key in (
    ("Training Accuracy:", 'accuracy'),
    ("Validation Accuracy:", 'val_accuracy'),
    ("Training Loss:", 'loss'),
    ("Validation Loss:", 'val_loss'),
):
    print(label, history.history[metric_key][-1])





Model: "model_2"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_3 (InputLayer)        [(None, 100, 100, 3)]        0         []                            
                                                                                                  
 conv2d_2 (Conv2D)           (None, 98, 98, 32)           896       ['input_3[0][0]']             
                                                                                                  
 max_pooling2d_2 (MaxPoolin  (None, 49, 49, 32)           0         ['conv2d_2[0][0]']            
 g2D)                                                                                             
                                                                                                  
 reshape_2 (Reshape)         (None, 2401, 32)             0         ['max_pooling2d_2[0][0]'

In [None]:
# Function to preprocess the uploaded image
def preprocess_image(image_path):
    """Load an image file and convert it into a one-image batch for the model.

    Args:
        image_path: Path of the image file to load.

    Returns:
        A NumPy array with a leading batch axis of length 1, holding the image
        resized to (img_height, img_width) with values divided by 255 -- the
        same rescaling the training generator applies.
    """
    # These helpers are not imported anywhere else in the notebook, so the
    # original call failed with NameError; import them where they are used.
    from tensorflow.keras.preprocessing.image import img_to_array, load_img

    img = load_img(image_path, target_size=(img_height, img_width))
    img_array = img_to_array(img) / 255.0
    # Add the batch axis that model.predict expects
    img_array = np.expand_dims(img_array, axis=0)
    return img_array

# Function to calculate embeddings
def calculate_embeddings(image_path):
    """Run the trained model on one image file and return its raw prediction.

    Args:
        image_path: Path of the image file to evaluate.

    Returns:
        Whatever ``model.predict`` yields for the single-image batch produced
        by ``preprocess_image``. NOTE(review): with the model defined in this
        notebook that is the softmax output of the final Dense layer, not an
        intermediate feature vector -- confirm this is the intended "embedding".
    """
    return model.predict(preprocess_image(image_path))

# Function to open file dialog and get the image path
def browse_file():
    """Ask the user for an image file and show a 200x200 preview of it.

    The chosen path is stored in ``image_path_var`` (an empty string when the
    dialog is cancelled). When a file was chosen, a resized copy is displayed
    in ``image_label``.
    """
    file_path = filedialog.askopenfilename()
    image_path_var.set(file_path)
    if file_path:
        # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same
        # filter under the name that exists in both old and new releases.
        # The with-block closes the source file once the resized copy exists.
        with Image.open(file_path) as source_img:
            preview = source_img.resize((200, 200), Image.LANCZOS)
        photo = ImageTk.PhotoImage(preview)
        image_label.config(image=photo)
        # Keep a reference on the widget so the PhotoImage is not garbage
        # collected while it is being displayed.
        image_label.image = photo

# Function to match embeddings and determine class
def match_embeddings():
    """Classify the selected image by its nearest neighbour in the dataset.

    The model output for the selected image is compared (Euclidean distance)
    with the model output for every image under ``main_dir``; the name of the
    sub-directory containing the closest image is written to ``result_var``.
    Nothing happens when no image has been selected.

    Entries of ``main_dir`` that are not directories, and files that cannot be
    opened as images, are skipped instead of aborting the whole search.
    """
    image_path = image_path_var.get()
    if not image_path:
        return

    try:
        uploaded_embeddings = calculate_embeddings(image_path)
    except OSError:
        # Pillow reports unreadable / non-image files with OSError subclasses
        result_var.set("Could not read the selected image.")
        return

    min_distance = float('inf')
    min_class = None
    for subdir in os.listdir(main_dir):
        subdir_path = os.path.join(main_dir, subdir)
        # Stray files next to the class folders would make listdir raise
        if not os.path.isdir(subdir_path):
            continue
        for img_file in os.listdir(subdir_path):
            img_path = os.path.join(subdir_path, img_file)
            if not os.path.isfile(img_path):
                continue
            try:
                embeddings = calculate_embeddings(img_path)
            except OSError:
                # e.g. Thumbs.db or other non-image files inside a class folder
                continue
            distance = np.linalg.norm(uploaded_embeddings - embeddings)
            if distance < min_distance:
                min_distance = distance
                min_class = subdir

    if min_class is None:
        # No reference image was compared; avoid reporting "class: None"
        result_var.set("No reference images could be read from the dataset folder.")
    else:
        result_var.set(f"The song clip belongs to class: {min_class}")

# Top-level application window
root = tk.Tk()
root.title("Image Classification")

# Tk variables backing the read-only path entry and the result label
image_path_var = tk.StringVar()
result_var = tk.StringVar()

# Widgets, created in the order they appear in the window
browse_button = tk.Button(root, text="Upload Image", command=browse_file)
image_label = tk.Label(root)
image_path_entry = tk.Entry(root, textvariable=image_path_var, state='readonly')
classify_button = tk.Button(root, text="Classify Image", command=match_embeddings)
result_label = tk.Label(root, textvariable=result_var)

# Stack the widgets top to bottom with the same vertical padding
for widget in (
    browse_button,
    image_label,
    image_path_entry,
    classify_button,
    result_label,
):
    widget.pack(pady=10)

# Function to close the tkinter window
def close_window():
    """Destroy the main window ``root``; used as the window-close handler."""
    root.destroy()

# Bind closing event to close_window function
# ("WM_DELETE_WINDOW" is the protocol name passed for the window-manager close request)
root.protocol("WM_DELETE_WINDOW", close_window)

# Run the tkinter event loop
# NOTE(review): in a notebook this cell presumably keeps running until the
# window is closed -- confirm that blocking the kernel here is acceptable.
root.mainloop()
