In [None]:
import os
import numpy as np
import pandas as pd
from PIL import Image
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm

# Dataset locations on disk. Despite its name, `mhist_zip_path` holds a
# directory path, not a path to a .zip archive.
mhist_zip_path = '../ProjectA/mhist_dataset'
annotations_path = "../ProjectA/mhist_dataset/annotations.csv"
images_dir = "../ProjectA/mhist_dataset/images"

# Read the comma-separated annotation table into a DataFrame
annotations_df = pd.read_csv(annotations_path, sep=',')

# Row subsets selected by the value of the 'Partition' column
train_annotations = annotations_df.loc[annotations_df['Partition'].eq('train')]
test_annotations = annotations_df.loc[annotations_df['Partition'].eq('test')]

# Function to load and preprocess images
def load_and_preprocess_image(image_path, target_size=(64, 64)):
    """Load an image file and return it as a normalized RGB array.

    Args:
        image_path: Path to an image file that PIL can open.
        target_size: ``(width, height)`` the image is resized to. Defaults
            to 64x64.

    Returns:
        A float NumPy array of shape ``(height, width, 3)`` whose values
        are the 8-bit RGB channel values divided by 255.
    """
    # The context manager releases the underlying file handle as soon as the
    # pixels have been read; without it, loading many images in a loop can
    # leave file descriptors open.
    with Image.open(image_path) as img:
        # Force three channels so grayscale, palette or RGBA files still
        # yield (H, W, 3) arrays that can be stacked together.
        img = img.convert('RGB').resize(target_size)
        img_array = np.array(img) / 255.0
    return img_array

# Load images and labels
# One path per annotation row, in the same order as the labels below.
all_image_paths = [os.path.join(images_dir, img_name) for img_name in annotations_df['Image Name']]
# The classification target must be the class column, not 'Partition':
# 'Partition' only records train/test membership, so training on it teaches
# the model to predict which split an image belongs to.
# NOTE(review): assumes the standard MHIST annotations.csv schema, where the
# class is stored in 'Majority Vote Label' -- confirm against the CSV header.
all_labels = annotations_df['Majority Vote Label'].values

# Map the string class names to integer ids, as expected by
# 'sparse_categorical_crossentropy'.
label_encoder = LabelEncoder()
all_labels_encoded = label_encoder.fit_transform(all_labels)

# Split data into train and test sets (fixed seed for reproducibility)
train_image_paths, test_image_paths, train_labels, test_labels = train_test_split(
    all_image_paths, all_labels_encoded, test_size=0.2, random_state=42
)

# Load and preprocess images into arrays with one entry per image
train_data = np.array([load_and_preprocess_image(img_path) for img_path in tqdm(train_image_paths)])
test_data = np.array([load_and_preprocess_image(img_path) for img_path in tqdm(test_image_paths)])

# One output unit per distinct encoded label value
num_classes = np.unique(all_labels_encoded).size

# Build the CNN layer by layer: three 3x3 convolution stages (the first two
# each followed by 2x2 max pooling), then a dense classifier head.
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(64, 64, 3)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.Flatten())
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dense(num_classes, activation='softmax'))

# Plain SGD with integer-label cross-entropy; accuracy is tracked as a metric
model.compile(
    optimizer=optimizers.SGD(learning_rate=0.01),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit for 20 epochs, holding out 20% of the training arrays for validation
history = model.fit(
    train_data,
    train_labels,
    epochs=20,
    batch_size=32,
    validation_split=0.2,
)

# Report loss and accuracy on the held-out test arrays
test_loss, test_acc = model.evaluate(test_data, test_labels)
print(f'Test accuracy: {test_acc}')