In [1]:
# Step 1: Extract the Zip File
import os
import zipfile

In [2]:
# Define the path to the zip file (update the path as necessary)
zip_file_path = os.path.join(os.path.expanduser('~'), 'Downloads', 'archive (7).zip')

In [3]:
# Define a directory to extract the contents
extracted_dir_path = 'extracted_contents'

In [4]:
# Create the directory if it doesn't exist
os.makedirs(extracted_dir_path, exist_ok=True)

In [5]:
# Unzip the archive
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    zip_ref.extractall(extracted_dir_path)

In [6]:
# Step 2: Load Images and Labels
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [7]:
def load_images_and_labels(csv_path, image_dir):
    df = pd.read_csv(csv_path)
    images = []
    labels = []
    for _, row in df.iterrows():
        img_path = os.path.join(image_dir, row['Image'])
        img = load_img(img_path, target_size=(128, 128))  # Resize images to 128x128
        img_array = img_to_array(img)
        images.append(img_array)
        labels.append(row['Label'])
    return np.array(images), np.array(labels)

In [8]:
# Construct paths to the CSV files and directories
train_csv_path = os.path.join(extracted_dir_path, 'train.csv')
test_csv_path = os.path.join(extracted_dir_path, 'test.csv')
train_dir = os.path.join(extracted_dir_path, 'train')
test_dir = os.path.join(extracted_dir_path, 'test')

In [9]:
# Construct paths to the CSV files and directories
train_csv_path = os.path.join(extracted_dir_path, 'train.csv')
test_csv_path = os.path.join(extracted_dir_path, 'test.csv')
train_dir = os.path.join(extracted_dir_path, 'train')
test_dir = os.path.join(extracted_dir_path, 'test')

In [10]:
# Step 3: Split Data into Training and Validation Sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

NameError: name 'X' is not defined

In [None]:
# Step 4: Normalize and Prepare Data
X_train = X_train / 255.0
X_val = X_val / 255.0
y_train = to_categorical(y_train)
y_val = to_categorical(y_val)

In [None]:
# Step 5: Preprocess the Dataset
import matplotlib.pyplot as plt

In [None]:
def display_sample_images(images, labels, class_names):
    plt.figure(figsize=(10, 10))
    for i in range(25):
        plt.subplot(5, 5, i + 1)
        plt.xticks([])
        plt.yticks([])
        plt.grid(False)
        plt.imshow(images[i].astype('uint8'))
        plt.xlabel(class_names[labels[i]])
    plt.show()

In [None]:
# Display a sample of images and labels
class_names = [str(i) for i in range(len(np.unique(y)))]
display_sample_images(X_train, y_train.argmax(axis=1), class_names)

In [None]:
# Check the distribution of labels in the training set
label_counts = pd.Series(y_train.argmax(axis=1)).value_counts()
print("Label distribution in the training set:\n", label_counts)

In [None]:
# Perform data augmentation (if necessary)
from tensorflow.keras.preprocessing.image import ImageDataGenerator

datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

In [None]:
# Fit the data generator to the training data
datagen.fit(X_train)

# Example of displaying augmented images
for X_batch, y_batch in datagen.flow(X_train, y_train, batch_size=9):
    plt.figure(figsize=(10, 10))
    for i in range(9):
        plt.subplot(3, 3, i + 1)
        plt.xticks([])
        plt.yticks([])
        plt.grid(False)
        plt.imshow(X_batch[i].astype('uint8'))
        plt.xlabel(class_names[y_batch[i].argmax()])
    plt.show()
    break  # Display only one batch of augmented images

print("Data preprocessing complete.")

In [None]:
# Step 6: Build and Train the Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

In [None]:
# Build a simple CNN model
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(len(np.unique(y)), activation='softmax')  # Number of classes
])

In [None]:
# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


In [None]:
# Train the model
model.fit(X_train, y_train, epochs=3, validation_data=(X_val, y_val))


In [None]:
# Step 7: Prepare Test Data and Make Predictions
X_test, _ = load_images_and_labels(test_csv_path, test_dir)
X_test = X_test / 255.0
predictions = model.predict(X_test)
predicted_labels = np.argmax(predictions, axis=1)

In [None]:
# Save predictions to submission.csv
submission_df = pd.read_csv(os.path.join(extracted_dir_path, 'submission.csv'))
submission_df['Label'] = predicted_labels
submission_df.to_csv('submission.csv', index=False)
print("Model training complete and predictions saved to submission.csv.")