### Import required libraries


In [87]:
import os
import zipfile
import numpy as np
import pandas as pd
from PIL import Image
import cv2
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Sequential, layers, models
from tensorflow.keras.layers import Dense, Dropout, Flatten, BatchNormalization, UpSampling2D
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras import optimizers

### Data extraction and setup


In [88]:
def extract_and_setup_data(zip_path, extract_path):
    """
    Extract CIFAR-10 dataset from ZIP file and set up directory structure
    
    Parameters:
    zip_path (str): Path to CIFAR-10 ZIP file
    extract_path (str): Path where to extract the data
    
    Returns:
    str: Path to extracted data
    """
    print("EXTRACTING DATASET")
    print("-" * 30)
    
    try:
        # Extract ZIP file
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(extract_path)
        
        filenames = os.listdir(extract_path)
        print(f"Dataset extracted successfully")
        print(f"Total files: {len(filenames)}")
        print(f"Sample files: {filenames[:5]}")
        
        return extract_path
        
    except FileNotFoundError:
        print("ZIP file not found. Using alternative data loading...")
        return None
    except Exception as e:
        print(f"Error extracting data: {e}")
        return None

In [89]:
def load_and_explore_labels(extract_path):
    """
    Load and explore the CIFAR-10 labels
    
    Parameters:
    extract_path (str): Path to extracted dataset
    
    Returns:
    tuple: DataFrame with labels and processed labels list
    """
    print(f"\nLOADING AND EXPLORING LABELS")
    print("-" * 30)
    
    try:
        # Load labels CSV file
        labels_df = pd.read_csv(os.path.join(extract_path, 'trainlabels.csv'))
        
        print(f"Labels loaded: {labels_df.shape[0]} samples")
        print(f"Label distribution:")
        
        label_counts = labels_df['label'].value_counts()
        for label, count in label_counts.items():
            print(f"   - {label}: {count} images")
        
        # Create label dictionary for encoding
        labels_dictionary = {
            'airplane': 0, 'automobile': 1, 'bird': 2, 'cat': 3, 'deer': 4,
            'dog': 5, 'frog': 6, 'horse': 7, 'ship': 8, 'truck': 9
        }
        
        # Convert labels to numerical format
        labels = [labels_dictionary[label] for label in labels_df['label']]
        
        print(f"Labels encoded to numerical format")
        print(f"Sample encoded labels: {labels[:5]}")
        
        return labels_df, labels, labels_dictionary
        
    except Exception as e:
        print(f"❌ Error loading labels: {e}")
        return None, None, None

In [90]:
def load_and_process_images(extract_path, labels_df):
    """
    Load and process CIFAR-10 images
    
    Parameters:
    extract_path (str): Path to extracted dataset
    labels_df (DataFrame): DataFrame containing image IDs
    
    Returns:
    list: Processed image data as numpy arrays
    """
    print(f"\nLOADING AND PROCESSING IMAGES")
    print("-" * 30)
    
    try:
        id_list = list(labels_df['id'])
        train_data_folder = os.path.join(extract_path, 'train')
        
        data = []
        processed_count = 0
        
        print(f"Processing {len(id_list)} images...")
        
        for i, img_id in enumerate(id_list):
            try:
                img_path = os.path.join(train_data_folder, f"{img_id}.png")
                img = Image.open(img_path)
                img_array = np.array(img)
                data.append(img_array)
                processed_count += 1
                
                # Progress indicator
                if (i + 1) % 10000 == 0:
                    print(f"   Processed {i + 1}/{len(id_list)} images...")
                    
            except Exception as e:
                print(f"   Warning: Could not process image {img_id}: {e}")
        
        print(f"Successfully processed: {processed_count} images")
        print(f"Image shape: {data[0].shape if data else 'N/A'}")
        
        return data
        
    except Exception as e:
        print(f"Error processing images: {e}")
        return []

### Data paths configuration


In [91]:
ZIP_PATH = r'C:\Users\Mohamed Makki\Desktop\cifar-10.zip'
EXTRACT_PATH = r'C:\Users\Mohamed Makki\Desktop\cifar-10'

### Load data or use alternative


In [92]:
try:
    # Try to extract and load real data
    extracted_path = extract_and_setup_data(ZIP_PATH, EXTRACT_PATH)
    
    if extracted_path:
        labels_df, labels, labels_dict = load_and_explore_labels(extracted_path)
        image_data = load_and_process_images(extracted_path, labels_df)
        
        if image_data and labels:
            X = np.array(image_data)
            y = np.array(labels)
            print(f"\nREAL DATA LOADED")
            print(f"Images shape: {X.shape}")
            print(f"Labels shape: {y.shape}")
        else:
            raise Exception("Failed to process real data")
    else:
        raise Exception("Failed to extract data")
        
except Exception as e:
    print(f"\nCREATING SAMPLE DATA")
    print("-" * 30)
    print(f"Note: {e}")
    print("Using synthetic CIFAR-10 like data for demonstration...")
    
    # Create sample data that matches CIFAR-10 structure
    X = np.random.randint(0, 255, (5000, 32, 32, 3), dtype=np.uint8)
    y = np.random.randint(0, 10, 5000)
    
    labels_dict = {
        'airplane': 0, 'automobile': 1, 'bird': 2, 'cat': 3, 'deer': 4,
        'dog': 5, 'frog': 6, 'horse': 7, 'ship': 8, 'truck': 9
    }
    
    print(f"Sample data created: {X.shape}, {y.shape}")

EXTRACTING DATASET
------------------------------
Dataset extracted successfully
Total files: 6
Sample files: ['sampleSubmission.csv', 'test', 'test.7z', 'train', 'train.7z']

LOADING AND EXPLORING LABELS
------------------------------
Labels loaded: 50000 samples
Label distribution:
   - frog: 5000 images
   - truck: 5000 images
   - deer: 5000 images
   - automobile: 5000 images
   - bird: 5000 images
   - horse: 5000 images
   - ship: 5000 images
   - cat: 5000 images
   - dog: 5000 images
   - airplane: 5000 images
Labels encoded to numerical format
Sample encoded labels: [6, 9, 9, 4, 1]

LOADING AND PROCESSING IMAGES
------------------------------
Processing 50000 images...
   Processed 10000/50000 images...
   Processed 20000/50000 images...
   Processed 30000/50000 images...
   Processed 40000/50000 images...
   Processed 50000/50000 images...
Successfully processed: 50000 images
Image shape: (32, 32, 3)

REAL DATA LOADED
Images shape: (50000, 32, 32, 3)
Labels shape: (50000,)


### Data preprocessing and splitting

In [93]:
# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Normalize pixel values to 0-1 range
X_train_scaled = X_train.astype('float32') / 255.0
X_test_scaled = X_test.astype('float32') / 255.0

print(f"Training set: {X_train_scaled.shape[0]} images")
print(f"Testing set: {X_test_scaled.shape[0]} images")
print(f"Pixel values normalized to [0, 1] range")
print(f"Input shape: {X_train_scaled.shape[1:]}")

Training set: 40000 images
Testing set: 10000 images
Pixel values normalized to [0, 1] range
Input shape: (32, 32, 3)


### Build transfer learning model with ResNet50

In [96]:
def create_transfer_learning_model(input_shape=(32, 32, 3), num_classes=10):
    """
    Create transfer learning model using ResNet50
    
    Parameters:
    input_shape (tuple): Shape of input images
    num_classes (int): Number of output classes
    
    Returns:
    keras.Sequential: Compiled transfer learning model
    """
    print(f"\nBUILDING TRANSFER LEARNING MODEL")
    print("-" * 30)
    
    # Load pre-trained ResNet50 (without top layers)
    base_model = ResNet50(
        weights='imagenet',
        include_top=False,
        input_shape=(256, 256, 3)  # ResNet50 requires larger input
    )
    
    print(f"ResNet50 base model loaded")
    print(f"Base model parameters: {base_model.count_params():,}")
    
    # Freeze base model layers
    base_model.trainable = False
    
    # Build complete model
    model = models.Sequential([
        # Upsampling layers to match ResNet50 input requirements
        UpSampling2D((2, 2), input_shape=input_shape),  # 32x32 -> 64x64
        UpSampling2D((2, 2)),                          # 64x64 -> 128x128
        UpSampling2D((2, 2)),                          # 128x128 -> 256x256
        
        # Pre-trained ResNet50 base
        base_model,
        
        # Custom classification head
        Flatten(),
        BatchNormalization(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        BatchNormalization(),
        Dense(64, activation='relu'),
        Dropout(0.5),
        BatchNormalization(),
        Dense(num_classes, activation='softmax')
    ])
    
    # Compile model with lower learning rate for transfer learning
    model.compile(
        optimizer=optimizers.RMSprop(learning_rate=2e-5),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    
    print(f"Transfer learning model created")
    print(f"Total parameters: {model.count_params():,}")
    print(f"Trainable parameters: {np.sum([tf.keras.backend.count_params(p) for p in model.non_trainable_weights]):,}")
    
    return model

### Create and train the model


In [97]:
model = create_transfer_learning_model()

print(f"\nTRAINING TRANSFER LEARNING MODEL")
print("-" * 30)

# Train the model
history = model.fit(
    X_train_scaled, y_train,
    epochs=10,
    validation_split=0.1,
    batch_size=32,
    verbose=1
)

print(f"Transfer learning model training completed")


BUILDING TRANSFER LEARNING MODEL
------------------------------
ResNet50 base model loaded
Base model parameters: 23,587,712
Transfer learning model created
Total parameters: 40,899,018
Trainable parameters: 23,850,240

TRAINING TRANSFER LEARNING MODEL
------------------------------


  super().__init__(**kwargs)


Epoch 1/10
[1m  21/1125[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m35:18[0m 2s/step - accuracy: 0.1001 - loss: 2.9564

KeyboardInterrupt: 

### Model evaluation and visualization


In [None]:
# Evaluate on test set

test_loss, test_accuracy = model.evaluate(X_test_scaled, y_test, verbose=0)

print(f"Test Accuracy: {test_accuracy:.4f} ({test_accuracy*100:.2f}%)")
print(f"Test Loss: {test_loss:.4f}")

# Get training history
final_train_accuracy = history.history['accuracy'][-1]
final_val_accuracy = history.history['val_accuracy'][-1]

print(f"Final Training Accuracy: {final_train_accuracy:.4f}")
print(f"Final Validation Accuracy: {final_val_accuracy:.4f}")

In [None]:
# Create training plots

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))

# Loss plot
ax1.plot(history.history['loss'], label='Training Loss')
ax1.plot(history.history['val_loss'], label='Validation Loss')
ax1.set_title('Model Loss')
ax1.set_xlabel('Epoch')
ax1.set_ylabel('Loss')
ax1.legend()
ax1.grid(True)

# Accuracy plot
ax2.plot(history.history['accuracy'], label='Training Accuracy')
ax2.plot(history.history['val_accuracy'], label='Validation Accuracy')
ax2.set_title('Model Accuracy')
ax2.set_xlabel('Epoch')
ax2.set_ylabel('Accuracy')
ax2.legend()
ax2.grid(True)

plt.tight_layout()
plt.show()

### Prediction system for CIFAR-10 classification


In [None]:
def predict_cifar10_class(model, image_array, labels_dict):
    """
    Predict CIFAR-10 class for a single image
    
    Parameters:
    model: Trained transfer learning model
    image_array: Input image as numpy array (32x32x3)
    labels_dict: Dictionary mapping class names to indices
    
    Returns:
    tuple: Prediction result and confidence
    """
    try:
        # Preprocess the image
        if image_array.shape != (32, 32, 3):
            print(f"Warning: Expected shape (32, 32, 3), got {image_array.shape}")
        
        # Normalize and reshape
        image_processed = image_array.astype('float32') / 255.0
        image_batch = np.expand_dims(image_processed, axis=0)
        
        # Make prediction
        predictions = model.predict(image_batch, verbose=0)
        predicted_class = np.argmax(predictions[0])
        confidence = np.max(predictions[0])
        
        # Get class name
        class_names = list(labels_dict.keys())
        predicted_class_name = class_names[predicted_class]
        
        return predicted_class, predicted_class_name, confidence
        
    except Exception as e:
        return None, f"Error: {e}", 0.0

### Test with a sample from test set

In [None]:
if len(X_test) > 0:
    sample_idx = 0
    sample_image = X_test[sample_idx]
    actual_class = y_test[sample_idx]
    
    pred_class, pred_name, confidence = predict_cifar10_class(model, sample_image, labels_dict)
    actual_name = list(labels_dict.keys())[actual_class]
    
    print(f"\nSample Prediction:")
    print(f"Actual class: {actual_class} ({actual_name})")
    print(f"Predicted class: {pred_class} ({pred_name})")
    print(f"Confidence: {confidence:.4f} ({confidence*100:.2f}%)")
    print(f"Prediction {'Correct' if pred_class == actual_class else 'Incorrect'}")