In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/cifar-10/trainLabels.csv
/kaggle/input/cifar-10/sampleSubmission.csv
/kaggle/input/cifar-10/test.7z
/kaggle/input/cifar-10/train.7z
/kaggle/input/cifar-10-resnet152-97-5-acc/__results__.html
/kaggle/input/cifar-10-resnet152-97-5-acc/fine_tuned_model_alternative.h5
/kaggle/input/cifar-10-resnet152-97-5-acc/submission.csv
/kaggle/input/cifar-10-resnet152-97-5-acc/__notebook__.ipynb
/kaggle/input/cifar-10-resnet152-97-5-acc/__output__.json
/kaggle/input/cifar-10-resnet152-97-5-acc/custom.css
/kaggle/input/cifar-10-resnet152-97-5-acc/__results___files/__results___8_7.png
/kaggle/input/cifar-10-resnet152-97-5-acc/__results___files/__results___8_6.png


## Install py7zr Package

#####  py7zr is a library and utility to support 7zip archive compression, decompression, encryption and decryption written by Python programming language.

In [2]:
pip install py7zr

Collecting py7zr
  Downloading py7zr-0.21.1-py3-none-any.whl.metadata (17 kB)
Collecting pycryptodomex>=3.16.0 (from py7zr)
  Downloading pycryptodomex-3.20.0-cp35-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.4 kB)
Collecting pyzstd>=0.15.9 (from py7zr)
  Downloading pyzstd-0.16.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.4 kB)
Collecting pyppmd<1.2.0,>=1.1.0 (from py7zr)
  Downloading pyppmd-1.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.7 kB)
Collecting pybcj<1.1.0,>=1.0.0 (from py7zr)
  Downloading pybcj-1.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.0 kB)
Collecting multivolumefile>=0.2.3 (from py7zr)
  Downloading multivolumefile-0.2.3-py3-none-any.whl.metadata (6.3 kB)
Collecting inflate64<1.1.0,>=1.0.0 (from py7zr)
  Downloading inflate64-1.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.0 kB)
Downloading py7zr-0.21.1-py3-none-any.w

## Extract Training Dataset from 7zr archive

In [3]:
import py7zr

# Define paths for extract and archive
archive_path = '../input/cifar-10/train.7z'
extract_folder = '/kaggle/temp'

# Extract the 7zr archive
with py7zr.SevenZipFile(archive_path, mode='r') as archive:
    archive.extractall(path=extract_folder)

# Load the labels CSV file
train_labels = pd.read_csv('/kaggle/input/cifar-10/trainLabels.csv')

# Define paths for train and validation datsets
dataset_path_train = '/kaggle/temp/train'
dataset_path_valid = '/kaggle/temp/valid'

# Define the class names
classes = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']

# Create directories for each class in train and validation folders
for label in classes:
    train_path = os.path.join(dataset_path_train, label)
    os.makedirs(train_path, exist_ok=True)
    
    valid_path = os.path.join(dataset_path_valid, label)
    os.makedirs(valid_path, exist_ok=True)

# Iterate over each label in the CSV    
for (int_ind, row) in train_labels.iterrows():
    # Construct the image file name
    id = str(row["id"]) + ".png"
    source_path = os.path.join(dataset_path_train, id)
    
    # Check if the image file exists
    if not os.path.exists(source_path):
        print(f"Missing source File: {source_path}")
        continue
        
    # Randomly assign the image to train (80%) or validation (20%) set
    p = np.random.random()
    if p <= 0.8:
        target_path = os.path.join(dataset_path_train, row["label"], id)
    else:
        target_path = os.path.join(dataset_path_valid, row["label"], id)
    
    # Move the image file to the corresponding target directory
    os.replace(source_path, target_path)
    
print("Processing Completed")

Processing Completed


## Extract Test Dataset from 7zr archive

In [4]:
# Define the path to the 7-zip archive containing the test data
archive_path = '/kaggle/input/cifar-10/test.7z'

# Define the directory where the contents of the archive will be extracted
extract_folder = '/kaggle/temp/test'

# Open the 7-zip archive in read mode
with py7zr.SevenZipFile(archive_path, mode='r') as archive:
    # Extract all the contents of the archive to the specified directory
    archive.extractall(path=extract_folder)


In [5]:
# Get the number of files in the test directory
len(os.listdir('/kaggle/temp/test/test'))

# Define the path to the test directory
path = '/kaggle/temp/test/test'

# List all files in the test directory
f = os.listdir(path)

# Create a new list with the full path of each file
new = [str(path) + '/' + s for s in f]


## Fastai vision
**Fastai is a high-level deep learning library built on top of PyTorch and provides a simplified interface for training neural networks with best practices in deep learning**

# Data Loader
**A `data loader` specifically refers to an object that encapsulates the training, validation, and optionally test datasets in a format suitable for training deep learning models.**

It provides a high level interface to specify:
* Data Source
* Data Augmentation
* Batching
* Normalization

In [6]:
# Import all necessary functions and classes from fastai.vision
from fastai.vision.all import *  

# Define the Image Data Loaders
dls = ImageDataLoaders.from_folder(
    path='/kaggle/temp',              # Path to the folder containing 'train' and 'valid' subdirectories
    train='train',                    # Name of the training data subdirectory
    valid='valid',                    # Name of the validation data subdirectory
    item_tfms=Resize(224),            # Resize all images to 224x224 pixels
    bs=64,                            # Batch size of 64
    batch_tfms=[*aug_transforms(),    # Apply data augmentation transformations
                Normalize.from_stats(*imagenet_stats)]  # Normalize images using ImageNet statistics
)



# CNN Learner

**A `Learner` in Fastai is an object that encapsulates the model architecture, the data loaders (created using the DataBlock API), and various other settings necessary for training and evaluating a deep learning model.**

In [7]:
# Create a learner with a pre-trained ResNet152 model
learn = cnn_learner(dls, resnet152, metrics=error_rate)

# Fine-tune the model for 5 epochs
learn.fine_tune(5)  

  warn("`cnn_learner` has been renamed to `vision_learner` -- please update your code")
Downloading: "https://download.pytorch.org/models/resnet152-f82ba261.pth" to /root/.cache/torch/hub/checkpoints/resnet152-f82ba261.pth
100%|██████████| 230M/230M [00:02<00:00, 86.2MB/s]


epoch,train_loss,valid_loss,error_rate,time
0,0.768743,0.490524,0.161874,06:27


epoch,train_loss,valid_loss,error_rate,time
0,0.258957,0.158522,0.052785,08:31
1,0.180731,0.115912,0.037503,08:31
2,0.11047,0.104845,0.034989,08:31
3,0.05081,0.087786,0.025035,08:32
4,0.024632,0.083525,0.023929,08:31


# Predictions on Test Set

In [8]:
# Create a test DataLoader from the list of file paths
test_dl = learn.dls.test_dl(new)

# Get predictions for the test data
class_score, y = learn.get_preds(dl=test_dl)

# Get the predicted class indices
class_score = np.argmax(class_score, axis=1)

# Get the prediction for the second test image (index 1)
class_score[1].item()

# Convert the predicted class indices to a list
classScore = class_score.tolist()

# Get the vocabulary of classes
learn.dls.vocab

# Define a dictionary mapping class indices to class names
classes = {0: 'airplane', 1: 'automobile', 2: 'bird', 3: 'cat', 4: 'deer', 5: 'dog', 6: 'frog', 7: 'horse', 8: 'ship', 9: 'truck'}

# Initialize an empty array to store the predicted classes
predicted_classes = np.empty(shape=300000, dtype=np.dtype('U20'))


In [9]:
# Initialize an index variable
index = 0

# Loop through the list of predicted class indices
for i in classScore:
    # Assign the class name corresponding to the class index to the predicted_classes array
    predicted_classes[index] = classes[i]
    
    # Increment the index for the next iteration
    index += 1


In [10]:
import os
import pandas as pd

# Define the directory containing the test images
directory = '/kaggle/temp/test/test'

# Extract image IDs by filtering out non-digit characters from the file names
ImageId = [ (''.join(filter(str.isdigit, name))) for name in os.listdir(directory)]

# Create a DataFrame for the submission with 'id' and 'label' columns
submission = pd.DataFrame({
    "id": ImageId,          # Assign the extracted image IDs to the 'id' column
    "label": predicted_classes  # Assign the predicted class names to the 'label' column
})

# Convert the 'id' column to integers
submission['id'] = submission['id'].astype(int)

# Save the DataFrame to a CSV file for submission, without the index
submission.to_csv('submission.csv', index=False)


# Comparison of Diffrent Models

In [11]:
# Importing necessary libraries
from tabulate import tabulate

# Data for the table
data = [
    ["CNN", "28%"],
    ["ResNet50", "46%"],
    ["ResNet152 (Fine-tuned)", "97%"]
]

# Headers for the table
headers = ["Model", "Accuracy"]

# Printing the table using tabulate
table = tabulate(data, headers, tablefmt="fancy_grid")

# Displaying the table
print(table)


╒════════════════════════╤════════════╕
│ Model                  │ Accuracy   │
╞════════════════════════╪════════════╡
│ CNN                    │ 28%        │
├────────────────────────┼────────────┤
│ ResNet50               │ 46%        │
├────────────────────────┼────────────┤
│ ResNet152 (Fine-tuned) │ 97%        │
╘════════════════════════╧════════════╛
