In [32]:
import zipfile
import os

def extract_zip_file(zip_file_path, dest_path):
    """Extract every member of a zip archive into ``dest_path``.

    Args:
        zip_file_path: Path of the zip archive to read.
        dest_path: Directory to extract into. It is created, together with
            any missing parent directories, when it does not exist yet.

    Returns:
        None. The archive contents are written below ``dest_path``.
    """
    # exist_ok=True creates the directory in one call, without a separate
    # exists() check that could race with another process creating the
    # same path in between.
    os.makedirs(dest_path, exist_ok=True)

    with zipfile.ZipFile(zip_file_path, 'r') as archive:
        archive.extractall(dest_path)
        

# Unpack the dataset archive into the notebook's current working directory.
# Point zip_file_path at your own archive if it lives somewhere else.
zip_file_path, dest_path = 'archive.zip', os.getcwd()
extract_zip_file(zip_file_path=zip_file_path, dest_path=dest_path)


In [33]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
import os
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the .txt file as a pandas DataFrame
def load_labels(file_path):
    """Read a space-separated label file into a DataFrame of image paths and labels.

    The file has no header row and two fields per line: the image file name
    followed by its target. Each file name is prefixed with its target as a
    directory component (for example ``0/00002.jpg``).

    Args:
        file_path: Path to the label file.

    Returns:
        pandas.DataFrame with the columns ``"filename"`` (target-prefixed
        path) and ``"target"`` (label converted to ``str``).
    """
    df = pd.read_csv(file_path, sep=" ", header=None)
    df.columns = ["filename", "target"]

    # Convert the class column to string type before it is used as a path part.
    df["target"] = df["target"].astype(str)

    # Build "<target>/<filename>" with a plain zip over the two columns; this
    # avoids DataFrame.apply(axis=1), which constructs a Series for every row.
    df["filename"] = [
        os.path.join(label, name)
        for label, name in zip(df["target"], df["filename"])
    ]
    return df

# Load the label tables for the train, test and validation splits (in that order).
train_df, test_df, val_df = map(load_labels, ("train.txt", "test.txt", "val.txt"))

In [34]:
# Display the training label table (the rendered output elides the middle rows).
train_df

Unnamed: 0,filename,target
0,0/00002.jpg,0
1,0/00003.jpg,0
2,0/00005.jpg,0
3,0/00006.jpg,0
4,0/00008.jpg,0
...,...,...
45090,101/75212.jpg,101
45091,101/75214.jpg,101
45092,101/75216.jpg,101
45093,101/75217.jpg,101


In [35]:
# Display the validation label table (the rendered output elides the middle rows).
val_df

Unnamed: 0,filename,target
0,0/00009.jpg,0
1,0/00012.jpg,0
2,0/00014.jpg,0
3,0/00034.jpg,0
4,0/00035.jpg,0
...,...,...
7503,101/75187.jpg,101
7504,101/75191.jpg,101
7505,101/75202.jpg,101
7506,101/75206.jpg,101


In [28]:
# Display the test label table (the rendered output elides the middle rows).
test_df

Unnamed: 0,filename,target
0,0/00000.jpg,0
1,0/00001.jpg,0
2,0/00004.jpg,0
3,0/00007.jpg,0
4,0/00010.jpg,0
...,...,...
22614,101/75200.jpg,101
22615,101/75213.jpg,101
22616,101/75215.jpg,101
22617,101/75218.jpg,101


In [36]:
batch_size = 32
img_height = 64
img_width = 64

# Create ImageDataGenerators.
# train_datagen rescales pixel values and applies random augmentation
# (flip / zoom / shear); test_datagen only rescales.
train_datagen = ImageDataGenerator(rescale=1./255, horizontal_flip=True, zoom_range=0.2, shear_range=0.2)
test_datagen = ImageDataGenerator(rescale=1./255)

# Arguments shared by all three flow_from_dataframe calls.
flow_kwargs = dict(x_col="filename", y_col="target",
                   target_size=(img_height, img_width),
                   batch_size=batch_size, class_mode='categorical')

train_generator = train_datagen.flow_from_dataframe(train_df, directory="classification/train",
                                                    **flow_kwargs)

# Validation data must not be randomly augmented, otherwise validation metrics
# are measured on distorted images; use the rescale-only generator here.
validation_generator = test_datagen.flow_from_dataframe(val_df, directory="classification/val",
                                                        **flow_kwargs)

# shuffle=False keeps the batch order aligned with the rows of test_df.
test_generator = test_datagen.flow_from_dataframe(test_df, directory="classification/test",
                                                  shuffle=False, **flow_kwargs)


Found 45095 validated image filenames belonging to 102 classes.
Found 7508 validated image filenames belonging to 102 classes.
Found 22619 validated image filenames belonging to 102 classes.
