In [None]:
import os
import subprocess
import zipfile
import glob

# Define the paths
base_dir = './data'
dataset_dir = os.path.join(base_dir, 'food-101')
zip_path = os.path.join(base_dir, 'food-101.zip')

# Create the base directory if it doesn't exist
os.makedirs(base_dir, exist_ok=True)

# Downloading and unpacking are only needed while the extracted dataset is
# missing. Once dataset_dir exists, the (multi-gigabyte) archive can be deleted
# without triggering another download the next time this cell runs.
if not os.path.exists(dataset_dir):
    # Download the dataset using the Kaggle API
    if not os.path.exists(zip_path):
        print("Downloading dataset...")
        # check=True raises CalledProcessError when the kaggle CLI exits with
        # a non-zero status, so a failed download is never reported as complete.
        subprocess.run(
            ['kaggle', 'datasets', 'download', '-d', 'dansbecker/food-101', '-p', base_dir],
            check=True,
        )
        print("Download complete.")

    # Prefer the expected archive name. Only if it is absent, fall back to any
    # ZIP in base_dir (Kaggle might save it with a different name); sorting
    # makes the fallback choice deterministic.
    if not os.path.exists(zip_path):
        zip_files = sorted(glob.glob(os.path.join(base_dir, '*.zip')))
        if not zip_files:
            raise FileNotFoundError("No ZIP file found after download.")
        zip_path = zip_files[0]

    # Extract the dataset
    print("Extracting dataset...")
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(base_dir)
    print("Extraction complete.")


Downloading dataset...
Dataset URL: https://www.kaggle.com/datasets/dansbecker/food-101
License(s): other
Downloading food-101.zip to ./data


  1%|          | 99.0M/9.38G [13:15<5:38:12, 491kB/s] 

Step 2: Preprocess the Data
Now, we need to preprocess the images for training. We will use the ImageDataGenerator from keras for this purpose.

In [1]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Directory handed to flow_from_directory (the 'images' folder of the dataset)
dataset_path = os.path.join(dataset_dir, 'images')

# A single generator rescales pixel values by 1/255 and reserves 20% of the
# images for validation; both subsets below are drawn from it.
datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

# Settings shared by the training and validation iterators
flow_kwargs = dict(
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
)

# Prepare the training and validation data
train_generator = datagen.flow_from_directory(
    dataset_path, subset='training', **flow_kwargs
)
validation_generator = datagen.flow_from_directory(
    dataset_path, subset='validation', **flow_kwargs
)

2024-06-12 18:57:14.652371: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


NameError: name 'os' is not defined

Step 3: Build the Model
We'll build a convolutional neural network (CNN) using tensorflow and keras.

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# Convolutional feature extractor: three Conv2D + MaxPooling2D stages with
# 32, 64 and 128 filters. Only the first layer declares the input shape.
feature_layers = [
    Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)),
    MaxPooling2D((2, 2)),
]
for n_filters in (64, 128):
    feature_layers.append(Conv2D(n_filters, (3, 3), activation='relu'))
    feature_layers.append(MaxPooling2D((2, 2)))

# Classifier head: flatten, one hidden dense layer with dropout, softmax output
classifier_layers = [
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(101, activation='softmax'),  # 101 classes for 101 types of food
]

# Define the model
model = Sequential(feature_layers + classifier_layers)

# Compile the model
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)


Step 4: Train the Model
Train the model using the training data and validate it using the validation data.

In [None]:
# Train the model.
# steps_per_epoch and validation_steps are deliberately left unset so Keras
# takes the number of batches from the generators themselves. The previous
# `samples // batch_size` floor division skipped the final partial batch of
# every epoch (so validation covered less data than model.evaluate does), and
# on recent Keras releases it has been reported to trigger "input ran out of
# data" warnings - TODO confirm on the installed version.
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=20
)

Step 5: Evaluate the Model
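Evaluate the model to see how well it performs. Note that this notebook does not set aside a separate test set, so the evaluation below runs on the held-out validation split.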

In [None]:
# Score the trained model. NOTE(review): the data passed here is the
# validation subset, so the reported figure is really a validation accuracy.
evaluation_scores = model.evaluate(validation_generator)
test_loss, test_acc = evaluation_scores
print(f'Test accuracy: {test_acc}')

Putting it All Together
Here’s the complete script:

In [None]:
import os
import subprocess
import zipfile
import glob
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# Define the paths
base_dir = './data'
dataset_dir = os.path.join(base_dir, 'food-101')
zip_path = os.path.join(base_dir, 'food-101.zip')

# Create the base directory if it doesn't exist
os.makedirs(base_dir, exist_ok=True)

# Downloading and unpacking are only needed while the extracted dataset is
# missing. Once dataset_dir exists, the (multi-gigabyte) archive can be deleted
# without triggering another download the next time this script runs.
if not os.path.exists(dataset_dir):
    # Download the dataset using the Kaggle API
    if not os.path.exists(zip_path):
        print("Downloading dataset...")
        # check=True raises CalledProcessError when the kaggle CLI exits with
        # a non-zero status, so a failed download is never reported as complete.
        subprocess.run(
            ['kaggle', 'datasets', 'download', '-d', 'dansbecker/food-101', '-p', base_dir],
            check=True,
        )
        print("Download complete.")

    # Prefer the expected archive name. Only if it is absent, fall back to any
    # ZIP in base_dir (Kaggle might save it with a different name); sorting
    # makes the fallback choice deterministic.
    if not os.path.exists(zip_path):
        zip_files = sorted(glob.glob(os.path.join(base_dir, '*.zip')))
        if not zip_files:
            raise FileNotFoundError("No ZIP file found after download.")
        zip_path = zip_files[0]

    # Extract the dataset
    print("Extracting dataset...")
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(base_dir)
    print("Extraction complete.")

# Directory handed to flow_from_directory (the 'images' folder of the dataset)
dataset_path = os.path.join(dataset_dir, 'images')

# A single generator rescales pixel values by 1/255 and reserves 20% of the
# images for validation; both subsets below are drawn from it.
datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

# Settings shared by the training and validation iterators
flow_kwargs = dict(
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
)

# Prepare the training and validation data
train_generator = datagen.flow_from_directory(
    dataset_path, subset='training', **flow_kwargs
)
validation_generator = datagen.flow_from_directory(
    dataset_path, subset='validation', **flow_kwargs
)

# Convolutional feature extractor: three Conv2D + MaxPooling2D stages with
# 32, 64 and 128 filters. Only the first layer declares the input shape.
feature_layers = [
    Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)),
    MaxPooling2D((2, 2)),
]
for n_filters in (64, 128):
    feature_layers.append(Conv2D(n_filters, (3, 3), activation='relu'))
    feature_layers.append(MaxPooling2D((2, 2)))

# Classifier head: flatten, one hidden dense layer with dropout, softmax output
classifier_layers = [
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(101, activation='softmax'),  # 101 classes for 101 types of food
]

# Define the model
model = Sequential(feature_layers + classifier_layers)

# Compile the model
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train the model.
# steps_per_epoch and validation_steps are deliberately left unset so Keras
# takes the number of batches from the generators themselves. The previous
# `samples // batch_size` floor division skipped the final partial batch of
# every epoch (so validation covered less data than model.evaluate does), and
# on recent Keras releases it has been reported to trigger "input ran out of
# data" warnings - TODO confirm on the installed version.
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=20
)

# Score the trained model. NOTE(review): the data passed here is the
# validation subset, so the reported figure is really a validation accuracy.
evaluation_scores = model.evaluate(validation_generator)
test_loss, test_acc = evaluation_scores
print(f'Test accuracy: {test_acc}')
