## Image processing - food classifier

### Part I - Baseline Evaluation

Use our own collected data as the input to the classification model, and compare the results with those obtained on the Food-101 dataset.

First, check that the model loaded correctly by predicting an example image.

In [None]:
import shutil
import sys
sys.path.insert(0, './src')
from models.classifier import FoodClassifier
from project_utils import calculate_metrics
import plot

# Create the classification model used by every later cell.
model = FoodClassifier()

# Smoke test: predict one example image and display it, using the predicted
# label and its confidence (two decimals) as the plot title.
example_image = './data/example.JPG'
results = model.predict_single(example_image)
prediction_title = f"{results['label']}: {results['confidence']:.2f}"
plot.single_image(example_image, title=prediction_title)

Run prediction on our collected dataset as the baseline

In [None]:
# Baseline run: feed the collected dataset to the model without any
# preprocessing. `dataset_path` is reused by the later cells.
dataset_path = './data/raw'
results = model.predict_folder(dataset_path)

# Plot the wrong predictions first, then compute and print the metrics.
plot.wrong_predictions(results, folder=dataset_path)
metrics = calculate_metrics(**results)
print(metrics)

### Part II - Individual Algorithm Evaluation

Process our collected data with each image processing algorithm and use the processed images as the input to the model, to find which individual algorithm performs best.

In [None]:
import os 
from preprocessing.lowlight import gamma, CLAHE, SSRetinex
from preprocessing.deblurr import ssk, usm, swf
from preprocessing.downscaling import Lanczos, Lanczos_SAID, DPID
from project_utils import preprocess_folder

# Alias for the model's image processor, so its resize / centre-crop switches
# can be set per experiment group below.
processor = model.processor

# Root folder for the temporary preprocessed images; it is removed after each
# group so nothing is left on disk once this cell finishes.
output_path = './data/preprocessed'

# One entry per distortion type: (algorithms to compare, processor.do_resize).
# The processor resizes the low-light and deblurring outputs, while resizing
# is switched off for the downscaling outputs. Centre-cropping is always off.
experiment_groups = [
    ([gamma, CLAHE, SSRetinex], True),        # Low-light
    ([ssk, usm, swf], True),                  # Deblurring
    ([Lanczos, Lanczos_SAID, DPID], False),   # Downscaling
]

for funcs, do_resize in experiment_groups:
    processor.do_resize = do_resize
    processor.do_center_crop = False

    try:
        for func in funcs:
            # Each algorithm writes into its own sub-folder named after the
            # function, e.g. ./data/preprocessed/gamma
            out_dir = os.path.join(output_path, func.__name__)
            preprocess_folder(func, input_dir=dataset_path, output_dir=out_dir)
            results = model.predict_folder(out_dir)
            metrics = calculate_metrics(**results)
            print(f"Metrics for {func.__name__}: {metrics}")
            # plot.wrong_predictions(results, folder = out_dir)
    finally:
        # Always delete the whole preprocessed tree, even when a step above
        # raised, so stale images cannot leak into the following cells.
        # The existence check covers a failure before anything was written.
        if os.path.exists(output_path):
            shutil.rmtree(output_path)

After finding the best algorithm for each distortion, combine them.

In [None]:

# Chain the chosen algorithm for each distortion: CLAHE (low-light),
# then ssk (deblurring), then Lanczos (downscaling).

# Start from a clean folder: leftovers from an interrupted earlier run would
# otherwise sit next to the freshly processed images.
if os.path.exists(output_path):
    shutil.rmtree(output_path)

try:
    preprocess_folder(CLAHE, input_dir=dataset_path, output_dir=output_path)
    # NOTE(review): stages 2 and 3 read from and write to the same folder;
    # this assumes preprocess_folder supports in-place processing - confirm.
    preprocess_folder(ssk, input_dir=output_path, output_dir=output_path)
    preprocess_folder(Lanczos, input_dir=output_path, output_dir=output_path)

    # Resizing and centre-cropping in the processor are both switched off
    # for this prediction run.
    model.processor.do_resize = False
    model.processor.do_center_crop = False

    results = model.predict_folder(output_path)
    metrics = calculate_metrics(**results)
    print(f"Metrics for best combination: {metrics}")
    # plot.wrong_predictions(results, folder = output_path)
finally:
    # Remove the temporary images even if a step above failed; the guard
    # avoids a FileNotFoundError when the folder was never created.
    if os.path.exists(output_path):
        shutil.rmtree(output_path)

### Part III - Combination Search

Find the best low-light - deblur - downscale combination

In [None]:
def Identity(image):
    """Return *image* unchanged.

    Used as the "no preprocessing" choice for a stage of the combination
    search, so a stage can be skipped without special-casing the loop.
    """
    return image
# Exhaustive search for the best low-light -> deblur -> downscale pipeline.
# Every combination is applied to the dataset, scored by the model, and the
# one with the highest accuracy is kept (the first one wins on ties).
from itertools import product

model.processor.do_resize = False
model.processor.do_center_crop = False

output_path = './data/preprocessed'
best_metrics = None
best_combination = None

# Identity lets the low-light and deblur stages be skipped; a downscaling
# algorithm is always applied.
lowlight_options = [Identity, gamma, CLAHE, SSRetinex]
deblur_options = [Identity, ssk, usm, swf]
downscale_options = [Lanczos, Lanczos_SAID, DPID]

# Drop leftovers from an interrupted earlier run so the first combination
# starts from an empty folder.
if os.path.exists(output_path):
    shutil.rmtree(output_path)

# product() iterates in the same order as three nested loops would
# (the last stage varies fastest).
for func1, func2, func3 in product(lowlight_options, deblur_options, downscale_options):
    try:
        preprocess_folder(func1, input_dir=dataset_path, output_dir=output_path)
        # NOTE(review): stages 2 and 3 process the folder in place; this
        # assumes preprocess_folder supports input_dir == output_dir - confirm.
        preprocess_folder(func2, input_dir=output_path, output_dir=output_path)
        preprocess_folder(func3, input_dir=output_path, output_dir=output_path)
        results = model.predict_folder(output_path)
        metrics = calculate_metrics(**results)
        print(f"Metrics for {func1.__name__} + {func2.__name__} + {func3.__name__}: {metrics}")
        # plot.wrong_predictions(results, folder = output_path)
    finally:
        # Clear the temporary images after every combination, including a
        # failed one, so the next combination never sees stale files.
        if os.path.exists(output_path):
            shutil.rmtree(output_path)

    if best_metrics is None or metrics['accuracy'] > best_metrics['accuracy']:
        best_metrics = metrics
        best_combination = (func1.__name__, func2.__name__, func3.__name__)

print(f"Best combination: {best_combination} with metrics: {best_metrics}")

### Part IV - Model Fine-Tuning

In [None]:
# Turn the processor's resizing back on for training. do_center_crop is not
# changed here (the line enabling it is disabled), so it keeps whatever value
# an earlier cell left it with.
model.processor.do_resize = True

from models.trainer import fine_tune

# Settings for the fine-tuning run, passed to fine_tune as keyword arguments.
training_config = {
    'val_ratio': 0.3,
    'epochs': 20,
    'batch_size': 4,
    'lr': 5e-5,
}

history = fine_tune(model_wrapper=model, data_dir=dataset_path, **training_config)

# Plot the history object returned by fine_tune.
plot.training_history(history)