### 1) Carica il dataset food/no_food
(puoi fare un ciclo for o usare flow_from_directory)
### 2) Usa sempre ResNet50 per ottenere una matrice di features (train e test)
(devi ottenere due matrici di dimensione (3000, 2048) e (1000, 2048))
### 3) Addestra una regressione logistica
(esatto, la resnet fa heavy lifting.. e sulle features basta un modello molto semplice)
### 4) Riesci ad avere >.75 in test?
### 5) Visualizza alcune immagini, la ground truth, e la predizione

In [1]:
#say no to warnings!
import warnings
warnings.filterwarnings("ignore")
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
import tensorflow as tf
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)




In [35]:
import matplotlib.pyplot as plt
import numpy as np
import os
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from tensorflow.keras.preprocessing.image import \
    ImageDataGenerator,load_img,array_to_img,img_to_array
from tensorflow.keras.applications import resnet50,ResNet50
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense
from tensorflow.keras.models import Model
from tqdm import tqdm

# DATASET

In [16]:
#use util custom function
def read_dataset(directory,dataset_type="train"):
    subdirectories = ["food_images","negative_non_food"]
    X = []
    y = []
    for subdirectory in subdirectories:
        subdirectory_path = os.path.join(directory,subdirectory)
        filenames = os.listdir(subdirectory_path)
        jpg_files = [f for f in filenames if f.endswith('.jpg')]

        for filename in tqdm(jpg_files, desc=f"Reading {dataset_type}/{subdirectory}",
                            unit="item"):
            img_path = os.path.join(subdirectory_path,filename)
            img = load_img(img_path,target_size=(224,224))
            img_array = img_to_array(img)
            X.append(img_array)
            y.append(1 if subdirectory == "food_images" else 0)
            
    return (np.array(X),np.array(y))


In [24]:
%%time
train_path = os.path.join("food_data_light","train")
test_path = os.path.join("food_data_light","test")
# train_path = os.path.join("food_data","train")
# test_path = os.path.join("food_data","test")

X_train_img, y_train = read_dataset(train_path)
X_test_img, y_test = read_dataset(test_path,"test")

Reading train/food_images: 100%|██████████| 228/228 [00:01<00:00, 195.45item/s]
Reading train/negative_non_food: 100%|██████████| 228/228 [00:02<00:00, 107.92item/s]
Reading test/food_images: 100%|██████████| 51/51 [00:00<00:00, 183.33item/s]
Reading test/negative_non_food: 100%|██████████| 51/51 [00:00<00:00, 144.20item/s]

CPU times: total: 1.03 s
Wall time: 4.2 s





In [25]:
print(np.unique(y_train, return_counts=True))
print(np.unique(y_test, return_counts=True))

(array([0, 1]), array([228, 228], dtype=int64))
(array([0, 1]), array([51, 51], dtype=int64))


In [26]:
print("X_train_img:",X_train_img.shape)
print("X_test_img:",X_test_img.shape)
print("y_train:",y_train.shape)
print("y_test:",y_test.shape)

X_train_img: (456, 224, 224, 3)
X_test_img: (102, 224, 224, 3)
y_train: (456,)
y_test: (102,)


# MODEL


In [22]:
#Use ResNet50 only for feature extraction
base_model = ResNet50(include_top=False, weights='imagenet')

print(f"Original #layers: {len(base_model.layers)}")

Original #layers: 175


In [34]:
X_train = GlobalAveragePooling2D()(base_model.predict(X_train_img))
print(X_train.shape)
X_test = GlobalAveragePooling2D()(base_model.predict(X_test_img))
print(X_test.shape)

[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 1s/step
(456, 2048)
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 1s/step
(102, 2048)


In [38]:
classification_model = LogisticRegression(verbose=5)
classification_model.fit(X_train,y_train)

y_pred = classification_model.predict(X_test)

print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.98      0.82      0.89        51
           1       0.85      0.98      0.91        51

    accuracy                           0.90       102
   macro avg       0.91      0.90      0.90       102
weighted avg       0.91      0.90      0.90       102

