In [None]:
# ========================================
# NUTRITION AI - NOTEBOOK GOOGLE COLAB COMPLET
# ========================================
# 
# Ce notebook entraîne un modèle d'IA pour analyser
# les valeurs nutritionnelles d'images de nourriture
#
# INSTRUCTIONS:
# 1. Runtime > Change runtime type > GPU (T4)
# 2. Exécutez toutes les cellules
# 3. Durée totale: ~4 heures

# ========================================
# CELLULE 1: Configuration et Installation
# ========================================

# V√©rifier le GPU
import tensorflow as tf
print("TensorFlow version:", tf.__version__)
print("GPU disponible:", tf.config.list_physical_devices('GPU'))

# Installer les d√©pendances
!pip install -q tensorflow==2.15.0
!pip install -q pandas matplotlib seaborn scikit-learn tqdm Pillow


# ========================================
# CELLULE 2: Téléchargement du Dataset
# ========================================

from pathlib import Path
import requests
import tarfile
from tqdm import tqdm

# Working directory of the notebook and location of the extracted dataset
BASE_DIR = Path("/content/nutrition_ai")
DATA_DIR = BASE_DIR / "food-101"
BASE_DIR.mkdir(parents=True, exist_ok=True)

# Food-101 archive: remote URL and local copy
FOOD101_URL = "http://data.vision.ee.ethz.ch/cvl/food-101.tar.gz"
FOOD101_TAR = BASE_DIR / "food-101.tar.gz"

# Download and extract only when the images directory is not there yet
if not (DATA_DIR / "images").exists():
    print("üì• T√©l√©chargement de Food-101 (5 GB)...")

    # Write to a temporary ".part" file and rename it once the transfer has
    # completed, so an interrupted download never leaves a truncated archive
    # under the final name.
    partial_tar = FOOD101_TAR.with_name(FOOD101_TAR.name + ".part")

    # timeout=(connect, read) keeps a stalled connection from hanging forever;
    # the context manager releases the connection when the download ends.
    with requests.get(FOOD101_URL, stream=True, timeout=(10, 60)) as response:
        # Fail loudly on 4xx/5xx instead of saving an error page as the archive
        response.raise_for_status()
        total_size = int(response.headers.get('content-length', 0))

        with open(partial_tar, 'wb') as f, tqdm(
            total=total_size or None, unit='B', unit_scale=True
        ) as pbar:
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                f.write(chunk)
                pbar.update(len(chunk))

    partial_tar.replace(FOOD101_TAR)

    print("üì¶ Extraction...")
    with tarfile.open(FOOD101_TAR, 'r:gz') as tar:
        # Use the 'data' extraction filter when this Python version provides
        # it: it rejects members that would be written outside BASE_DIR.
        if hasattr(tarfile, 'data_filter'):
            tar.extractall(BASE_DIR, filter='data')
        else:
            tar.extractall(BASE_DIR)

    print("‚úÖ Dataset pr√™t!")
else:
    print("‚úÖ Dataset d√©j√† t√©l√©charg√©")


# ========================================
# CELLULE 3: Base de Données Nutritionnelle
# ========================================

import json

# Field names, in the column order used by every row of the table below
_NUTRIENT_FIELDS = ('calories', 'protein', 'fat', 'carbs', 'fiber', 'sugars', 'sodium')

# One row of values per food class, in the order of _NUTRIENT_FIELDS
_NUTRITION_ROWS = {
    'apple_pie': (237, 2.4, 11, 34, 1.6, 15, 266),
    'baby_back_ribs': (361, 17, 30, 5, 0.2, 4, 791),
    'baklava': (428, 5, 23, 51, 2, 28, 242),
    'beef_carpaccio': (121, 22, 3.5, 0.5, 0, 0, 48),
    'beef_tartare': (155, 20, 8, 1, 0, 0, 362),
    'beet_salad': (65, 2, 3, 9, 3, 6, 198),
    'beignets': (269, 5, 11, 38, 1, 9, 456),
    'bibimbap': (145, 8, 4, 20, 2.5, 3, 485),
    # ... (include the full 101 classes here - see script 02)
    'pizza': (266, 11, 10, 33, 2, 4, 598),
    'spaghetti_bolognese': (151, 8, 5, 18, 2, 3, 234),
    'sushi': (143, 6, 4, 21, 1, 3, 456),
}

# Class name -> {field name: value}
NUTRITION_DB = {
    food: dict(zip(_NUTRIENT_FIELDS, row))
    for food, row in _NUTRITION_ROWS.items()
}

# Persist the table as JSON next to the dataset
NUTRITION_FILE = BASE_DIR / "nutrition_db.json"
with open(NUTRITION_FILE, 'w') as f:
    f.write(json.dumps(NUTRITION_DB, indent=2))

# Alphabetically sorted class names; the position in this list is the class index
FOOD_CLASSES = sorted(NUTRITION_DB)
print(f"‚úÖ {len(FOOD_CLASSES)} classes nutritionnelles charg√©es")


# ========================================
# CELLULE 4: Construction du Modèle
# ========================================

from tensorflow import keras
from tensorflow.keras import layers

# Side length, in pixels, of the square RGB images fed to the network
IMG_SIZE = 224
# Number of outputs of the classification head: one per entry of FOOD_CLASSES
NUM_CLASSES = len(FOOD_CLASSES)

def build_model():
    """Build and compile the two-headed food model.

    A pre-trained EfficientNetV2B0 backbone (global-average pooled) feeds two
    independent heads:

    * ``food_classification``: softmax over ``NUM_CLASSES`` classes, trained
      with categorical cross-entropy.
    * ``nutrition_values``: 7 linear outputs, trained with mean squared error
      (weighted 0.5 in the total loss).

    The model takes ``IMG_SIZE`` x ``IMG_SIZE`` RGB images whose pixel values
    are scaled to [0, 1].

    Returns:
        The compiled ``keras.Model`` with outputs
        ``[food_classification, nutrition_values]``.
    """
    inputs = keras.Input(shape=(IMG_SIZE, IMG_SIZE, 3))

    # EfficientNetV2 ships with its own input preprocessing and expects pixel
    # values in [0, 255]. The rest of this notebook feeds images already
    # divided by 255, so scale them back here; otherwise the pre-trained
    # weights would see inputs squeezed into a tiny fraction of their range.
    backbone_input = layers.Rescaling(255.0)(inputs)

    # Pre-trained backbone
    base = keras.applications.EfficientNetV2B0(
        include_top=False,
        weights='imagenet',
        input_tensor=backbone_input,
        pooling='avg'
    )

    # Partial fine-tuning: only the last 50 layers of the backbone are trained
    base.trainable = True
    for layer in base.layers[:-50]:
        layer.trainable = False

    x = base.output

    # Classification head
    cls = layers.Dense(512, activation='relu')(x)
    cls = layers.Dropout(0.3)(cls)
    cls = layers.Dense(256, activation='relu')(cls)
    cls = layers.Dropout(0.2)(cls)
    classification = layers.Dense(NUM_CLASSES, activation='softmax',
                                  name='food_classification')(cls)

    # Nutrition regression head
    nutr = layers.Dense(512, activation='relu')(x)
    nutr = layers.Dropout(0.3)(nutr)
    nutr = layers.Dense(256, activation='relu')(nutr)
    nutr = layers.Dropout(0.2)(nutr)
    nutr = layers.Dense(128, activation='relu')(nutr)
    nutrition = layers.Dense(7, activation='linear',
                             name='nutrition_values')(nutr)

    model = keras.Model(inputs, [classification, nutrition])

    # Losses and metrics are keyed by output layer name
    model.compile(
        optimizer=keras.optimizers.Adam(0.001),
        loss={
            'food_classification': 'categorical_crossentropy',
            'nutrition_values': 'mse'
        },
        loss_weights={'food_classification': 1.0, 'nutrition_values': 0.5},
        metrics={
            'food_classification': ['accuracy'],
            'nutrition_values': ['mae']
        }
    )

    return model

# Build the network once and print its layer-by-layer summary
model = build_model()
model.summary()


# ========================================
# CELLULE 5: Data Pipeline
# ========================================

import numpy as np

# Image side length (pixels) and number of samples per batch for the pipeline
IMG_SIZE, BATCH_SIZE = 224, 32

# Class name -> integer index, following the order of FOOD_CLASSES
CLASS_TO_IDX = dict(zip(FOOD_CLASSES, range(len(FOOD_CLASSES))))

def get_nutrition(food_name):
    """Return the nutrition values of ``food_name`` as a float32 vector.

    The values are read from ``NUTRITION_DB`` and laid out in this order:
    calories, protein, fat, carbs, fiber, sugars, sodium.

    Raises:
        KeyError: If ``food_name`` has no entry in ``NUTRITION_DB``.
    """
    entry = NUTRITION_DB[food_name]
    ordered_keys = ('calories', 'protein', 'fat', 'carbs',
                    'fiber', 'sugars', 'sodium')
    values = [entry[key] for key in ordered_keys]
    return np.array(values, dtype=np.float32)

def create_dataset(paths, labels, augment=True):
    """Build a batched, prefetched ``tf.data.Dataset`` of images and targets.

    Args:
        paths: Sequence of JPEG file paths.
        labels: Sequence of class names, one per path. Every name must be a
            key of ``CLASS_TO_IDX``.
        augment: When true, shuffle the samples and apply a random horizontal
            flip plus random brightness and contrast changes to each image.

    Returns:
        A dataset yielding ``(images, targets)`` batches of ``BATCH_SIZE``
        samples. ``images`` are float32 in [0, 1], resized to
        ``IMG_SIZE`` x ``IMG_SIZE``. ``targets`` is a dict with the keys
        ``'food_classification'`` (one-hot over ``NUM_CLASSES``) and
        ``'nutrition_values'`` (the 7 values from ``get_nutrition``).

    Raises:
        ValueError: If ``paths`` and ``labels`` differ in length, or if a
            label is missing from ``CLASS_TO_IDX``.
    """
    paths = [str(p) for p in paths]
    labels = list(labels)

    if len(paths) != len(labels):
        raise ValueError(
            f"paths and labels must have the same length "
            f"({len(paths)} != {len(labels)})"
        )

    unknown = sorted(set(labels) - set(CLASS_TO_IDX))
    if unknown:
        raise ValueError(f"Labels missing from CLASS_TO_IDX: {unknown}")

    # Functions passed to Dataset.map are traced as a graph, where tensors
    # have no .numpy() and Python dicts cannot be indexed by a tensor. Resolve
    # the class index and nutrition vector of every sample here, in plain
    # Python/NumPy, and hand ready-made numeric targets to the pipeline.
    n_nutrients = 7
    nutrition_by_class = np.zeros((NUM_CLASSES, n_nutrients), dtype=np.float32)
    for name, idx in CLASS_TO_IDX.items():
        nutrition_by_class[idx] = get_nutrition(name)

    class_indices = np.array(
        [CLASS_TO_IDX[name] for name in labels], dtype=np.int32
    )
    nutrition_targets = nutrition_by_class[class_indices]

    def process(img_path, class_idx, nutrition):
        # Load and resize the image
        img = tf.io.read_file(img_path)
        img = tf.image.decode_jpeg(img, channels=3)
        img = tf.image.resize(img, [IMG_SIZE, IMG_SIZE])

        # Scale to [0, 1] BEFORE augmenting: the brightness delta is added to
        # the pixel values, so 0.2 is negligible on a [0, 255] image.
        img = img / 255.0

        if augment:
            img = tf.image.random_flip_left_right(img)
            img = tf.image.random_brightness(img, 0.2)
            img = tf.image.random_contrast(img, 0.8, 1.2)
            # Brightness/contrast changes can leave the [0, 1] range
            img = tf.clip_by_value(img, 0.0, 1.0)

        return img, {
            'food_classification': tf.one_hot(class_idx, NUM_CLASSES),
            'nutrition_values': nutrition
        }

    # An explicit string dtype keeps an empty path list from becoming float32
    ds = tf.data.Dataset.from_tensor_slices(
        (tf.constant(paths, dtype=tf.string), class_indices, nutrition_targets)
    )
    if augment:
        # Elements are only a path and a few numbers at this stage, so a
        # buffer covering the whole set is cheap and gives a uniform shuffle.
        ds = ds.shuffle(max(len(paths), 1))
    ds = ds.map(process, num_parallel_calls=tf.data.AUTOTUNE)
    ds = ds.batch(BATCH_SIZE)
    ds = ds.prefetch(tf.data.AUTOTUNE)

    return ds


# ========================================
# CELLULE 6: Chargement des Données
# ========================================

meta_dir = BASE_DIR / "food-101" / "meta"
images_dir = BASE_DIR / "food-101" / "images"

# Load the training split: class name -> list of image ids relative to images_dir
with open(meta_dir / "train.json") as f:
    train_data = json.load(f)

train_paths = []
train_labels = []
skipped_classes = []

for cls, imgs in train_data.items():
    # A class without nutrition values has no class index and no regression
    # target, so its images cannot be used; skip it instead of failing with a
    # KeyError later, when the dataset is built.
    if cls not in NUTRITION_DB:
        skipped_classes.append(cls)
        continue
    for img_name in imgs:
        img_path = images_dir / f"{img_name}.jpg"
        if img_path.exists():
            train_paths.append(str(img_path))
            train_labels.append(cls)

if skipped_classes:
    print(f"ATTENTION: {len(skipped_classes)} classes sans valeurs "
          f"nutritionnelles dans NUTRITION_DB, images exclues")

# Fail early with a clear message rather than inside train_test_split
if not train_paths:
    raise FileNotFoundError(f"Aucune image utilisable dans {images_dir}")

print(f"‚úÖ {len(train_paths)} images d'entra√Ænement")

# Hold out 10% of the training images for validation (fixed seed)
from sklearn.model_selection import train_test_split

train_paths, val_paths, train_labels, val_labels = train_test_split(
    train_paths, train_labels, test_size=0.1, random_state=42
)

print(f"Train: {len(train_paths)}, Val: {len(val_paths)}")

# Build the input pipelines; only the training one is shuffled and augmented
train_ds = create_dataset(train_paths, train_labels, augment=True)
val_ds = create_dataset(val_paths, val_labels, augment=False)


# ========================================
# CELLULE 7: Entraînement
# ========================================

# Upper bound on the number of passes over the training set
EPOCHS = 30

# Halve the learning rate after 3 epochs without val_loss improvement
reduce_lr = keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss', factor=0.5, patience=3, min_lr=1e-7, verbose=1
)

# Stop after 5 epochs without val_loss improvement and restore the best weights
early_stop = keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=5, restore_best_weights=True, verbose=1
)

# Keep on disk only the model with the highest validation accuracy
checkpoint = keras.callbacks.ModelCheckpoint(
    '/content/best_model.h5',
    monitor='val_food_classification_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)

# TRAINING
print("\nüéì D√âBUT DE L'ENTRA√éNEMENT (dur√©e estim√©e: 3-4 heures)\n")

training_callbacks = [checkpoint, early_stop, reduce_lr]
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=EPOCHS,
    callbacks=training_callbacks,
    verbose=1,
)

print("\n‚úÖ Entra√Ænement termin√©!")


# ========================================
# CELLULE 8: Visualisation des Résultats
# ========================================

import matplotlib.pyplot as plt

fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# One panel per tracked quantity: (axis, key in history.history, panel title).
# The validation curve of each quantity is stored under the 'val_' prefix.
panels = [
    (axes[0, 0], 'food_classification_accuracy', 'Classification Accuracy'),
    (axes[0, 1], 'food_classification_loss', 'Classification Loss'),
    (axes[1, 0], 'nutrition_values_mae', 'Nutrition MAE'),
    (axes[1, 1], 'loss', 'Total Loss'),
]

for ax, metric, title in panels:
    ax.plot(history.history[metric], label='Train')
    ax.plot(history.history[f'val_{metric}'], label='Val')
    ax.set_title(title)
    ax.set_xlabel('Epoch')
    ax.legend()
    ax.grid(True)

# Save the figure to disk, then display it
plt.tight_layout()
plt.savefig('/content/training_history.png', dpi=150)
plt.show()

best_val_accuracy = max(history.history['val_food_classification_accuracy'])
print(f"‚úÖ Meilleure val accuracy: {best_val_accuracy:.4f}")


# ========================================
# CELLULE 9: Test sur une Image
# ========================================

from PIL import Image

def predict_nutrition(image_path):
    """Predict the food class and nutrition values of one image.

    The image is loaded, converted to RGB, resized to ``IMG_SIZE`` x
    ``IMG_SIZE`` and scaled to [0, 1] before being passed to ``model``. The
    image and a bar chart of the predicted values are displayed, and the
    result is printed as JSON.

    Args:
        image_path: Path of the image file to analyse.

    Returns:
        A dict with the keys ``'food'`` (class name from ``FOOD_CLASSES``),
        ``'confidence'`` (probability of that class, rounded to 3 decimals)
        and ``'nutrition'`` (dict of the 7 predicted values, rounded to
        1 decimal).
    """
    # Load and preprocess. Converting to RGB guarantees three channels for
    # grayscale, palette, RGBA or CMYK files, which would otherwise give the
    # model an input of the wrong shape. The context manager closes the file.
    with Image.open(image_path) as raw:
        img = raw.convert('RGB').resize((IMG_SIZE, IMG_SIZE))
    img_array = np.asarray(img, dtype=np.float32) / 255.0
    img_array = np.expand_dims(img_array, 0)  # add the batch dimension

    # Prediction
    cls_pred, nutr_pred = model.predict(img_array, verbose=0)

    # Most probable class
    top_idx = int(np.argmax(cls_pred[0]))
    food_name = FOOD_CLASSES[top_idx]
    confidence = cls_pred[0][top_idx]

    # Output names, in the order of the regression head's 7 outputs
    output_names = ('calories', 'protein_g', 'fat_g', 'carbs_g',
                    'fiber_g', 'sugars_g', 'sodium_mg')
    nutrition = {
        name: round(float(value), 1)
        for name, value in zip(output_names, nutr_pred[0])
    }

    # Display: the image on the left, the predicted values on the right
    plt.figure(figsize=(10, 4))
    plt.subplot(1, 2, 1)
    plt.imshow(img)
    plt.axis('off')
    plt.title(f"{food_name.replace('_', ' ').title()}\n{confidence*100:.1f}% confidence")

    plt.subplot(1, 2, 2)
    labels = list(nutrition.keys())
    values = list(nutrition.values())
    plt.barh(labels, values, color='skyblue')
    plt.xlabel('Value')
    plt.title('Nutrition Analysis')
    plt.tight_layout()
    plt.show()

    result = {
        'food': food_name,
        'confidence': round(float(confidence), 3),
        'nutrition': nutrition
    }

    print(json.dumps(result, indent=2))
    return result

# Try the model on a randomly chosen image from the validation split, i.e.
# one that was not used to fit the weights (the first training image says
# little about how the model behaves on new pictures).
import random

test_img = random.choice(val_paths)
print(f"Test sur: {test_img}")
predict_nutrition(test_img)


# ========================================
# CELLULE 10: Télécharger le Modèle
# ========================================

# Sauvegarder le mod√®le final
model.save('/content/nutrition_model_final.h5')

# T√©l√©charger vers Google Drive
from google.colab import drive
drive.mount('/content/drive')

!cp /content/best_model.h5 /content/drive/MyDrive/
!cp /content/nutrition_model_final.h5 /content/drive/MyDrive/

print("‚úÖ Mod√®les sauvegard√©s dans Google Drive!")
print("\nPour t√©l√©charger:")
print("1. Allez dans Google Drive")
print("2. T√©l√©chargez: best_model.h5 et nutrition_model_final.h5")


# ========================================
# FIN DU NOTEBOOK
# ========================================
# 
# RÉSULTATS ATTENDUS:
# - Accuracy: 70-85% (selon durée d'entraînement)
# - Nutrition MAE: 15-30 (selon les métriques)
# - Taille du modèle: ~50-80 MB
#
# PROCHAINES ÉTAPES:
# 1. Télécharger le modèle
# 2. Utiliser 06_inference.py pour tester
# 3. Exporter avec 07_export_model.py