In [None]:
import kagglehub

# Download the latest version of the dataset and keep the returned location.
# The call must stay enabled: `path` is printed below, so a fresh
# "Restart Kernel -> Run All" would otherwise fail with a NameError.
path = kagglehub.dataset_download("mexwell/tea-sickness-dataset")

print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/mexwell/tea-sickness-dataset?dataset_version_number=1...


100%|██████████| 740M/740M [00:12<00:00, 61.5MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/mexwell/tea-sickness-dataset/versions/1


In [None]:
import os
import numpy as np
import tensorflow as tf
import mlflow
import mlflow.tensorflow
import dagshub

from tensorflow.keras.applications import MobileNetV3Small
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

# ==========================================
# 1. CONNECT TO DAGSHUB (The Permanent Link)
# ==========================================

# CHANGE THESE TO YOUR DETAILS!
REPO_OWNER = 'amarasinghelra'
REPO_NAME = 'TeaLeaf-Lens2'

print(f"Connecting to DagsHub: {REPO_OWNER}/{REPO_NAME}...")
# mlflow=True asks DagsHub to configure MLflow tracking for this repository.
dagshub.init(repo_owner=REPO_OWNER, repo_name=REPO_NAME, mlflow=True)
print("Connection Successful! MLflow is ready.")

# ==========================================
# 2. HYPERPARAMETERS
# ==========================================
EXP_NAME = "TeaLeaf_Lens_Optimization"
RUN_NAME = "MobileNetV3_DagsHub_Run"

# Everything a reader might tune lives here; the whole dict is logged to the
# MLflow run, so the seed is recorded next to the other settings.
PARAMS = {
    "EPOCHS": 10,
    "BATCH_SIZE": 32,
    "LEARNING_RATE": 0.01,
    "DROPOUT_RATE": 0.3,
    "IMG_SIZE": (224, 224),
    "SEED": 42
}

# Seed the Python, NumPy and TensorFlow random generators before any data
# shuffling, augmentation or weight initialisation happens, so repeated runs
# are comparable. (Some GPU kernels may still be non-deterministic.)
tf.keras.utils.set_random_seed(PARAMS["SEED"])

# ==========================================
# 3. MLFLOW SETUP (The "Best of Both Worlds" Approach)
# ==========================================
mlflow.set_experiment(EXP_NAME)

# STRATEGY PART 1: Use Autolog for the standard training stats
mlflow.tensorflow.autolog()

print(f"Starting Run: {RUN_NAME}")

with mlflow.start_run(run_name=RUN_NAME):
    # Record the hyperparameter dictionary on this run.
    mlflow.log_params(PARAMS)

    # --- Data Pipeline ---
    # Search the kagglehub cache for the directory that directly contains the
    # class folders, recognised by the presence of the 'Anthracnose' folder.
    # The cache root is resolved from the current user's home directory
    # (this is /root/.cache/kagglehub/... when running as root).
    base_search_path = os.path.join(
        os.path.expanduser('~'), '.cache', 'kagglehub',
        'datasets', 'mexwell', 'tea-sickness-dataset'
    )
    final_data_dir = None
    for root, dirs, _files in os.walk(base_search_path):
        if 'Anthracnose' in dirs:
            final_data_dir = root
            break

    # Fail early with a readable message instead of passing None to Keras.
    if final_data_dir is None:
        raise FileNotFoundError(
            f"Could not find a folder containing 'Anthracnose' under "
            f"{base_search_path}. Run the dataset download cell first."
        )

    preprocess_fn = tf.keras.applications.mobilenet_v3.preprocess_input
    validation_split = 0.2

    # Augmentation is applied to the training subset only.
    train_datagen = ImageDataGenerator(
        preprocessing_function=preprocess_fn,
        rotation_range=30, zoom_range=0.2, width_shift_range=0.2, height_shift_range=0.2,
        horizontal_flip=True, validation_split=validation_split
    )
    # The validation subset uses the same preprocessing and the same split
    # fraction but NO augmentation, so val_accuracy / val_loss are measured
    # on unmodified images. Using an identical validation_split on the same
    # directory keeps the two subsets disjoint.
    val_datagen = ImageDataGenerator(
        preprocessing_function=preprocess_fn,
        validation_split=validation_split
    )

    # Options shared by both directory iterators.
    flow_options = dict(
        target_size=PARAMS["IMG_SIZE"],
        batch_size=PARAMS["BATCH_SIZE"],
        class_mode='categorical'
    )
    train_generator = train_datagen.flow_from_directory(
        final_data_dir, subset='training', **flow_options
    )
    val_generator = val_datagen.flow_from_directory(
        final_data_dir, subset='validation', **flow_options
    )

    # --- Model Build ---
    # Frozen ImageNet backbone with a small classification head on top.
    # The input shape follows IMG_SIZE so the two settings cannot drift apart.
    input_shape = tuple(PARAMS["IMG_SIZE"]) + (3,)
    base_model = MobileNetV3Small(weights='imagenet', include_top=False, input_shape=input_shape)
    base_model.trainable = False
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dropout(PARAMS["DROPOUT_RATE"])(x)
    # One output unit per class folder discovered by the training iterator.
    predictions = Dense(len(train_generator.class_indices), activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=predictions)

    model.compile(optimizer=Adam(learning_rate=PARAMS["LEARNING_RATE"]),
                  loss='categorical_crossentropy', metrics=['accuracy'])

    # --- Training (Autolog captures this automatically!) ---
    history = model.fit(train_generator, epochs=PARAMS["EPOCHS"], validation_data=val_generator)

    # --- Post-Processing: Quantization ---
    print("Quantizing model...")
    converter = tf.lite.TFLiteConverter.from_keras_model(model)
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    tflite_model = converter.convert()

    # STRATEGY PART 2: Manual Logging for the "Interview Metric"
    # tflite_model is the serialized model as bytes; report its size in MiB.
    tflite_size_mb = len(tflite_model) / (1024 * 1024)
    # Validation accuracy of the last epoch (not necessarily the best epoch).
    final_val_acc = history.history['val_accuracy'][-1]

    print("Logging custom metrics to DagsHub...")
    mlflow.log_metric("tflite_size_mb", tflite_size_mb)
    mlflow.log_metric("final_val_acc", final_val_acc)

    # Optional: You can even save the TFLite file to DagsHub
    with open("tealeaf.tflite", "wb") as f:
        f.write(tflite_model)
    mlflow.log_artifact("tealeaf.tflite")

    print("\n" + "="*40)
    print(f"Success! Check your results at: https://dagshub.com/{REPO_OWNER}/{REPO_NAME}")
    print(f"Final TFLite Size: {tflite_size_mb:.2f} MB")
    print("="*40)

Connecting to DagsHub: amarasinghelra/TeaLeaf-Lens2...


Connection Successful! MLflow is ready.
Starting Run: MobileNetV3_DagsHub_Run
Found 711 images belonging to 8 classes.
Found 174 images belonging to 8 classes.
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v3/weights_mobilenet_v3_small_224_1.0_float_no_top_v2.h5
[1m4334752/4334752[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 0us/step


  self._warn_if_super_not_called()


Epoch 1/10
[1m23/23[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.1799 - loss: 2.2519

  self._warn_if_super_not_called()


[1m23/23[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m74s[0m 3s/step - accuracy: 0.1799 - loss: 2.2492 - val_accuracy: 0.3506 - val_loss: 1.7539
Epoch 2/10
[1m23/23[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.3277 - loss: 1.7272



[1m23/23[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m57s[0m 3s/step - accuracy: 0.3299 - loss: 1.7241 - val_accuracy: 0.5057 - val_loss: 1.4533
Epoch 3/10
[1m23/23[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.4714 - loss: 1.4141



[1m23/23[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m55s[0m 2s/step - accuracy: 0.4727 - loss: 1.4115 - val_accuracy: 0.5690 - val_loss: 1.2519
Epoch 4/10
[1m23/23[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.5962 - loss: 1.1795



[1m23/23[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m55s[0m 2s/step - accuracy: 0.5957 - loss: 1.1794 - val_accuracy: 0.6437 - val_loss: 1.1319
Epoch 5/10
[1m23/23[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.6663 - loss: 1.0542



[1m23/23[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m56s[0m 2s/step - accuracy: 0.6662 - loss: 1.0520 - val_accuracy: 0.6552 - val_loss: 0.9973
Epoch 6/10
[1m23/23[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.6510 - loss: 0.9226



[1m23/23[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m54s[0m 2s/step - accuracy: 0.6509 - loss: 0.9231 - val_accuracy: 0.6494 - val_loss: 0.9851
Epoch 7/10
[1m23/23[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.6835 - loss: 0.8767



[1m23/23[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m55s[0m 2s/step - accuracy: 0.6845 - loss: 0.8751 - val_accuracy: 0.7011 - val_loss: 0.9192
Epoch 8/10
[1m23/23[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.7263 - loss: 0.8438



[1m23/23[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m55s[0m 2s/step - accuracy: 0.7259 - loss: 0.8429 - val_accuracy: 0.6897 - val_loss: 0.8985
Epoch 9/10
[1m23/23[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.7553 - loss: 0.7466



[1m23/23[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m53s[0m 2s/step - accuracy: 0.7556 - loss: 0.7457 - val_accuracy: 0.6954 - val_loss: 0.8686
Epoch 10/10
[1m23/23[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m58s[0m 2s/step - accuracy: 0.7378 - loss: 0.7520 - val_accuracy: 0.6782 - val_loss: 0.8757




Quantizing model...
Saved artifact at '/tmp/tmpjz1bvy1u'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name='keras_tensor')
Output Type:
  TensorSpec(shape=(None, 8), dtype=tf.float32, name=None)
Captures:
  137354599176848: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137354599174928: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137354599176272: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137354599171664: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137354599174160: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137354599174544: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137354599176080: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137354599175888: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137354599175120: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137354599177808: TensorSpec(shape=(), dtype=tf.resource, name=No