<a href="https://colab.research.google.com/github/Mananalik/Food_Freshness_Classification/blob/main/Base_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the Kaggle download helper that the next cell imports as `od`.
# NOTE(review): the version is unpinned and `!pip` runs in a subshell; consider
# `%pip install -q opendatasets==<version>` so the install targets the kernel's
# environment and is reproducible — confirm the version to pin.
!pip install opendatasets -q

In [None]:
import os

import opendatasets as od

# Kaggle dataset holding the electronic-nose CSV recordings used by this notebook.
DATASET_URL = 'https://www.kaggle.com/datasets/mehrabmahdian/food-freshness-electronic-nose-data'
# The download log shows the archive is extracted into a folder named after the
# last URL segment, relative to the current working directory.
DATASET_DIR = DATASET_URL.rstrip('/').rsplit('/', 1)[-1]

print("Downloading sensor dataset...")
# Prompts for Kaggle credentials interactively unless they are already configured.
od.download(DATASET_URL)

# Only report success once the extracted folder is really there; otherwise the
# failure would surface much later as a confusing "directory not found" error.
if not os.path.isdir(DATASET_DIR):
    raise FileNotFoundError(
        f"Dataset folder '{DATASET_DIR}' was not created; the download did not complete."
    )

print("\n✅ Sensor dataset downloaded successfully!")

Downloading sensor dataset...
Please provide your Kaggle credentials to download this dataset. Learn more: http://bit.ly/kaggle-creds
Your Kaggle username: mananjaat28
Your Kaggle Key: ··········
Dataset URL: https://www.kaggle.com/datasets/mehrabmahdian/food-freshness-electronic-nose-data
Downloading food-freshness-electronic-nose-data.zip to ./food-freshness-electronic-nose-data


100%|██████████| 9.81M/9.81M [00:00<00:00, 618MB/s]








✅ Sensor dataset downloaded successfully!


In [None]:
# Install TensorFlow, which the import cell below loads as `tf`.
# NOTE(review): unpinned install; consider `%pip install -q tensorflow==<version>`
# so results are reproducible — confirm the version to pin.
!pip install tensorflow



In [None]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Conv1D, MaxPooling1D
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
import json

In [None]:
# Folder whose CSV files are read by `load_and_preprocess_data`.
# NOTE(review): "AllSmaples" looks like a typo but presumably mirrors the folder
# name inside the downloaded dataset — confirm before "correcting" it.
DATA_DIRECTORY = 'food-freshness-electronic-nose-data/AllSmaples-Report/'
# CSV columns used as model input features; the loader skips any file that
# does not contain all of them.
SENSOR_COLUMNS = ['MQ3', 'MQ8', 'MQ135']
# Number of consecutive CSV rows that make up one training sequence; the loader
# cuts each file into non-overlapping windows of this length.
SEQUENCE_LENGTH = 180

In [None]:
def load_and_preprocess_data(data_dir, sensor_list, seq_len):
    """Load labelled sensor CSV files and cut them into fixed-length sequences.

    Every ``*.csv`` file in ``data_dir`` whose name (without extension) ends in a
    space-separated freshness tag ``D1`` .. ``D5`` is read, reduced to the
    ``sensor_list`` columns and split into non-overlapping windows of
    ``seq_len`` rows. Trailing rows that do not fill a whole window are dropped.
    Files with an unrecognized tag or with missing sensor columns are skipped
    with a printed notice.

    Args:
        data_dir: Directory containing the CSV files.
        sensor_list: Sequence of column names to keep as features.
        seq_len: Number of rows per sequence (also the stride between windows).

    Returns:
        Tuple ``(X_sequences, y_labels, freshness_labels)``:
        ``X_sequences`` has shape ``(n_sequences, seq_len, len(sensor_list))``,
        ``y_labels`` holds the integer class id of each sequence, and
        ``freshness_labels`` maps class id -> tag (``{0: 'D1', ...}``).
    """
    all_data = []
    labels = []
    freshness_mapping = {'D1': 0, 'D2': 1, 'D3': 2, 'D4': 3, 'D5': 4}
    # Accept any sequence of column names; a tuple passed straight to df[...]
    # would be treated as a single key rather than a column selection.
    columns = list(sensor_list)

    print("--- Loading and parsing data files ---")
    # os.listdir() returns entries in arbitrary order; sorting makes the sample
    # order (and therefore any seeded train/test split) reproducible.
    for filename in sorted(os.listdir(data_dir)):
        if not filename.endswith(".csv"):
            continue

        # Strip only the extension; str.replace would also remove a '.csv'
        # occurring in the middle of the name.
        parts = os.path.splitext(filename)[0].split(' ')
        if len(parts) <= 1 or parts[-1] not in freshness_mapping:
            print(f"Skipping {filename}: unrecognized freshness label.")
            continue

        label = freshness_mapping[parts[-1]]
        df = pd.read_csv(os.path.join(data_dir, filename))

        if not all(col in df.columns for col in columns):
            print(f"Skipping {filename}: does not contain all required sensors.")
            continue

        # Convert once, then slice the array: same windows as slicing the frame
        # row-by-row, without rebuilding a DataFrame for every window.
        values = df[columns].to_numpy()
        for start in range(0, len(values) - seq_len + 1, seq_len):
            all_data.append(values[start:start + seq_len])
            labels.append(label)

    if all_data:
        X_sequences = np.array(all_data)
    else:
        # Keep the 3-D layout even when nothing was loaded so callers can still
        # inspect shape[1] / shape[2].
        X_sequences = np.empty((0, seq_len, len(columns)))
    y_labels = np.array(labels)
    freshness_labels = {v: k for k, v in freshness_mapping.items()}

    print(f"\nTotal sequences created: {len(X_sequences)}")
    return X_sequences, y_labels, freshness_labels

In [None]:
def scale_data(X_train, X_test):
    """Standardize each feature using statistics from the training set only.

    Both inputs are indexed as 3-D arrays whose last axis is the feature axis.
    They are flattened to 2-D ``(rows, n_features)`` for the scaler and then
    restored to their original shapes.

    Args:
        X_train: Training sequences; the scaler is fitted on these.
        X_test: Held-out sequences; transformed with the fitted scaler.

    Returns:
        Tuple ``(X_train_scaled, X_test_scaled, scaler)``.
    """
    print("\n--- Scaling data ---")
    feature_count = X_train.shape[2]

    def _apply(batch, operation):
        # Collapse every leading axis so each row is one feature vector, run
        # the scaler operation, then put the original layout back.
        flat = batch.reshape(-1, feature_count)
        return operation(flat).reshape(batch.shape)

    scaler = StandardScaler()
    train_scaled = _apply(X_train, scaler.fit_transform)
    test_scaled = _apply(X_test, scaler.transform)

    return train_scaled, test_scaled, scaler

In [None]:
def build_cnn_lstm_model(input_shape, num_classes):
    """Build and compile the Conv1D + stacked-LSTM sequence classifier.

    Args:
        input_shape: Shape of one sample without the batch axis; the caller
            passes ``(sequence_length, n_features)``.
        num_classes: Number of output classes (size of the softmax layer).

    Returns:
        A compiled ``Sequential`` model. It is compiled with
        ``categorical_crossentropy``, so targets must be one-hot encoded.
    """
    model = Sequential([
        # Declare the input with an explicit Input object. Passing `input_shape`
        # to the first layer is deprecated in Keras 3 and emits a UserWarning.
        tf.keras.Input(shape=input_shape),
        Conv1D(filters=64, kernel_size=5, activation='relu', name="Conv1D_1"),
        MaxPooling1D(pool_size=2, name="MaxPool_1"),
        Dropout(0.3, name="Dropout_1"),
        # return_sequences=True so the second LSTM still receives a sequence.
        LSTM(100, return_sequences=True, name="LSTM_1"),
        Dropout(0.3, name="Dropout_2"),
        LSTM(50, name="LSTM_2"),
        Dropout(0.3, name="Dropout_3"),
        Dense(50, activation='relu', name="Dense_1"),
        Dense(num_classes, activation='softmax', name="Output_Layer")
    ], name="CNN_LSTM_Hybrid")
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [None]:
# End-to-end run: load sequences, split, scale, train the CNN-LSTM, evaluate on
# the held-out test set and persist the scaler, best checkpoint and label map.
try:
    # Seed the Python, NumPy and TensorFlow RNGs so weight initialization,
    # dropout and batch shuffling repeat between runs (as far as the backend allows).
    RANDOM_SEED = 42
    tf.keras.utils.set_random_seed(RANDOM_SEED)

    X, y_raw, freshness_labels = load_and_preprocess_data(DATA_DIRECTORY, SENSOR_COLUMNS, SEQUENCE_LENGTH)
    if len(X) == 0:
        raise ValueError(f"No sequences were loaded from '{DATA_DIRECTORY}'.")

    # Size the one-hot encoding by the largest label id rather than the number
    # of distinct labels: identical when ids are 0..k-1, and it does not break
    # when one freshness level is absent from the data.
    num_classes = int(y_raw.max()) + 1
    y_one_hot = tf.keras.utils.to_categorical(y_raw, num_classes=num_classes)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y_one_hot, test_size=0.2, random_state=RANDOM_SEED, stratify=y_raw
    )

    X_train_scaled, X_test_scaled, scaler = scale_data(X_train, X_test)
    joblib.dump(scaler, 'time_series_scaler.pkl')
    print("✅ Scaler saved to 'time_series_scaler.pkl'")

    input_shape = (X_train_scaled.shape[1], X_train_scaled.shape[2])
    model = build_cnn_lstm_model(input_shape, num_classes)
    model.summary()

    # Note: the two callbacks track different metrics, so the weights restored
    # in memory (best val_loss) can differ from those in 'best_model.keras'
    # (best val_accuracy).
    callbacks = [
        EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True),
        ModelCheckpoint('best_model.keras', monitor='val_accuracy', save_best_only=True, mode='max')
    ]

    print("\n--- Training CNN-LSTM Model ---")
    # Validate on a slice of the training data instead of the test set. Early
    # stopping and checkpointing select the model on validation metrics, so
    # using the test set here would make the final accuracy optimistically biased.
    # Keras takes the last 20% of the (already shuffled) training arrays.
    history = model.fit(
        X_train_scaled, y_train,
        epochs=50,
        batch_size=32,
        validation_split=0.2,
        callbacks=callbacks,
        verbose=1
    )

    # The test set is touched only here, after model selection is finished.
    loss, accuracy = model.evaluate(X_test_scaled, y_test, verbose=0)
    print(f"\n🎯 Final Model Accuracy: {accuracy*100:.2f}%")

    # json.dump writes the integer class ids as string keys ("0", "1", ...).
    with open('freshness_labels.json', 'w') as f:
        json.dump(freshness_labels, f)
    print("✅ Label mapping saved to 'freshness_labels.json'")
    print("\n--- Project Complete! ---")

except Exception as e:
    print(f"\nAn error occurred: {e}")
    # Re-raise so the cell visibly fails with a traceback instead of letting
    # later cells run against missing or stale variables.
    raise

--- Loading and parsing data files ---
Skipping AppleTomato.csv: unrecognized freshness label.
Skipping AppleMandarin.csv: unrecognized freshness label.
Skipping AppleBanana.csv: unrecognized freshness label.
Skipping BananaMandarin.csv: unrecognized freshness label.
Skipping TomatoBanana.csv: unrecognized freshness label.
Skipping TomatoMandarin.csv: unrecognized freshness label.
Skipping AppleBananaMandarin.csv: unrecognized freshness label.
Skipping Mandarin.csv: unrecognized freshness label.
Skipping AppleBananaTomato.csv: unrecognized freshness label.

Total sequences created: 3622

--- Scaling data ---
✅ Scaler saved to 'time_series_scaler.pkl'


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



--- Training CNN-LSTM Model ---
Epoch 1/50
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 152ms/step - accuracy: 0.3644 - loss: 1.3848 - val_accuracy: 0.5366 - val_loss: 0.9588
Epoch 2/50
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 139ms/step - accuracy: 0.5758 - loss: 0.9627 - val_accuracy: 0.5917 - val_loss: 0.8807
Epoch 3/50
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 142ms/step - accuracy: 0.6338 - loss: 0.8188 - val_accuracy: 0.6579 - val_loss: 0.7339
Epoch 4/50
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 146ms/step - accuracy: 0.6307 - loss: 0.8078 - val_accuracy: 0.7310 - val_loss: 0.6846
Epoch 5/50
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 140ms/step - accuracy: 0.6592 - loss: 0.7337 - val_accuracy: 0.7324 - val_loss: 0.6297
Epoch 6/50
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 143ms/step - accuracy: 0.6979 - loss: 0.6828 - val_accuracy: 0.7683 - val_lo