# Waste Classifier Model Training
This notebook trains a model to predict the condition score of waste items using both images and form data.

In [23]:
# Step 1: Import libraries
import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Concatenate, Dropout
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.optimizers import Adam
import pickle

In [24]:
# Step 2: Load the dataset
df = pd.read_csv("../data/full_items_extended_dataset.csv")
df.head()

Unnamed: 0,item_type,years_used,condition,description,image_damage,condition_score,output,image_path
0,Blenders/Mixers,1,Working,"Motor works fine, jar slightly scratched",Low,0.95,Refurbish and Resell,images/blendersmixers_working_low_1yrs.jpg
1,Blenders/Mixers,2,Working,"Motor works fine, jar slightly scratched",Low,0.92,Refurbish and Resell,images/blendersmixers_working_low_2yrs.jpg
2,Blenders/Mixers,3,Working,"Motor works fine, jar slightly scratched",Low,0.89,Refurbish and Resell,images/blendersmixers_working_low_3yrs.jpg
3,Blenders/Mixers,4,Working,"Motor works fine, jar slightly scratched",Low,0.86,Refurbish and Resell,images/blendersmixers_working_low_4yrs.jpg
4,Blenders/Mixers,5,Working,"Motor works fine, jar slightly scratched",Low,0.83,Refurbish and Resell,images/blendersmixers_working_low_5yrs.jpg


In [7]:
# Step 3: Load and preprocess images
def process_image(image_path):
    try:
        img = load_img("../" + image_path, target_size=(224, 224))
        img_array = img_to_array(img)
        return preprocess_input(img_array)
    except Exception as e:
        print(f"Error loading {image_path}: {e}")
        return np.zeros((224, 224, 3))  # fallback for missing images

images = np.array([process_image(path) for path in df["image_path"]])
print("Images shape:", images.shape)

Error loading images/blendersmixers_working_low_1yrs.jpg: [Errno 2] No such file or directory: '../images/blendersmixers_working_low_1yrs.jpg'
Error loading images/blendersmixers_working_low_2yrs.jpg: [Errno 2] No such file or directory: '../images/blendersmixers_working_low_2yrs.jpg'
Error loading images/blendersmixers_working_low_3yrs.jpg: [Errno 2] No such file or directory: '../images/blendersmixers_working_low_3yrs.jpg'
Error loading images/blendersmixers_working_low_4yrs.jpg: [Errno 2] No such file or directory: '../images/blendersmixers_working_low_4yrs.jpg'
Error loading images/blendersmixers_working_low_5yrs.jpg: [Errno 2] No such file or directory: '../images/blendersmixers_working_low_5yrs.jpg'
Error loading images/blendersmixers_working_moderate_1yrs.jpg: [Errno 2] No such file or directory: '../images/blendersmixers_working_moderate_1yrs.jpg'
Error loading images/blendersmixers_working_moderate_2yrs.jpg: [Errno 2] No such file or directory: '../images/blendersmixers_workin

In [8]:
# Step 4: Encode categorical columns
df["item_type"] = LabelEncoder().fit_transform(df["item_type"])
df["condition"] = LabelEncoder().fit_transform(df["condition"])

In [25]:
# Step 4b: Text preprocessing for description
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Tokenize descriptions
max_words = 1000
max_len = 30
tokenizer = Tokenizer(num_words=max_words, oov_token='<OOV>')
tokenizer.fit_on_texts(df['description'])
desc_sequences = tokenizer.texts_to_sequences(df['description'])
desc_padded = pad_sequences(desc_sequences, maxlen=max_len, padding='post')
print('Description padded shape:', desc_padded.shape)

with open("../model/tokenizer.pkl", "wb") as f:
    pickle.dump(tokenizer, f)
print("Tokenizer saved to ../model/tokenizer.pkl")

Description padded shape: (450, 30)
Tokenizer saved to ../model/tokenizer.pkl


**Note:** After running the cell above, make sure that `tokenizer.pkl` exists in the `model` folder. If it does, you can now run your Streamlit app and it will work with the description input.

In [9]:
# Step 5: Prepare features and target
form_data = df[["item_type", "years_used", "condition"]].values
condition_score = df["condition_score"].values

In [17]:
# Step 5: Prepare features and target
form_data = df[["item_type", "years_used", "condition"]].values
condition_score = df["condition_score"].values
output_class = df["output"].map({"Refurbish and Resell": 0, "Salvage Components": 1, "Recycle": 2}).values

In [10]:
# Step 6: Build the model
# Image branch
image_input = Input(shape=(224, 224, 3))
base_model = MobileNetV2(include_top=False, input_tensor=image_input, weights="imagenet", pooling="avg")
image_features = base_model.output
image_features = Dense(128, activation='relu')(image_features)

# Form branch
form_input = Input(shape=(form_data.shape[1],))
form_features = Dense(64, activation='relu')(form_input)

# Merge branches
combined = Concatenate()([image_features, form_features])
combined = Dense(64, activation='relu')(combined)
combined = Dropout(0.3)(combined)

# Output
score_output = Dense(1, name="score")(combined)

# Final model
model = Model(inputs=[image_input, form_input], outputs=[score_output])
model.compile(loss='mse', optimizer=Adam(1e-4), metrics=['mae'])
model.summary()

  base_model = MobileNetV2(include_top=False, input_tensor=image_input, weights="imagenet", pooling="avg")


In [18]:
# Step 6: Build the multi-input, multi-output model
from tensorflow.keras.layers import Embedding, LSTM, Flatten
from tensorflow.keras.utils import to_categorical

# Text branch
text_input = Input(shape=(desc_padded.shape[1],))
text_emb = Embedding(input_dim=max_words, output_dim=32, input_length=desc_padded.shape[1])(text_input)
text_lstm = LSTM(32)(text_emb)

# Image branch
image_input = Input(shape=(224, 224, 3))
base_model = MobileNetV2(include_top=False, input_tensor=image_input, weights="imagenet", pooling="avg")
image_features = base_model.output
image_features = Dense(128, activation='relu')(image_features)

# Form branch
form_input = Input(shape=(form_data.shape[1],))
form_features = Dense(64, activation='relu')(form_input)

# Merge all
combined = Concatenate()([image_features, form_features, text_lstm])
combined = Dense(64, activation='relu')(combined)
combined = Dropout(0.3)(combined)

# Outputs
score_output = Dense(1, name="score_output")(combined)
class_output = Dense(3, activation='softmax', name="class_output")(combined)

# Final model
model = Model(inputs=[image_input, form_input, text_input], outputs=[score_output, class_output])
model.compile(loss={"score_output": "mean_squared_error", "class_output": "sparse_categorical_crossentropy"},
              optimizer=Adam(1e-4),
              metrics={"score_output": "mae", "class_output": "accuracy"})
model.summary()

  base_model = MobileNetV2(include_top=False, input_tensor=image_input, weights="imagenet", pooling="avg")


In [11]:
# Step 7: Train/test split
X_img_train, X_img_val, X_form_train, X_form_val, y_train, y_val = train_test_split(
    images, form_data, condition_score, test_size=0.2, random_state=42
)

In [19]:
# Step 7: Train/test split
X_img_train, X_img_val, X_form_train, X_form_val, X_text_train, X_text_val, y_train, y_val, y_class_train, y_class_val = train_test_split(
    images, form_data, desc_padded, condition_score, output_class, test_size=0.2, random_state=42
)

In [12]:
# Step 8: Train the model
history = model.fit(
    [X_img_train, X_form_train], y_train,
    validation_data=([X_img_val, X_form_val], y_val),
    epochs=10, batch_size=16
)

Epoch 1/10
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m135s[0m 2s/step - loss: 0.4799 - mae: 0.5304 - val_loss: 0.1201 - val_mae: 0.2641
Epoch 2/10
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m135s[0m 2s/step - loss: 0.4799 - mae: 0.5304 - val_loss: 0.1201 - val_mae: 0.2641
Epoch 2/10
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 2s/step - loss: 0.1215 - mae: 0.2713 - val_loss: 0.1137 - val_mae: 0.2800
Epoch 3/10
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 2s/step - loss: 0.1215 - mae: 0.2713 - val_loss: 0.1137 - val_mae: 0.2800
Epoch 3/10
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 2s/step - loss: 0.1187 - mae: 0.2886 - val_loss: 0.1215 - val_mae: 0.2826
Epoch 4/10
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 2s/step - loss: 0.1187 - mae: 0.2886 - val_loss: 0.1215 - val_mae: 0.2826
Epoch 4/10
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 2s/step - loss: 0.1044

In [20]:
# Step 8: Train the model
history = model.fit(
    [X_img_train, X_form_train, X_text_train],
    {"score_output": y_train, "class_output": y_class_train},
    validation_data=([X_img_val, X_form_val, X_text_val], {"score_output": y_val, "class_output": y_class_val}),
    epochs=10, batch_size=16
)

Epoch 1/10
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m90s[0m 2s/step - class_output_accuracy: 0.2815 - class_output_loss: 1.2317 - loss: 1.4545 - score_output_loss: 0.2224 - score_output_mae: 0.3784 - val_class_output_accuracy: 0.2778 - val_class_output_loss: 1.1392 - val_loss: 1.3082 - val_score_output_loss: 0.1723 - val_score_output_mae: 0.3411
Epoch 2/10
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m90s[0m 2s/step - class_output_accuracy: 0.2815 - class_output_loss: 1.2317 - loss: 1.4545 - score_output_loss: 0.2224 - score_output_mae: 0.3784 - val_class_output_accuracy: 0.2778 - val_class_output_loss: 1.1392 - val_loss: 1.3082 - val_score_output_loss: 0.1723 - val_score_output_mae: 0.3411
Epoch 2/10
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 2s/step - class_output_accuracy: 0.3303 - class_output_loss: 1.1210 - loss: 1.2811 - score_output_loss: 0.1598 - score_output_mae: 0.3257 - val_class_output_accuracy: 0.2889 - val_class_output_los

In [21]:
# Step 9: Save the trained model
model.save("../model/waste_model.h5")
print("Model saved to ../model/waste_model.h5")



Model saved to ../model/waste_model.h5


In [22]:
# Step 9: Save the trained model
model.save("../model/waste_model.h5")
print("Model saved to ../model/waste_model.h5")



Model saved to ../model/waste_model.h5
