In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import joblib

In [2]:
# Load dataset
df = pd.read_csv("ewaste_training_detailed2.csv")

In [3]:
# Use query as X and label as y
X = df['query']
y = df['label']

In [4]:
# Train/test split (stratify keeps label balance)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [5]:
# Build pipeline
pipeline = Pipeline([
    ('tfidf', TfidfVectorizer(lowercase=True, stop_words='english', ngram_range=(1,3))),
    ('clf', LogisticRegression(max_iter=3000, class_weight="balanced"))
])

In [6]:
# Grid search for best parameters
param_grid = {
    'tfidf__max_features': [1000, 2000, 3000],
    'clf__C': [0.5, 1, 2]
}
grid = GridSearchCV(pipeline, param_grid, cv=3, n_jobs=-1, verbose=1)
grid.fit(X_train, y_train)

Fitting 3 folds for each of 9 candidates, totalling 27 fits


In [7]:
# Evaluate on test set
y_pred = grid.predict(X_test)
print("✅ Best Params:", grid.best_params_)
print("✅ Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))


✅ Best Params: {'clf__C': 0.5, 'tfidf__max_features': 1000}
✅ Accuracy: 0.9552238805970149

Classification Report:
                 precision    recall  f1-score   support

       battery       1.00      1.00      1.00         6
       charger       1.00      1.00      1.00         6
      keyboard       1.00      1.00      1.00         6
        laptop       1.00      0.75      0.86         8
  mobile phone       0.88      0.88      0.88         8
       printer       1.00      1.00      1.00         6
  refrigerator       1.00      1.00      1.00         7
       speaker       0.88      1.00      0.93         7
storage device       0.86      1.00      0.92         6
    television       1.00      1.00      1.00         7

      accuracy                           0.96        67
     macro avg       0.96      0.96      0.96        67
  weighted avg       0.96      0.96      0.95        67


Confusion Matrix:
 [[6 0 0 0 0 0 0 0 0 0]
 [0 6 0 0 0 0 0 0 0 0]
 [0 0 6 0 0 0 0 0 0 0]
 [0 0 0 

In [9]:
# Save final model
joblib.dump(grid.best_estimator_, "ewaste_model.pkl")
print("🎉 Model saved as ewaste_model.pkl")

🎉 Model saved as ewaste_model.pkl


## Model testing

In [10]:
import pandas as pd
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import joblib

In [11]:
# Step 1: Load your saved pipeline model
model = joblib.load("ewaste_model.pkl")
print("✅ Model loaded successfully!")

✅ Model loaded successfully!


In [12]:
# Step 2: Load your test dataset
test_df = pd.read_csv("ewaste_test.csv")
X_test = test_df['query']
y_test = test_df['label']

In [13]:
# Step 3: Make predictions
y_pred = model.predict(X_test)

In [14]:
# Step 4: Evaluate performance
print("\n🎯 Accuracy:", accuracy_score(y_test, y_pred))
print("\n📊 Classification Report:")
print(classification_report(y_test, y_pred))
print("\n🧩 Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))


🎯 Accuracy: 0.99

📊 Classification Report:
                precision    recall  f1-score   support

       battery       1.00      1.00      1.00        10
       charger       1.00      1.00      1.00        10
      keyboard       1.00      1.00      1.00        10
        laptop       1.00      1.00      1.00        10
  mobile phone       0.91      1.00      0.95        10
       printer       1.00      1.00      1.00        10
  refrigerator       1.00      1.00      1.00        10
       speaker       1.00      1.00      1.00        10
storage device       1.00      1.00      1.00        10
    television       1.00      0.90      0.95        10

      accuracy                           0.99       100
     macro avg       0.99      0.99      0.99       100
  weighted avg       0.99      0.99      0.99       100


🧩 Confusion Matrix:
[[10  0  0  0  0  0  0  0  0  0]
 [ 0 10  0  0  0  0  0  0  0  0]
 [ 0  0 10  0  0  0  0  0  0  0]
 [ 0  0  0 10  0  0  0  0  0  0]
 [ 0  0  0  0 10

In [15]:
# Step 5: Optional - Interactive testing
def predict_category(query):
    return model.predict([query])[0]

print("\n💬 Try your own queries!")
while True:
    user_input = input("Enter electronic item (or 'exit' to quit): ")
    if user_input.lower() == "exit":
        break
    print("Predicted Category:", predict_category(user_input))


💬 Try your own queries!
Enter electronic item (or 'exit' to quit): lappy
Predicted Category: laptop
Enter electronic item (or 'exit' to quit): mobile
Predicted Category: mobile phone
Enter electronic item (or 'exit' to quit): iphone
Predicted Category: mobile phone
Enter electronic item (or 'exit' to quit): mi phone
Predicted Category: mobile phone
Enter electronic item (or 'exit' to quit): mi tv
Predicted Category: television
Enter electronic item (or 'exit' to quit): battery
Predicted Category: battery
Enter electronic item (or 'exit' to quit): cell
Predicted Category: battery
Enter electronic item (or 'exit' to quit): lithium ion battery
Predicted Category: battery
Enter electronic item (or 'exit' to quit): exit


# Image model

In [6]:
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [7]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

import matplotlib.pyplot as plt
import numpy as np
import os
from sklearn.metrics import classification_report, confusion_matrix

In [10]:
# ===============================
# Step 2: Dataset Paths & Parameters
# ===============================
train_dir = '/content/drive/MyDrive/E-Waste Dataset/train'
valid_dir = '/content/drive/MyDrive/E-Waste Dataset/val'
test_dir  = '/content/drive/MyDrive/E-Waste Dataset/test'

IMG_SIZE = (224, 224)
BATCH_SIZE = 32

# Count classes
num_classes = len(os.listdir(train_dir))
print("Number of classes:", num_classes)

Number of classes: 10


In [11]:
# ===============================
# Step 3: Data Generators
# ===============================
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    shear_range=0.1,
    fill_mode='nearest'
)

valid_datagen = ImageDataGenerator(rescale=1./255)
test_datagen  = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=True
)

valid_generator = valid_datagen.flow_from_directory(
    valid_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=True
)

test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=False
)


Found 2400 images belonging to 10 classes.
Found 300 images belonging to 10 classes.
Found 300 images belonging to 10 classes.


In [12]:
# ===============================
# Step 4: Build Model (MobileNetV2 Transfer Learning)
# ===============================
base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(224,224,3))
base_model.trainable = False  # Freeze base initially

x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.3)(x)
predictions = Dense(num_classes, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)
model.compile(optimizer=Adam(learning_rate=1e-3),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

model.summary()

In [13]:
# ===============================
# Step 5: Callbacks
# ===============================
callbacks = [
    EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True),
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6),
    ModelCheckpoint('ewaste_image_model_best.h5', monitor='val_accuracy', save_best_only=True)
]

In [14]:
# ===============================
# Step 6: Train Model
# ===============================
EPOCHS = 10
history = model.fit(
    train_generator,
    validation_data=valid_generator,
    epochs=EPOCHS,
    callbacks=callbacks
)

  self._warn_if_super_not_called()


Epoch 1/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 449ms/step - accuracy: 0.5775 - loss: 1.2184



[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m166s[0m 2s/step - accuracy: 0.5796 - loss: 1.2127 - val_accuracy: 0.8967 - val_loss: 0.3121 - learning_rate: 0.0010
Epoch 2/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 461ms/step - accuracy: 0.8496 - loss: 0.4385



[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 481ms/step - accuracy: 0.8498 - loss: 0.4382 - val_accuracy: 0.9300 - val_loss: 0.2109 - learning_rate: 0.0010
Epoch 3/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 474ms/step - accuracy: 0.8857 - loss: 0.3338 - val_accuracy: 0.9300 - val_loss: 0.2033 - learning_rate: 0.0010
Epoch 4/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 479ms/step - accuracy: 0.9091 - loss: 0.2636 - val_accuracy: 0.9233 - val_loss: 0.1974 - learning_rate: 0.0010
Epoch 5/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 450ms/step - accuracy: 0.9142 - loss: 0.2619



[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 469ms/step - accuracy: 0.9142 - loss: 0.2619 - val_accuracy: 0.9600 - val_loss: 0.1555 - learning_rate: 0.0010
Epoch 6/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 475ms/step - accuracy: 0.9098 - loss: 0.2521 - val_accuracy: 0.9367 - val_loss: 0.2053 - learning_rate: 0.0010
Epoch 7/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 480ms/step - accuracy: 0.9347 - loss: 0.2115 - val_accuracy: 0.9400 - val_loss: 0.1593 - learning_rate: 0.0010
Epoch 8/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 465ms/step - accuracy: 0.9398 - loss: 0.1801 - val_accuracy: 0.9367 - val_loss: 0.1715 - learning_rate: 0.0010
Epoch 9/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 475ms/step - accuracy: 0.9348 - loss: 0.1989 - val_accuracy: 0.9533 - val_loss: 0.1600 - learning_rate: 5.0000e-

In [15]:
# ===============================
# Step 7: Fine-tune Base Model (Optional)
# ===============================
base_model.trainable = True
for layer in base_model.layers[:100]:  # Freeze first 100 layers
    layer.trainable = False

model.compile(optimizer=Adam(learning_rate=1e-4),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

history_finetune = model.fit(
    train_generator,
    validation_data=valid_generator,
    epochs=10,
    callbacks=callbacks
)

Epoch 1/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 596ms/step - accuracy: 0.8175 - loss: 0.5697 - val_accuracy: 0.8867 - val_loss: 0.3329 - learning_rate: 1.0000e-04
Epoch 2/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 476ms/step - accuracy: 0.9113 - loss: 0.2627 - val_accuracy: 0.9033 - val_loss: 0.2641 - learning_rate: 1.0000e-04
Epoch 3/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 477ms/step - accuracy: 0.9232 - loss: 0.2139 - val_accuracy: 0.9500 - val_loss: 0.1837 - learning_rate: 1.0000e-04
Epoch 4/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 482ms/step - accuracy: 0.9548 - loss: 0.1318 - val_accuracy: 0.9133 - val_loss: 0.3066 - learning_rate: 1.0000e-04
Epoch 5/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 481ms/step - accuracy: 0.9595 - loss: 0.1214 - val_accuracy: 0.9000 - val_loss: 0.3214 - learning_rate: 1.0000e-04
Epoch 6/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━

In [16]:
# ===============================
# Step 8: Evaluate on Test Set
# ===============================
loss, acc = model.evaluate(test_generator)
print("✅ Test Accuracy:", acc)

y_true = test_generator.classes
y_pred_probs = model.predict(test_generator)
y_pred = np.argmax(y_pred_probs, axis=1)

class_names = list(test_generator.class_indices.keys())

print("\nClassification Report:")
print(classification_report(y_true, y_pred, target_names=class_names))

cm = confusion_matrix(y_true, y_pred)
print("\nConfusion Matrix:")
print(cm)

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 8s/step - accuracy: 0.9003 - loss: 0.2579
✅ Test Accuracy: 0.8999999761581421
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 515ms/step

Classification Report:
                 precision    recall  f1-score   support

        Battery       0.83      0.80      0.81        30
       Keyboard       0.97      1.00      0.98        30
      Microwave       0.81      1.00      0.90        30
         Mobile       0.90      0.90      0.90        30
          Mouse       1.00      0.93      0.97        30
            PCB       0.93      0.87      0.90        30
         Player       0.93      0.87      0.90        30
        Printer       0.89      0.83      0.86        30
     Television       0.86      0.83      0.85        30
Washing Machine       0.91      0.97      0.94        30

       accuracy                           0.90       300
      macro avg       0.90      0.90      0.90       300
   weighted avg

In [17]:
# ===============================
# Step 9: Save Final Model
# ===============================
model.save('ewaste_image_model_final.h5')
print("🎉 Model saved successfully!")



🎉 Model saved successfully!
