In [3]:
import kagglehub

# Download latest version
# path = kagglehub.dataset_download("sumn2u/garbage-classification-v2")
path = "/home/pasha/.cache/kagglehub/datasets/sumn2u/garbage-classification-v2/versions/8/"
print("Path to dataset files:", path)

Path to dataset files: /home/pasha/.cache/kagglehub/datasets/sumn2u/garbage-classification-v2/versions/8/


  from .autonotebook import tqdm as notebook_tqdm


In [4]:
import os
import cv2
import numpy as np
import pandas as pd
classes = os.listdir(os.path.join(path, "garbage-dataset"))
print(classes)

['trash', 'glass', 'battery', 'clothes', 'metal', 'plastic', 'cardboard', 'paper', 'biological', 'shoes']


In [5]:
def calculate_color_amounts(image):
    # Convert the image to HSV color space
    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

    # Define the range for yellow color in HSV
    lower_yellow = np.array([20, 100, 100])
    upper_yellow = np.array([30, 255, 255])
    yellow_mask = cv2.inRange(hsv_image, lower_yellow, upper_yellow)
    yellow_amount = cv2.countNonZero(yellow_mask)

    # Define the range for silver color in HSV
    lower_silver = np.array([0, 0, 200])
    upper_silver = np.array([180, 25, 255])
    silver_mask = cv2.inRange(hsv_image, lower_silver, upper_silver)
    silver_amount = cv2.countNonZero(silver_mask)

    return yellow_amount, silver_amount

In [6]:
test_im = image = cv2.imread("/home/pasha/.cache/kagglehub/datasets/sumn2u/garbage-classification-v2/versions/8/garbage-dataset/metal/metal_181.jpg")
print(calculate_color_amounts(test_im))

(0, 34841)


In [8]:
import os
import cv2
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# Define the path to the dataset
base_path = "/home/pasha/.cache/kagglehub/datasets/sumn2u/garbage-classification-v2/versions/8/garbage-dataset"
classes = ['trash', 'glass', 'battery', 'clothes', 'metal', 'plastic', 'cardboard', 'paper', 'biological', 'shoes']

# Initialize lists to hold features and labels
features = []
labels = []

# Loop through each class and extract features from images
for class_name in classes:
    class_path = os.path.join(base_path, class_name)
    
    for image_name in os.listdir(class_path):
        image_path = os.path.join(class_path, image_name)
        
        # Read the image
        image = cv2.imread(image_path)
        
        if image is not None:
            yellow_amount, silver_amount = calculate_color_amounts(image)
            features.append([yellow_amount, silver_amount])
            labels.append(class_name)

# Convert features and labels to a DataFrame
df = pd.DataFrame(features, columns=['yellow_amount', 'silver_amount'])
df['label'] = labels

# Split the dataset into training and testing sets
X = df[['yellow_amount', 'silver_amount']]
y = df['label']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a Random Forest Classifier
classifier = RandomForestClassifier(n_estimators=100, random_state=42)
classifier.fit(X_train, y_train)

# Make predictions on the test set
y_pred = classifier.predict(X_test)

# Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

# Function to classify a new image
def classify_image(image_path):
    image = cv2.imread(image_path)
    if image is not None:
        yellow_amount, silver_amount = calculate_color_amounts(image)
        feature = np.array([[yellow_amount, silver_amount]])
        prediction = classifier.predict(feature)
        return prediction[0]
    else:
        return None


result = classify_image('/home/pasha/.cache/kagglehub/datasets/sumn2u/garbage-classification-v2/versions/8/garbage-dataset/metal')
print("Predicted class:", result)

Accuracy: 0.22261573488489755
              precision    recall  f1-score   support

     battery       0.07      0.06      0.06       192
  biological       0.23      0.25      0.24       190
   cardboard       0.32      0.29      0.30       386
     clothes       0.34      0.38      0.36      1086
       glass       0.17      0.17      0.17       617
       metal       0.09      0.08      0.08       191
       paper       0.18      0.15      0.16       343
     plastic       0.15      0.14      0.14       392
       shoes       0.15      0.15      0.15       392
       trash       0.06      0.07      0.07       164

    accuracy                           0.22      3953
   macro avg       0.18      0.17      0.17      3953
weighted avg       0.22      0.22      0.22      3953

Predicted class: None


In [16]:
result = classify_image('/home/pasha/.cache/kagglehub/datasets/sumn2u/garbage-classification-v2/versions/8/garbage-dataset/metal/metal_876.jpg')
print("Predicted class:", result)

Predicted class: metal


