In [12]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("sumn2u/garbage-classification-v2")

print("Path to dataset files:", path)

Path to dataset files: /home/pasha/.cache/kagglehub/datasets/sumn2u/garbage-classification-v2/versions/8


In [13]:
import os
import cv2
import numpy as np
import pandas as pd
import warnings

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from collections import defaultdict

# Suppress warnings for better output
warnings.filterwarnings("ignore", category=UserWarning)

classes = os.listdir(os.path.join(path, "garbage-dataset"))
print(classes)

['trash', 'glass', 'battery', 'clothes', 'metal', 'plastic', 'cardboard', 'paper', 'biological', 'shoes']


## Извлечение фичей

In [14]:
def calculate_yellow_amount(image):
    """Calculate the amount of yellow color in the image."""
    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    lower_yellow = np.array([20, 100, 100])
    upper_yellow = np.array([30, 255, 255])
    yellow_mask = cv2.inRange(hsv_image, lower_yellow, upper_yellow)
    yellow_amount = cv2.countNonZero(yellow_mask)
    return yellow_amount

def calculate_silver_amount(image):
    """Calculate the amount of silver color in the image."""
    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    lower_silver = np.array([0, 0, 200])
    upper_silver = np.array([180, 25, 255])
    silver_mask = cv2.inRange(hsv_image, lower_silver, upper_silver)
    silver_amount = cv2.countNonZero(silver_mask)
    return silver_amount

def calculate_parallel_lines(image):
    """Calculate the amount of parallel lines in the image."""
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray_image, 50, 150, apertureSize=3)
    
    # Detect lines using Hough Transform
    lines = cv2.HoughLines(edges, 1, np.pi / 180, threshold=100)
    
    if lines is not None:
        return len(lines)  # Return the number of detected lines
    return 0  # No lines detected

def detect_cylinders(image):
    """Detect cylindrical shapes in the image."""
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred_image = cv2.GaussianBlur(gray_image, (9, 9), 2)

    # Detect circles using Hough Transform
    circles = cv2.HoughCircles(blurred_image, cv2.HOUGH_GRADIENT, dp=1, minDist=30,
                               param1=100, param2=30, minRadius=10, maxRadius=100)

    if circles is not None:
        return circles.shape[1]  # Return the number of detected circles
    return 0  # No circles detected

def calculate_reflection(image):
    """Calculate the amount of bright areas in the image to detect reflections."""
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Threshold to find bright areas
    _, bright_mask = cv2.threshold(gray_image, 200, 255, cv2.THRESH_BINARY)
    reflection_amount = cv2.countNonZero(bright_mask)
    return reflection_amount

def get_features(image):
    """Get a set of features (yellow amount, silver amount, parallel lines, cylindrical shapes, reflections) from the image."""
    yellow_amount = calculate_yellow_amount(image)
    silver_amount = calculate_silver_amount(image)
    parallel_lines = calculate_parallel_lines(image)
    cylinder_count = detect_cylinders(image)
    reflection_amount = calculate_reflection(image)
    return yellow_amount, silver_amount, parallel_lines, cylinder_count, reflection_amount

In [15]:
# Feature extraction testing
test_im = cv2.imread(path + "/garbage-dataset/cardboard/cardboard_25.jpg")
print(get_features(test_im))

(15834, 284114, 140, 15, 303454)


## Обучение классификатора

In [16]:
# Define the path to the dataset
base_path = path + "/garbage-dataset"
classes = ['trash', 'glass', 'battery', 'clothes', 'metal', 'plastic', 'cardboard', 'paper', 'biological', 'shoes']

# Initialize lists to hold features and labels
features = []
labels = []
test_images = []  # List to hold paths of test images

# Loop through each class and extract features from images
for class_name in classes:
    class_path = os.path.join(path + "/garbage-dataset/", class_name)
    image_names = os.listdir(class_path)

    # Split the images into training and testing sets (80% train, 20% test)
    train_images, test_images_class = train_test_split(image_names, test_size=0.2, random_state=42)

    # Process training images
    for image_name in train_images:
        image_path = os.path.join(class_path, image_name)
        image = cv2.imread(image_path)

        if image is not None:
            yellow_amount, silver_amount, parallel_lines, cylinder_count, reflection_amount = get_features(image)
            features.append([yellow_amount, silver_amount, parallel_lines, cylinder_count, reflection_amount])
            labels.append(class_name)

    # Collect test images for manual testing
    for image_name in test_images_class:
        test_images.append(os.path.join(class_path, image_name))

# Convert features and labels to a DataFrame
df = pd.DataFrame(features, columns=['yellow_amount', 'silver_amount', 'parallel_lines', 'cylinder_count', 'reflection_amount'])
df['label'] = labels

# Split the dataset into training and testing sets
X = df[['yellow_amount', 'silver_amount', 'parallel_lines', 'cylinder_count', 'reflection_amount']]
y = df['label']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a Random Forest Classifier
classifier = RandomForestClassifier(n_estimators=100, random_state=42)
classifier.fit(X_train, y_train)

# Make predictions on the test set
y_pred = classifier.predict(X_test)

# Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

# Function to classify a new image
def classify_image(image_path):
    image = cv2.imread(image_path)
    if image is not None:
        yellow_amount, silver_amount, parallel_lines, cylinder_count, reflection_amount = get_features(image)
        feature = np.array([[yellow_amount, silver_amount, parallel_lines, cylinder_count, reflection_amount]])
        prediction = classifier.predict(feature)
        return prediction[0]
    else:
        return None

Accuracy: 0.4361163820366856
              precision    recall  f1-score   support

     battery       0.29      0.36      0.32       145
  biological       0.40      0.50      0.45       138
   cardboard       0.46      0.41      0.44       311
     clothes       0.56      0.76      0.65       886
       glass       0.33      0.35      0.34       455
       metal       0.23      0.14      0.17       163
       paper       0.38      0.24      0.30       263
     plastic       0.32      0.21      0.25       327
       shoes       0.36      0.30      0.33       317
       trash       0.39      0.28      0.33       157

    accuracy                           0.44      3162
   macro avg       0.37      0.36      0.36      3162
weighted avg       0.41      0.44      0.42      3162



In [17]:
# Classifier testing (runs classifier for a specific image)
result = classify_image(path + '/garbage-dataset/metal/metal_2370.jpg') 
print("Predicted class:", result)

Predicted class: glass


In [18]:
# Classifier testing (runs classifier for the remaining images)
def test_classify(test_images, target_class_name):
    predictions = defaultdict(int)
    for test_image in test_images:
        if target_class_name in test_image:  
            predicted_class = classify_image(test_image)  
            predictions[predicted_class] += 1 
    return dict(predictions)  

for class_name in classes:
    results = test_classify(test_images, class_name)
    print("Predicted class counts for test images containing " + class_name + ':')
    for class_name, count in results.items():
        print(f"Class: {class_name}, Count: {count}")

Predicted class counts for test images containing trash:
Class: metal, Count: 4
Class: glass, Count: 43
Class: trash, Count: 60
Class: plastic, Count: 14
Class: battery, Count: 15
Class: shoes, Count: 18
Class: cardboard, Count: 8
Class: clothes, Count: 20
Class: biological, Count: 4
Class: paper, Count: 4
Predicted class counts for test images containing glass:
Class: trash, Count: 25
Class: shoes, Count: 86
Class: metal, Count: 18
Class: glass, Count: 210
Class: clothes, Count: 129
Class: plastic, Count: 35
Class: battery, Count: 23
Class: paper, Count: 18
Class: biological, Count: 26
Class: cardboard, Count: 43
Predicted class counts for test images containing battery:
Class: glass, Count: 30
Class: paper, Count: 3
Class: biological, Count: 5
Class: clothes, Count: 26
Class: battery, Count: 77
Class: plastic, Count: 4
Class: shoes, Count: 25
Class: metal, Count: 5
Class: cardboard, Count: 11
Class: trash, Count: 3
Predicted class counts for test images containing clothes:
Class: clo

## Проверка гипотез