In [1]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("sumn2u/garbage-classification-v2")

print("Path to dataset files:", path)

  from .autonotebook import tqdm as notebook_tqdm


Path to dataset files: /home/pasha/.cache/kagglehub/datasets/sumn2u/garbage-classification-v2/versions/8


In [2]:
import os
import cv2
import numpy as np
import pandas as pd
classes = os.listdir(os.path.join(path, "garbage-dataset"))
print(classes)

['trash', 'glass', 'battery', 'clothes', 'metal', 'plastic', 'cardboard', 'paper', 'biological', 'shoes']


## Извлечение фичей

In [3]:
def calculate_yellow_amount(image):
    """Calculate the amount of yellow color in the image."""
    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    lower_yellow = np.array([20, 100, 100])
    upper_yellow = np.array([30, 255, 255])
    yellow_mask = cv2.inRange(hsv_image, lower_yellow, upper_yellow)
    yellow_amount = cv2.countNonZero(yellow_mask)
    return yellow_amount

def calculate_silver_amount(image):
    """Calculate the amount of silver color in the image."""
    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    lower_silver = np.array([0, 0, 200])
    upper_silver = np.array([180, 25, 255])
    silver_mask = cv2.inRange(hsv_image, lower_silver, upper_silver)
    silver_amount = cv2.countNonZero(silver_mask)
    return silver_amount

def calculate_parallel_lines(image):
    """Calculate the amount of parallel lines in the image."""
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray_image, 50, 150, apertureSize=3)
    
    # Detect lines using Hough Transform
    lines = cv2.HoughLines(edges, 1, np.pi / 180, threshold=100)
    
    if lines is not None:
        return len(lines)  # Return the number of detected lines
    return 0  # No lines detected

def get_features(image):
    """Get a set of features (yellow amount, silver amount, parallel lines) from the image."""
    yellow_amount = calculate_yellow_amount(image)
    silver_amount = calculate_silver_amount(image)
    parallel_lines = calculate_parallel_lines(image)
    return yellow_amount, silver_amount, parallel_lines

In [17]:
test_im = image = cv2.imread(path + "/garbage-dataset/cardboard/cardboard_25.jpg")
print(get_features(test_im))

(15834, 284114, 140)


## Обучение классификатора

In [5]:
import os
import cv2
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# Define the path to the dataset
base_path = path + "/garbage-dataset"
classes = ['trash', 'glass', 'battery', 'clothes', 'metal', 'plastic', 'cardboard', 'paper', 'biological', 'shoes']

# Initialize lists to hold features and labels
features = []
labels = []

# Loop through each class and extract features from images
for class_name in classes:
    class_path = os.path.join(base_path, class_name)
    
    for image_name in os.listdir(class_path):
        image_path = os.path.join(class_path, image_name)
        
        # Read the image
        image = cv2.imread(image_path)
        
        if image is not None:
            # Use the new functions to calculate yellow and silver amounts and parallel lines
            yellow_amount, silver_amount, parallel_lines = get_features(image)
            features.append([yellow_amount, silver_amount, parallel_lines])
            labels.append(class_name)

# Convert features and labels to a DataFrame
df = pd.DataFrame(features, columns=['yellow_amount', 'silver_amount', 'parallel_lines'])
df['label'] = labels

# Split the dataset into training and testing sets
X = df[['yellow_amount', 'silver_amount', 'parallel_lines']]
y = df['label']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a Random Forest Classifier
classifier = RandomForestClassifier(n_estimators=100, random_state=42)
classifier.fit(X_train, y_train)

# Make predictions on the test set
y_pred = classifier.predict(X_test)

# Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

# Function to classify a new image
def classify_image(image_path):
    image = cv2.imread(image_path)
    if image is not None:
        # Use the new functions to calculate yellow and silver amounts and parallel lines
        yellow_amount, silver_amount, parallel_lines = get_features(image)
        feature = np.array([[yellow_amount, silver_amount, parallel_lines]])
        prediction = classifier.predict(feature)
        return prediction[0]
    else:
        return None

result = classify_image(path + '/garbage-dataset/metal/metal_286.jpg')  # Adjust the path to a specific image
print("Predicted class:", result)

Accuracy: 0.3458133063496079
              precision    recall  f1-score   support

     battery       0.27      0.22      0.24       192
  biological       0.26      0.33      0.29       190
   cardboard       0.38      0.39      0.39       386
     clothes       0.51      0.67      0.58      1086
       glass       0.24      0.22      0.23       617
       metal       0.08      0.07      0.08       191
       paper       0.28      0.21      0.24       343
     plastic       0.22      0.16      0.19       392
       shoes       0.20      0.16      0.18       392
       trash       0.23      0.21      0.22       164

    accuracy                           0.35      3953
   macro avg       0.27      0.26      0.26      3953
weighted avg       0.32      0.35      0.33      3953

Predicted class: metal




In [27]:
result = classify_image(path + '/garbage-dataset/cardboard/cardboard_169.jpg')  # Adjust the path to a specific image
print("Predicted class:", result)

Predicted class: cardboard




## Проверка гипотез