In [1]:
import os
import json
import numpy as np
import cv2
import pandas as pd

from collections import OrderedDict
from tqdm import tqdm
from sklearn.preprocessing import LabelEncoder

In [2]:
mapping = {}
with open('material_dataset_135_scenes/processed_materials.json', 'r') as f:
    dataframe = json.load(f)
for img_name, data in dataframe.items():
    ordered_data = OrderedDict(sorted(data.items(), key=lambda x: x[0]))
    for key, values in ordered_data.items():
        for value in values:
            if key == 'material':
                if value not in mapping.keys():
                    mapping[value] = set()
                temp = value
            elif key == 'name' and value != '':
                mapping[temp].add(value)


def calculate_edge_quality(image):
    # Применяем фильтр Собеля для обнаружения границ
    sobel_x = cv2.Sobel(image, cv2.CV_64F, 1, 0, ksize=3)
    sobel_y = cv2.Sobel(image, cv2.CV_64F, 0, 1, ksize=3)

    # Вычисляем градиентную амплитуду и угол
    gradient_magnitude = np.sqrt(sobel_x**2 + sobel_y**2)
    gradient_angle = np.arctan2(sobel_y, sobel_x)

    # Оцениваем четкость границ (высокая амплитуда градиента = более четкие границы)
    edge_sharpness = np.mean(gradient_magnitude)

    # Вычисляем текстурность (большой разброс углов = более грубая текстура)
    angle_variance = np.var(gradient_angle)
    texture_roughness = 1 / (1 + angle_variance)

    return edge_sharpness, texture_roughness


def apply_custom_gabor_transform(image):
    # Создание собственного ядра Габора с указанным углом
    means = []
    for angle in range(0, 90, 5):
        kernel_size = 11
        sigma = 5
        theta = angle
        lambda_ = 10
        gamma = 0.5
        psi = 0
        gabor_kernel = cv2.getGaborKernel((kernel_size, kernel_size), sigma, theta, lambda_, gamma, psi)

        # Применение фильтра Габора к изображению
        filtered_image = cv2.filter2D(image, cv2.CV_8UC3, gabor_kernel)
        means.append(np.std(filtered_image))

    filtered_image = np.mean(means)
    return filtered_image


def get_statistics(image, eps=1e-10):
    # Преобразование изображения в одномерный массив интенсивностей пикселей
    intensities = image.ravel()

    # Вычисление эксцесса распределения интенсивности пикселей и станд. откл.
    mean_intensity = np.mean(intensities)
    std_dev = np.std(intensities)
    kurtosis = (np.mean((intensities - mean_intensity)**4) / (std_dev**4 + eps)) - 3

    hist = np.zeros(256)
    idxs, values = np.unique(image, return_counts=True)
    hist[idxs] = values
    hist = hist.ravel() / hist.sum()
    entropy = -np.sum(hist * np.log2(hist + eps))
    return kurtosis, std_dev, entropy

def check_substring(string, substring):
    index = string.find(substring)
    if index != -1:
        return True
    else:
        return False

def get_label(gt, mapping):
    ''' Сформировать целевую переменную (строка, указывающая материал) '''
    label = gt[0]
    return label

main_dir = 'material_dataset_135_scenes'
for mode in ['train_data_reduced.json', 'val_data_reduced.json', 'test_data_reduced.json']:
    if mode == 'train_data_reduced.json':
        with open(os.path.join(main_dir, mode), 'r') as f:
            train_df = json.load(f)
    elif mode == 'val_data_reduced.json':
        with open(os.path.join(main_dir, mode), 'r') as f:
            val_df = json.load(f)
    elif mode == 'test_data_reduced.json':
        with open(os.path.join(main_dir, mode), 'r') as f:
            test_df = json.load(f)

train_images = []
train_labels = []
test_images = []
test_labels = []
train_lwi = []
test_lwi = []
images = []
for subdir in os.listdir(main_dir):
    if not os.path.isdir(os.path.join(main_dir, subdir)):
        continue
    files = os.listdir(os.path.join(main_dir, subdir))
    for file in files:
        if get_label(file.lower(), mapping) is not None:
            images.append(os.path.join(main_dir, subdir, file))
            dir = os.path.join(main_dir.split("/")[-1], subdir, file)
            if dir in train_df.keys() or dir in val_df.keys():
                train_images.append(os.path.join(main_dir, subdir, file))
                train_labels.append(get_label(dataframe[dir]['material'], mapping))
                train_lwi.append([file, get_label(dataframe[dir]['material'], mapping)])
            elif dir in test_df.keys():
                test_images.append(os.path.join(main_dir, subdir, file))
                test_labels.append(get_label(dataframe[dir]['material'], mapping))
                test_lwi.append([file, get_label(dataframe[dir]['material'], mapping)])

train_features = []
for img_num, img in enumerate(tqdm(train_images)):
    img_name = img.split('/')[-1].split('.')[0]
    image = cv2.imread(img, cv2.IMREAD_GRAYSCALE)
    target_size = (256, 256)
    image = cv2.resize(image, target_size, interpolation=cv2.INTER_CUBIC)
    edge_sharpness, texture_roughness = calculate_edge_quality(image)
    fragility = apply_custom_gabor_transform(image)
    kurtosis, std, entropy = get_statistics(image)
    train_features.append([img_name, edge_sharpness, texture_roughness, fragility, kurtosis, std, entropy])

test_features = []
for img_num, img in enumerate(tqdm(test_images)):
    img_name = img.split('/')[-1].split('.')[0]
    image = cv2.imread(img, cv2.IMREAD_GRAYSCALE)
    target_size = (256, 256)
    image = cv2.resize(image, target_size, interpolation=cv2.INTER_CUBIC)
    edge_sharpness, texture_roughness = calculate_edge_quality(image)
    fragility = apply_custom_gabor_transform(image)
    kurtosis, std, entropy = get_statistics(image)
    test_features.append([img_name, edge_sharpness, texture_roughness, fragility, kurtosis, std, entropy])

print(f'Train: {len(train_features)}, Test: {len(test_features)}')

100%|██████████| 23126/23126 [04:30<00:00, 85.37it/s]
100%|██████████| 5306/5306 [01:01<00:00, 85.66it/s]


5306

In [3]:
small_test_features = []
for img_num, img in enumerate(tqdm(os.listdir('clip2test'))):
    img = os.path.join('clip2test', img)
    img_name = img.split('/')[-1].split('.')[0]
    image = cv2.imread(img, cv2.IMREAD_GRAYSCALE)
    target_size=(256, 256)
    image = cv2.resize(image, target_size, interpolation=cv2.INTER_CUBIC)
    edge_sharpness, texture_roughness = calculate_edge_quality(image)
    fragility = apply_custom_gabor_transform(image)
    kurtosis, std, entropy = get_statistics(image)
    small_test_features.append([img_name, edge_sharpness, texture_roughness, fragility, kurtosis, std, entropy])
small_test_features_df = pd.DataFrame(small_test_features, columns=['image_name', 'edge_sharpness', 'texture_roughness',
                                           'fragility', 'kurtosis', 'std', 'entropy'])

100%|██████████| 5/5 [00:00<00:00, 73.14it/s]

clip2test/cam_00_0071_lamp_67.png
clip2test/cam_00_0047_equipment_48.png
clip2test/cam_00_0019_woodwork_03.png
clip2test/cam_00_0004_equipment_94.png
clip2test/cam_00_0028_carafe.png





In [5]:
train_features_df = pd.DataFrame(train_features, columns=['image_name', 'edge_sharpness', 'texture_roughness',
                                           'fragility', 'kurtosis', 'std', 'entropy']).iloc[:, 1:]
test_features_df = pd.DataFrame(test_features, columns=['image_name', 'edge_sharpness', 'texture_roughness',
                                           'fragility', 'kurtosis', 'std', 'entropy']).iloc[:, 1:]
train_features_df.head()

Unnamed: 0,edge_sharpness,texture_roughness,fragility,kurtosis,std,entropy
0,6.990025,0.888812,45.832957,35.926513,28.126146,0.470099
1,36.95542,0.343047,125.369179,-1.813203,95.414727,5.066559
2,69.37003,0.276115,103.679731,-1.183836,87.899768,6.487707
3,15.313591,0.469666,112.159605,0.016655,39.021435,2.78537
4,22.827642,0.327989,123.807865,-1.781396,59.153972,4.812185


In [6]:
print(train_features_df.shape, test_features_df.shape)
print(train_features_df.isna().any().any(), test_features_df.isna().any().any())
print(train_features_df.isna().mean())

(23126, 6) (5306, 6)
False False
edge_sharpness       0.0
texture_roughness    0.0
fragility            0.0
kurtosis             0.0
std                  0.0
entropy              0.0
dtype: float64


In [7]:
le = LabelEncoder()
train_labels_encoded = le.fit_transform(train_labels)
test_labels_encoded = le.transform(test_labels)

In [20]:
_labels = ["wood", "glass", "metal", "plastic", "wood", "textile"]
label_map = {index: label for index, label in enumerate(le.classes_)}
print("Label Map:", label_map)

Label Map: {0: 'ceramics', 1: 'glass', 2: 'metal', 3: 'paper', 4: 'plastic', 5: 'textile', 6: 'wood'}


In [8]:
train_labels_df = pd.Series(train_labels_encoded)
test_labels_df = pd.Series(test_labels_encoded)

In [9]:
l0, l1 = np.sum(train_labels_df==0), np.sum(train_labels_df==1)
l2, l3 =  np.sum(train_labels_df==2), np.sum(train_labels_df==3)
l4, l5, l6 = np.sum(train_labels_df==4), np.sum(train_labels_df==5), np.sum(train_labels_df==6)
lmax = max(l0, l1, l2, l3, l4, l5, l6)
to_add0, to_add1, to_add2, to_add3, to_add4, to_add5, to_add6 = lmax - l0, lmax - l1, lmax - l2, lmax - l3, lmax - l4, lmax - l5, lmax - l6
print(to_add0, to_add1, to_add2, to_add3, to_add4, to_add5, to_add6)
print(l0, l1, l2, l3, l4, l5, l6, mapping.keys())

5815 1953 455 6032 3706 4966 0
764 4626 6124 547 2873 1613 6579 dict_keys(['wood', 'textile', 'ceramics', 'metal', 'glass', 'plastic', 'paper'])


In [10]:
np.random.seed(2)
indices_to_add0 = np.random.randint(0, l0, size=to_add0)
indices_to_add1 = np.random.randint(0, l1, size=to_add1)
indices_to_add2 = np.random.randint(0, l2, size=to_add2)
indices_to_add3 = np.random.randint(0, l3, size=to_add3)
indices_to_add4 = np.random.randint(0, l4, size=to_add4)
indices_to_add5 = np.random.randint(0, l5, size=to_add5)
indices_to_add6 = np.random.randint(0, l6, size=to_add6)
train_features_np = np.array(train_features_df)
X_train_to_add0 = train_features_np[train_labels_df.to_numpy()==0, :][indices_to_add0, :]
X_train_to_add1 = train_features_np[train_labels_df.to_numpy()==1, :][indices_to_add1, :]
X_train_to_add2 = train_features_np[train_labels_df.to_numpy()==2, :][indices_to_add2, :]
X_train_to_add3 = train_features_np[train_labels_df.to_numpy()==3, :][indices_to_add3, :]
X_train_to_add4 = train_features_np[train_labels_df.to_numpy()==4, :][indices_to_add4, :]
X_train_to_add5 = train_features_np[train_labels_df.to_numpy()==5, :][indices_to_add5, :]
X_train_to_add6 = train_features_np[train_labels_df.to_numpy()==6, :][indices_to_add6, :]

In [11]:
train_features_np_equal = np.vstack([train_features_np, X_train_to_add0, X_train_to_add1, X_train_to_add2,
                  X_train_to_add3, X_train_to_add4, X_train_to_add5, X_train_to_add6])

In [12]:
y_train_to_add0 = np.zeros(X_train_to_add0.shape[0])
y_train_to_add1, y_train_to_add2 = np.zeros(X_train_to_add1.shape[0]) + 1, np.zeros(X_train_to_add2.shape[0]) + 2
y_train_to_add3, y_train_to_add4 = np.zeros(X_train_to_add3.shape[0]) + 3, np.zeros(X_train_to_add4.shape[0]) + 4
y_train_to_add5, y_train_to_add6 = np.zeros(X_train_to_add5.shape[0]) + 5, np.zeros(X_train_to_add6.shape[0]) + 6
train_labels_np_equal = np.array(train_labels_df)
train_labels_np_equal = np.hstack([train_labels_np_equal, y_train_to_add0, y_train_to_add1, y_train_to_add2,
                     y_train_to_add3, y_train_to_add4, y_train_to_add5, y_train_to_add6])
train_labels_np_equal.shape, train_features_np_equal.shape

((46053,), (46053, 6))

# RandomForestClassifier Model

In [17]:
import joblib
from sklearn.ensemble import RandomForestClassifier

rf = RandomForestClassifier(n_estimators=50, max_depth=35, criterion='entropy',
                            class_weight='balanced', random_state=2)
rf.fit(train_features_np_equal, train_labels_np_equal)


In [18]:
model_save_path = 'random_forest_model.pkl'
joblib.dump(rf, model_save_path)
print(f"Model saved to {model_save_path}")

Model saved to random_forest_model.pkl


In [15]:
predicts = rf.predict(test_features_df.values)
faults = []
cnt = 0
for i in range(len(predicts)):
    if predicts[i] != test_labels_df[i]:
        faults.append(i)
    else:
        cnt += 1
print(f"Accuracy = {round(cnt / len(predicts) * 100, 2)}")

Accuracy = 51.64


In [16]:
predicts = rf.predict(small_test_features_df.iloc[:, 1:].values)
faults = []
cnt = 0
for i in range(len(predicts)):
    if predicts[i] != test_labels_df[i]:
        faults.append(i)
    else:
        cnt += 1
    print(small_test_features_df.iloc[i, 0], le.inverse_transform([round(predicts[i])]))
print(f"Accuracy = {round(cnt / len(predicts) * 100, 2)}")

cam_00_0071_lamp_67 ['ceramics']
cam_00_0047_equipment_48 ['plastic']
cam_00_0019_woodwork_03 ['glass']
cam_00_0004_equipment_94 ['paper']
cam_00_0028_carafe ['metal']
Accuracy = 20.0


In [133]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
import numpy as np

param_grid = {
    'n_estimators': [25, 50, 75, 100],
    'max_depth': [15, 20, 25, 30],
    'criterion': ['gini', 'entropy'],
    'class_weight': ['balanced', None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

rf = RandomForestClassifier(random_state=2)

grid_search = GridSearchCV(estimator=rf, param_grid=param_grid, cv=5, n_jobs=-1, verbose=2, scoring='accuracy')
grid_search.fit(train_features_np_equal, train_labels_np_equal)

best_params = grid_search.best_params_
print(f"Best Parameters: {best_params}")

best_rf = grid_search.best_estimator_
predictions = best_rf.predict(test_features_df.values)
final_accuracy = accuracy_score(test_labels_df, predictions)
print(f"Final Accuracy with Best Parameters = {round(final_accuracy * 100, 2)}")

Fitting 5 folds for each of 576 candidates, totalling 2880 fits


[CV] END class_weight=balanced, criterion=gini, max_depth=15, min_samples_leaf=1, min_samples_split=2, n_estimators=25; total time=   2.1s
[CV] END class_weight=balanced, criterion=gini, max_depth=15, min_samples_leaf=1, min_samples_split=5, n_estimators=25; total time=   2.1s
[CV] END class_weight=balanced, criterion=gini, max_depth=15, min_samples_leaf=1, min_samples_split=5, n_estimators=25; total time=   2.1s
[CV] END class_weight=balanced, criterion=gini, max_depth=15, min_samples_leaf=1, min_samples_split=2, n_estimators=25; total time=   2.3s
[CV] END class_weight=balanced, criterion=gini, max_depth=15, min_samples_leaf=1, min_samples_split=2, n_estimators=25; total time=   2.4s
[CV] END class_weight=balanced, criterion=gini, max_depth=15, min_samples_leaf=1, min_samples_split=5, n_estimators=25; total time=   2.3s
[CV] END class_weight=balanced, criterion=gini, max_depth=15, min_samples_leaf=1, min_samples_split=2, n_estimators=25; total time=   2.5s
[CV] END class_weight=balan

# AdaBoostClassifier Model

In [112]:
from sklearn.ensemble import AdaBoostClassifier

ada = AdaBoostClassifier(n_estimators=50, random_state=2)
ada.fit(train_features_np_equal, train_labels_np_equal)

predicts = ada.predict(test_features_df.values)
faults = []
cnt = 0
for i in range(len(predicts)):
    if predicts[i] != test_labels_df[i]:
        faults.append(i)
    else:
        cnt += 1

print(f"AdaBoost Accuracy = {round(cnt / len(predicts) * 100, 2)}")

AdaBoost Accuracy = 39.05


# XGBClassifier Model

In [107]:
from xgboost import XGBClassifier

xgb = XGBClassifier(n_estimators=20, max_depth=15, subsample=0.8, colsample_bytree=0.8, 
                    random_state=2, n_jobs=-1, use_label_encoder=False, eval_metric='mlogloss')
xgb.fit(train_features_np_equal, train_labels_np_equal)

predicts = xgb.predict(test_features_df.values)
faults = []
cnt = 0
for i in range(len(predicts)):
    if predicts[i] != test_labels_df[i]:
        faults.append(i)
    else:
        cnt += 1

print(f"XGBoost Accuracy = {round(cnt / len(predicts) * 100, 2)}")

XGBoost Accuracy = 50.4


# LGBMClassifier Model

In [140]:
from lightgbm import LGBMClassifier

lgbm = LGBMClassifier(n_estimators=200, max_depth=15, random_state=2)
lgbm.fit(train_features_np_equal, train_labels_np_equal)

predicts = lgbm.predict(test_features_df.values)
faults = []
cnt = 0
for i in range(len(predicts)):
    if predicts[i] != test_labels_df[i]:
        faults.append(i)
    else:
        cnt += 1

print(f"LightGBM Accuracy = {round(cnt / len(predicts) * 100, 2)}")

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000924 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1530
[LightGBM] [Info] Number of data points in the train set: 46053, number of used features: 6
[LightGBM] [Info] Start training from score -1.945910
[LightGBM] [Info] Start training from score -1.945910
[LightGBM] [Info] Start training from score -1.945910
[LightGBM] [Info] Start training from score -1.945910
[LightGBM] [Info] Start training from score -1.945910
[LightGBM] [Info] Start training from score -1.945910
[LightGBM] [Info] Start training from score -1.945910
LightGBM Accuracy = 46.95


# RandomForestClassifier benchmarking

In [141]:
import time

predicts = []
faults = []
cnt = 0
times = []

for i in tqdm(range(len(test_labels_df))):
    start_time = time.time()
    predict = rf.predict(np.array(test_features_df.iloc[i, :]).reshape(1, -1))    
    end_time = time.time()

    times.append((end_time - start_time)*1000)

    if predict[0] != test_labels_df[i]:
        faults.append(i)
    else:
        cnt += 1

    predicts.append(predict[0])

print(f"Number of correct predictions: {round(100 * cnt / len(predicts), 2)}%")
print(f"Average time: {round(sum(times) / len(times), 2)}ms")

100%|██████████| 5306/5306 [00:05<00:00, 900.92it/s]

Number of correct predictions: 51.64%
Average time: 1.1ms





In [48]:
import time

predicts = []
faults = []
cnt = 0
times = []

for i in tqdm(range(len(small_test_features_df))):
    start_time = time.time()
    predict = rf.predict(np.array(small_test_features_df.iloc[i, 1:]).reshape(1, -1))    
    end_time = time.time()

    times.append((end_time - start_time)*1000)

    if predict[0] != test_labels_df[i]:
        faults.append(i)
    else:
        cnt += 1

    print(small_test_features_df.iloc[i, 0], le.inverse_transform([round(predict[0])]))
    predicts.append(predict[0])

print(f"Number of correct predictions: {round(100 * cnt / len(predicts), 2)}%")
print(f"Average time: {round(sum(times) / len(times), 2)}ms")

100%|██████████| 1/1 [00:00<00:00, 346.61it/s]

cam_00_0019_woodwork_03 ['glass']
Number of correct predictions: 100.0%
Average time: 2.5ms



