In [39]:
import cv2
from skimage.feature import local_binary_pattern
import matplotlib.pyplot as plt
from cuml.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import LabelEncoder
import os
from tqdm import tqdm 
import numpy as np
from cuml.ensemble import RandomForestClassifier

In [40]:
def get_lbp(img_path, radius=1, n_points=8, method='ror'):
    image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    image=cv2.resize(image, (224, 224), interpolation=cv2.INTER_AREA)
    lbp = local_binary_pattern(image, P=n_points, R=radius, method=method)
    return lbp

In [41]:
def get_histogram(img_path,bins=[8,4,2]):
    img=cv2.imread(img_path)
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    hist = cv2.calcHist([hsv], [0, 1, 2], None, bins,[0, 180, 0, 256, 0, 256])
    cv2.normalize(hist, hist)
    return hist.flatten()

In [42]:
import cv2
import numpy as np
from skimage.feature import local_binary_pattern

def get_lbp(img_path, grid_size=3, target_size=224):
    img = cv2.imread(img_path)
    img = cv2.resize(img, (target_size, target_size))
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    
    # Tính LBP
    lbp = local_binary_pattern(gray, P=8, R=1, method='uniform')
    
    h, w = lbp.shape
    bin_h = h // grid_size
    bin_w = w // grid_size
    
    features = []
    for i in range(grid_size):
        for j in range(grid_size):
            # Lấy ô con
            patch = lbp[i*bin_h:(i+1)*bin_h, j*bin_w:(j+1)*bin_w]
            # Tính histogram cho ô này
            hist, _ = np.histogram(patch.ravel(), bins=256, range=(0, 256), density=True)
            features.append(hist)
    
    return np.concatenate(features)  # shape: (grid_size*grid_size*10,)

In [43]:
len(get_lbp("/kaggle/input/hath-food/food dataset/Images/train/Banh canh/1.jpg"))

2304

In [44]:
BASE_PATH = "/kaggle/input/hath-food/food dataset/Images"

In [45]:
def load_dataset(root_path, subset="train"):

    data = []
    labels = []
    
    path = os.path.join(root_path, subset)
    if not os.path.exists(path):
        print(f"error in finding {path}")
        return np.array([]), np.array([])

    print(f"loading {subset}...")
    
    classes = os.listdir(path)
    
    for class_name in classes:
        class_dir = os.path.join(path, class_name)
        if not os.path.isdir(class_dir):
            continue
            
      
        files = os.listdir(class_dir)
        for file in tqdm(files, desc=class_name, leave=False):
            image_path = os.path.join(class_dir, file)
            
            hist = get_lbp(image_path)
            color_hist=get_histogram(image_path)
            if hist is not None:
                data.append(np.concatenate((hist,color_hist)))
                labels.append(class_name)
                
    return np.array(data), np.array(labels)

In [47]:
X_train, y_train_text = load_dataset(BASE_PATH, "train")
X_test, y_test_text = load_dataset(BASE_PATH, "test")

loading train...


Goi cuon:  49%|████▉     | 296/598 [00:09<00:14, 20.85it/s]        Corrupt JPEG data: 229 extraneous bytes before marker 0xd9
Corrupt JPEG data: 229 extraneous bytes before marker 0xd9
Nem chua:  22%|██▏       | 84/379 [00:02<00:12, 23.04it/s]  Corrupt JPEG data: 9 extraneous bytes before marker 0xe2
Corrupt JPEG data: 9 extraneous bytes before marker 0xe2
                                                                  

loading test...


                                                                   

In [50]:
le = LabelEncoder()
y_train = le.fit_transform(y_train_text)
y_test = le.transform(y_test_text) 

In [68]:
from sklearn.svm import SVC
model = SVC(kernel='rbf', C=10.0, gamma='scale', random_state=42, cache_size=100)
model.fit(X_train, y_train)

In [69]:
y_pred = model.predict(X_test)

acc = accuracy_score(y_test, y_pred)

In [71]:
print(acc)

0.2859126984126984


In [70]:
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
import numpy as np
print("Độ chính xác (và các metric) theo từng lớp:")
print(classification_report(y_test, y_pred, target_names=le.classes_))

Độ chính xác (và các metric) theo từng lớp:
                  precision    recall  f1-score   support

        Banh beo       0.22      0.23      0.23       129
    Banh bot loc       0.18      0.10      0.13       144
        Banh can       0.26      0.21      0.23       149
       Banh canh       0.15      0.10      0.12       193
      Banh chung       0.42      0.34      0.38       102
       Banh cuon       0.27      0.38      0.32       228
        Banh duc       0.17      0.09      0.12       133
        Banh gio       0.37      0.29      0.32       129
       Banh khot       0.24      0.21      0.22       167
         Banh mi       0.31      0.44      0.36       268
        Banh pia       0.42      0.31      0.36        89
        Banh tet       0.32      0.25      0.28       138
Banh trang nuong       0.33      0.40      0.36       159
        Banh xeo       0.45      0.55      0.49       235
      Bun bo Hue       0.21      0.36      0.27       306
 Bun dau mam tom       0.26

In [14]:
import joblib
joblib.dump(model, 'svm_lbp_model.joblib')

['svm_lbp_model.joblib']

In [15]:
classifier = RandomForestClassifier(n_estimators=500,min_samples_split=2,min_samples_leaf=2,max_depth=40)
classifier.fit(X_train, y_train)
y_pred = classifier.predict(X_test)

In [16]:
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
import numpy as np
print("Độ chính xác (và các metric) theo từng lớp:")
print(classification_report(y_test, y_pred, target_names=le.classes_))

Độ chính xác (và các metric) theo từng lớp:
                  precision    recall  f1-score   support

        Banh beo       0.94      0.12      0.22       129
    Banh bot loc       0.67      0.03      0.05       144
        Banh can       0.53      0.07      0.12       149
       Banh canh       0.21      0.06      0.09       193
      Banh chung       0.72      0.23      0.34       102
       Banh cuon       0.19      0.31      0.24       228
        Banh duc       0.00      0.00      0.00       133
        Banh gio       0.44      0.09      0.14       129
       Banh khot       0.53      0.05      0.10       167
         Banh mi       0.20      0.57      0.30       268
        Banh pia       0.39      0.31      0.35        89
        Banh tet       0.33      0.12      0.18       138
Banh trang nuong       0.47      0.18      0.26       159
        Banh xeo       0.27      0.60      0.37       235
      Bun bo Hue       0.12      0.64      0.20       306
 Bun dau mam tom       0.23

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [17]:
import joblib
joblib.dump(model, 'random_forest_lbp_model.joblib')

['random_forest_lbp_model.joblib']

In [61]:
import xgboost as xgb
from sklearn.metrics import classification_report

dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

params = {
    'objective': 'multi:softmax', # trả về class trực tiếp
    'num_class': 30,
    'learning_rate': 0.1,
    'max_depth': 6,
    'eval_metric': 'mlogloss',
}

bst = xgb.train(params, dtrain, num_boost_round=300, evals=[(dtest, 'test')], early_stopping_rounds=10)

# Predict
y_pred = bst.predict(dtest)

# Report chi tiết theo class
print("Độ chính xác (và các metric) theo từng lớp:")
print(classification_report(y_test, y_pred, target_names=le.classes_))


[0]	test-mlogloss:3.31900
[1]	test-mlogloss:3.25739
[2]	test-mlogloss:3.20686
[3]	test-mlogloss:3.16242
[4]	test-mlogloss:3.12241
[5]	test-mlogloss:3.08527
[6]	test-mlogloss:3.05324
[7]	test-mlogloss:3.02250
[8]	test-mlogloss:2.99486
[9]	test-mlogloss:2.96914
[10]	test-mlogloss:2.94619
[11]	test-mlogloss:2.92414
[12]	test-mlogloss:2.90409
[13]	test-mlogloss:2.88442
[14]	test-mlogloss:2.86536
[15]	test-mlogloss:2.84740
[16]	test-mlogloss:2.83270
[17]	test-mlogloss:2.81781
[18]	test-mlogloss:2.80408
[19]	test-mlogloss:2.79139
[20]	test-mlogloss:2.77898
[21]	test-mlogloss:2.76576
[22]	test-mlogloss:2.75432
[23]	test-mlogloss:2.74338
[24]	test-mlogloss:2.73353
[25]	test-mlogloss:2.72477
[26]	test-mlogloss:2.71620
[27]	test-mlogloss:2.70746
[28]	test-mlogloss:2.69864
[29]	test-mlogloss:2.69028
[30]	test-mlogloss:2.68277
[31]	test-mlogloss:2.67547
[32]	test-mlogloss:2.66851
[33]	test-mlogloss:2.66228
[34]	test-mlogloss:2.65591
[35]	test-mlogloss:2.64975
[36]	test-mlogloss:2.64351
[37]	test-m