In [1]:
import numpy as np
import pandas as pd

In [2]:
import os
import cv2
import matplotlib.pyplot as plt

In [3]:
dataset_path = 'C:\\Users\\satya\\OneDrive\\Desktop\\keerthana\\DL_APSAC\\kaggle_crack_dataset'

for label in ['positive', 'negative']:
    folder = os.path.join(dataset_path, label)
    image_files = os.listdir(folder)

    print(f"{label.upper()}: Found {len(image_files)} images")


POSITIVE: Found 20000 images
NEGATIVE: Found 20000 images


In [4]:
def load_dataset(dataset_path, img_size=(128,128)):
    data = []
    labels = []
    for label in ['positive', 'negative']:
        folder = os.path.join(dataset_path, label)
        for img_name in os.listdir(folder):
            img_path = os.path.join(folder, img_name)
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            img = cv2.resize(img, img_size)
            data.append(img)
            labels.append(1 if label == 'posiive' else 0)
    return np.array(data), np.array(labels)

In [5]:
X,y = load_dataset(dataset_path)
X.shape, y.shape

((40000, 128, 128), (40000,))

In [6]:
from skimage.filters import gabor
import numpy as np

def extract_gabor_features(images):
    gabor_feats = []
    for img in images:
        feat, _ = gabor(img, frequency=0.6)
        gabor_feats.append(feat.flatten()) 
    return np.array(gabor_feats)

In [7]:
gabor_features = extract_gabor_features(X)
print('gabor shape : ', gabor_features.shape)

gabor shape :  (40000, 16384)


In [8]:
from skimage.feature import local_binary_pattern

def extract_lbp_features(images):
    lbp_feats = []
    for img in images:
        lbp = local_binary_pattern(img, 
                                   P=8,
                                   R=1,
                                   method='uniform')
        hist,_ = np.histogram(lbp.ravel(),
                              bins=59,
                              range=(0,59))
        hist = hist.astype("float")
        hist /=(hist.sum() + 1e-7)
        lbp_feats.append(hist)
    return np.array(lbp_feats)

In [9]:
lbp_features = extract_lbp_features(X)
print('LBP shape : ', lbp_features.shape)

LBP shape :  (40000, 59)


In [None]:
from sklearn.model_selection import train_test_split 
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report 

X_train,X_test,y_train,y_test = train_test_split(gabor_features,
                                                 y,
                                                 test_size=0.2,
                                                 random_state=42)

clf = RandomForestClassifier()
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       1.00      1.00      1.00      8000

    accuracy                           1.00      8000
   macro avg       1.00      1.00      1.00      8000
weighted avg       1.00      1.00      1.00      8000



In [11]:
from sklearn.model_selection import train_test_split 
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report 

X_train,X_test,y_train,y_test = train_test_split(lbp_features,
                                                 y,
                                                 test_size=0.2,
                                                 random_state=42)

clf = RandomForestClassifier()
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       1.00      1.00      1.00      8000

    accuracy                           1.00      8000
   macro avg       1.00      1.00      1.00      8000
weighted avg       1.00      1.00      1.00      8000



In [15]:
import pickle

with open('rf_model.pkl', 'wb') as f:
    pickle.dump(clf, f)

In [12]:
# from tensorflow.keras.models import Sequential 
# from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense 
# from sklearn.model_selection import train_test_split

# X_cnn = X.reshape(-1, 128,128, 1)/255.0 #normalize
# X_train, X_test, y_train, y_test = train_test_split(X_cnn,
#                                                     y,
#                                                     test_size=0.2,
#                                                     random_state=42)

# model = Sequential([
#     Conv2D(32, (3,3), activation='relu', input_shape=(128,128,1)),
#     MaxPooling2D(),
#     Conv2D(64, (3,3), activation='relu'),
#     MaxPooling2D(),
#     Flatten(),
#     Dense(128, activation='relu'),
#     Dense(1, activation='sigmoid')
# ])

# model.compile(optimizer='adam',
#               loss='binary_crossentropy', 
#               metrics=['accuracy'])

# model.fit(X_train, y_train, epochs=5, batch_size=32, validation_data=(X_test, y_test))