In [1]:
import os
import numpy as np
import torch
from torch.utils.data import DataLoader
from torchvision import transforms, models
from torchvision.models import resnet50
from skimage.feature import local_binary_pattern, graycomatrix, graycoprops
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import pywt
import cv2
from keras.applications.resnet import preprocess_input


In [2]:
image_folder = r'Dataset\lung_colon_image_set\lung_image_sets'

In [3]:
import os
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
from sklearn.model_selection import train_test_split

class CustomDataset(Dataset):
    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        
        image = Image.open(self.image_paths[idx]).convert('RGB')
        label = self.labels[idx]
        
        
        if self.transform:
            image = self.transform(image)

        return image, label


classes = ['lung_aca', 'lung_n', 'lung_scc']

image_paths = []
labels = []

for label, cls in enumerate(classes):
    cls_folder = os.path.join(image_folder, cls)
    for image_name in os.listdir(cls_folder):
        image_paths.append(os.path.join(cls_folder, image_name))
        labels.append(label)


train_paths, test_paths, train_labels, test_labels = train_test_split(image_paths, labels, test_size=0.2, random_state=48)


transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])


train_dataset = CustomDataset(train_paths, train_labels, transform=transform)
test_dataset = CustomDataset(test_paths, test_labels, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=True)

In [4]:
# 1. Görüntü Ön İşleme: Histogram Eşitleme
def histogram_equalization(image):
    if len(image.shape) == 3:  # RGB görüntü
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return cv2.equalizeHist(image)

# 2. DWT (Discrete Wavelet Transform)
def dwt_features(image):
    coeffs = pywt.dwt2(image, 'db1')
    cA, (cH, cV, cD) = coeffs
    return cA.flatten()  # Ana bileşenleri döndür

# 3. GLCM Özellik Çıkarma
def glcm_features(image):
    # Görüntüyü uint8 formatına dönüştür
    image_np = (image.numpy() * 255).astype(np.uint8)
    glcm = graycomatrix(image_np, distances=[1], angles=[0], levels=256, symmetric=True, normed=True)
    contrast = graycoprops(glcm, 'contrast')[0, 0]
    dissimilarity = graycoprops(glcm, 'dissimilarity')[0, 0]
    homogeneity = graycoprops(glcm, 'homogeneity')[0, 0]
    energy = graycoprops(glcm, 'energy')[0, 0]
    return [contrast, dissimilarity, homogeneity, energy]
# 4. LBP (Local Binary Pattern)
def lbp_features(image):
    # Görüntüyü numpy dizisine ve uint8 veri tipine dönüştür
    image_np = (image.numpy() * 255).astype(np.uint8)
    # Local Binary Pattern işlemi
    lbp = local_binary_pattern(image_np, P=8, R=1, method="uniform")
    # Histogram çıkart ve döndür
    return np.histogram(lbp, bins=np.arange(0, 11), density=True)[0]


# 5. CNN Tabanlı Özellik Çıkarma (ResNet-50)
def cnn_features(image):
    model = resnet50(weights='imagenet', include_top=False, pooling='avg')
    image_resized = cv2.resize(image, (224, 224))
    image_preprocessed = preprocess_input(np.expand_dims(image_resized, axis=0))
    features = model.predict(image_preprocessed)
    return features.flatten()

# 6. PCA ile Özellik Boyutunu Küçültme
def apply_pca(features, n_components=500):
    pca = PCA(n_components=n_components)
    return pca.fit_transform(features)

def extract_features(loader):
    features = []
    labels = []

    for images, batch_labels in loader:
        for img, label in zip(images, batch_labels):
            img = img.permute(1, 2, 0).mean(dim=-1)  # RGB -> Grayscale
            
            dwt_feat = dwt_features(img)
            glcm_feat = glcm_features(img)
            lbp_feat = lbp_features(img)

            combined_features = np.concatenate([dwt_feat, glcm_feat, lbp_feat])
            features.append(combined_features)
            labels.append(label.item())

    return np.array(features), np.array(labels)



In [5]:
# Özellik çıkarma: Eğitim ve test veri seti
train_features, train_labels = extract_features(train_loader)
test_features, test_labels = extract_features(test_loader)

In [6]:
# PCA ile boyut küçültme
pca = PCA(n_components=500)
train_features_pca = pca.fit_transform(train_features)
test_features_pca = pca.transform(test_features)

In [7]:
# PCA ile boyut küçültme
pca = PCA(n_components=1024)
train_features_pca2 = pca.fit_transform(train_features)
test_features_pca2= pca.transform(test_features)

In [10]:
pca = PCA(n_components=300)
train_features_pca_3 = pca.fit_transform(train_features)
test_features_pca_3 = pca.transform(test_features)

In [11]:
pca = PCA(n_components=128)
train_features_pca_4 = pca.fit_transform(train_features)
test_features_pca_4 = pca.transform(test_features)

In [8]:
rf = RandomForestClassifier(n_estimators=400,criterion='gini',random_state=48)
rf.fit(train_features_pca,train_labels)
# Test doğruluğu
test_predictions = rf.predict(test_features_pca)
print(classification_report(test_labels, test_predictions))

              precision    recall  f1-score   support

           0       0.75      0.65      0.69       937
           1       0.76      0.89      0.82      1029
           2       0.91      0.87      0.89      1034

    accuracy                           0.81      3000
   macro avg       0.81      0.80      0.80      3000
weighted avg       0.81      0.81      0.81      3000



In [9]:
rf = RandomForestClassifier(n_estimators=400,criterion='gini',random_state=48)
rf.fit(train_features_pca2,train_labels)
# Test doğruluğu
test_predictions = rf.predict(test_features_pca2)
print(classification_report(test_labels, test_predictions))

              precision    recall  f1-score   support

           0       0.71      0.62      0.66       937
           1       0.75      0.87      0.81      1029
           2       0.90      0.85      0.87      1034

    accuracy                           0.79      3000
   macro avg       0.79      0.78      0.78      3000
weighted avg       0.79      0.79      0.78      3000



In [14]:
rf = RandomForestClassifier(n_estimators=400,criterion='gini',random_state=48)
rf.fit(train_features_pca_3,train_labels)
# Test doğruluğu
test_predictions = rf.predict(test_features_pca_3)
print(classification_report(test_labels, test_predictions))

              precision    recall  f1-score   support

           0       0.77      0.66      0.71       937
           1       0.77      0.90      0.83      1029
           2       0.91      0.88      0.89      1034

    accuracy                           0.82      3000
   macro avg       0.82      0.81      0.81      3000
weighted avg       0.82      0.82      0.81      3000



In [15]:
rf = RandomForestClassifier(n_estimators=400,criterion='gini',random_state=48)
rf.fit(train_features_pca_4,train_labels)
# Test doğruluğu
test_predictions = rf.predict(test_features_pca_4)
print(classification_report(test_labels, test_predictions))

              precision    recall  f1-score   support

           0       0.79      0.68      0.73       937
           1       0.79      0.91      0.85      1029
           2       0.91      0.89      0.90      1034

    accuracy                           0.83      3000
   macro avg       0.83      0.83      0.83      3000
weighted avg       0.83      0.83      0.83      3000

