In [None]:
import tensorflow as tf
import cv2 
from skimage import morphology
from skimage.filters import threshold_multiotsu

def augment_image(file_path, threshold_val):
    """Randomly augment one image and overwrite it on disk as JPEG.

    The file is decoded as a single-channel image, converted to float32,
    resized to 224x224 and then passed through three independent random
    steps. Each step is applied only when a fresh uniform draw in [0, 1)
    is greater than `threshold_val`, so a higher threshold means fewer
    augmentations.

    Args:
        file_path: Path of the image to read; the result is written back to
            this same path.
        threshold_val: Value each uniform draw must exceed for the
            corresponding augmentation to be applied.

    Returns:
        None. The augmented image is JPEG-encoded and written to `file_path`
        regardless of the file's extension.
    """
    file_contents = tf.io.read_file(file_path)
    image = tf.io.decode_image(file_contents, channels=1, dtype=tf.uint8)
    image = tf.image.convert_image_dtype(image, tf.float32)

    image = tf.image.resize(image, [224, 224])

    # Random horizontal flip.
    if tf.random.uniform(()) > threshold_val:
        image = tf.image.flip_left_right(image)

    # Random rotation by k * 90 degrees; k is drawn from {0, 1, 2, 3}, so
    # k == 0 leaves the image unrotated.
    if tf.random.uniform(()) > threshold_val:
        k = tf.random.uniform((), minval=0, maxval=4, dtype=tf.int32)
        image = tf.image.rot90(image, k=k)

    # Random brightness shift in [-0.2, 0.2).
    if tf.random.uniform(()) > threshold_val:
        delta = tf.random.uniform((), minval=-0.2, maxval=0.2)
        image = tf.image.adjust_brightness(image, delta)

    # The brightness shift can push values outside [0, 1]; without
    # saturate=True the float -> uint8 cast may over/underflow instead of clipping.
    image_uint8 = tf.image.convert_image_dtype(image, tf.uint8, saturate=True)
    encoded_image = tf.io.encode_jpeg(image_uint8)
    tf.io.write_file(file_path, encoded_image)


In [None]:
def process_image(image_path):
    """
    Segment one image with multi-level Otsu thresholding and save it as PNG.

    Loads the image from `image_path` and performs:
      1. Decoding as single-channel float32 and resizing to 224x224
      2. Standardization (per-image z-score)
      3. Min-max normalization
      4. Watershed segmentation (computed, but its result is not used below)
      5. Multilevel thresholding into 3 classes
    The class map, multiplied by 127, is PNG-encoded and written to
    `image_path` with "/kaggle/input" replaced by "/kaggle/working". The
    source file itself is not overwritten.

    Args:
        image_path: Path of the image file to read.

    Returns:
        None.

    NOTE(review): `filters`, `ndimage`, `peak_local_max`, `watershed` and `np`
    are imported in a cell that appears after this definition, so that cell
    must run before this function is called.
    """
    # 1) LOAD the image via TensorFlow
    file_contents = tf.io.read_file(image_path)
    # Decode as a single-channel image (grayscale).
    # If your images are actually RGB, set channels=3 instead.
    img = tf.io.decode_image(file_contents, channels=1, dtype=tf.float32)
    img = tf.image.resize(img, [224, 224])
    # ----------------------------------------------------------------
    # 2) STANDARDIZATION (z-score: zero mean, unit variance)
    #    per_image_standardization does: (x - mean) / adjusted_stddev
    # ----------------------------------------------------------------
    standardized = tf.image.per_image_standardization(img)

    # ----------------------------------------------------------------
    # 3) NORMALIZATION (scale pixels to [0,1])
    #    The 1e-8 term avoids division by zero for a constant image.
    # ----------------------------------------------------------------
    std_np = standardized.numpy().squeeze() 
    min_val, max_val = std_np.min(), std_np.max()
    normalized_np = (std_np - min_val) / (max_val - min_val + 1e-8)

    # ----------------------------------------------------------------
    # 4) WATERSHED SEGMENTATION
    #    We'll create a simple binary mask using Otsu's threshold
    #    and then apply the watershed algorithm.
    #    NOTE(review): `labels_ws` is never read after it is computed;
    #    confirm whether this step should feed the final output or be removed.
    # ----------------------------------------------------------------
    otsu_thresh = filters.threshold_otsu(normalized_np)
    binary_mask = normalized_np > otsu_thresh
    distance = ndimage.distance_transform_edt(binary_mask)

    # Step 1: get peak coordinates (Nx2 array)
    coords = peak_local_max(distance, min_distance=10, labels=binary_mask)

    # Step 2: build a boolean mask from these coordinates
    local_max_mask = np.zeros_like(distance, dtype=bool)
    local_max_mask[coords[:, 0], coords[:, 1]] = True

    # Step 3: label the maxima
    markers = ndimage.label(local_max_mask)[0]

    # Step 4: perform watershed
    labels_ws = watershed(-distance, markers, mask=binary_mask)

    # ----------------------------------------------------------------
    # 5) MULTI-LEVEL THRESHOLDING
    #    We'll separate the normalized image into three classes as example.
    #    Increase or decrease `classes` parameter as needed.
    # ----------------------------------------------------------------
    thresholds = threshold_multiotsu(normalized_np, classes=3)
    multi_thresh_img = np.digitize(normalized_np, bins=thresholds)

    # ----------------------------------------------------------------
    # Choose how you want the final image to look:
    # For demonstration, we'll take the multi-level thresholding result
    # as the final output. You might also combine or visualize watershed
    # differently. Feel free to adapt as necessary.
    # Class indices 0/1/2 become gray levels 0/127/254.
    # ----------------------------------------------------------------
    final_img_np = (multi_thresh_img * 127).astype(np.uint8)

    # Re-encode via TensorFlow and write to the mirrored output path
    final_img_tf = tf.expand_dims(tf.convert_to_tensor(final_img_np), axis=-1)
    encoded_img = tf.io.encode_png(final_img_tf)
    out_filepath = image_path.replace("/kaggle/input", "/kaggle/working")
    tf.io.write_file(out_filepath, encoded_img)

In [None]:
import os
import tensorflow as tf
import numpy as np

from skimage import filters
from skimage.filters import threshold_multiotsu
from skimage.segmentation import watershed
from skimage.feature import peak_local_max
from scipy import ndimage
from skimage.feature import peak_local_max
from scipy import ndimage


In [None]:
# Run process_image on every non-empty file found under /kaggle/input.
# process_image derives its own output path, so none is computed here.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        in_path = os.path.join(dirname, filename)
        if os.path.getsize(in_path) == 0:
            print(f"Skipping empty file: {in_path}")
            continue
        print(in_path)
        process_image(in_path)


In [None]:
import shutil

train_set = "/kaggle/working/pcos-detection-using-ultrasound-images/data/train"
train_40_set = "/kaggle/working/pcos-detection-using-ultrasound-images/data/train_40"
train_70_set = "/kaggle/working/pcos-detection-using-ultrasound-images/data/train_70"

# Rebuild each augmentation working copy from scratch: drop any stale
# directory, then copy the processed training set into its place.
for _copy_dir in (train_40_set, train_70_set):
    if os.path.exists(_copy_dir):
        shutil.rmtree(_copy_dir)
    shutil.copytree(train_set, _copy_dir)

In [None]:
def call_train_set(train_set, threshhold):
    """Run `augment_image` on every file found beneath `train_set`.

    Args:
        train_set: Root directory that is walked recursively.
        threshhold: Forwarded unchanged as the threshold argument of
            `augment_image`.
    """
    file_paths = (
        os.path.join(root, name)
        for root, _subdirs, names in os.walk(train_set)
        for name in names
    )
    for file_path in file_paths:
        augment_image(file_path, threshhold)

In [None]:
# Augment each working copy in place, each with its own threshold.
for _subset_dir, _subset_threshold in ((train_40_set, 0.4), (train_70_set, 0.7)):
    call_train_set(_subset_dir, _subset_threshold)

In [None]:
import tensorflow as tf
import os
import numpy as np
import matplotlib.pyplot as plt

# Number of images that received each augmentation (or combination).
augmentation_counts = dict.fromkeys(
    ("flip", "rotate", "brightness", "at_least_two", "all_three"), 0
)

# Sample images per category; each category gets its own list.
example_images = {
    category: []
    for category in ("flip", "rotate", "brightness", "at_least_two", "all_three")
}

def augment_image(file_path, threshold_val):
    """Randomly augment one image in place and record what was applied.

    The file is decoded as a single-channel image and converted to float32.
    A horizontal flip, a rotation by a random multiple of 90 degrees and a
    brightness shift are each applied only when a fresh uniform draw in
    [0, 1) is greater than `threshold_val`.

    Side effects:
        * Increments the matching entries of the module-level
          `augmentation_counts`, including "at_least_two" and "all_three".
        * Appends the augmented image to the matching lists of the
          module-level `example_images`, at most 4 per list.
        * Overwrites `file_path` with the JPEG-encoded result.

    Args:
        file_path: Path of the image to read and overwrite.
        threshold_val: Value each uniform draw must exceed for the
            corresponding augmentation to be applied.
    """
    file_contents = tf.io.read_file(file_path)
    image = tf.io.decode_image(file_contents, channels=1, dtype=tf.uint8)
    image = tf.image.convert_image_dtype(image, tf.float32)

    applied_augmentations = []

    # Flip
    if tf.random.uniform(()) > threshold_val:
        image = tf.image.flip_left_right(image)
        applied_augmentations.append("flip")

    # Rotate (k is drawn from {0, 1, 2, 3}; k == 0 leaves the image unrotated)
    if tf.random.uniform(()) > threshold_val:
        k = tf.random.uniform((), minval=0, maxval=4, dtype=tf.int32)
        image = tf.image.rot90(image, k=k)
        applied_augmentations.append("rotate")

    # Brightness
    if tf.random.uniform(()) > threshold_val:
        delta = tf.random.uniform((), minval=-0.2, maxval=0.2)
        image = tf.image.adjust_brightness(image, delta)
        applied_augmentations.append("brightness")

    # Every category this image belongs to: the individual augmentations
    # plus the combination buckets.
    categories = list(applied_augmentations)
    if len(applied_augmentations) >= 2:
        categories.append("at_least_two")
    if len(applied_augmentations) == 3:
        categories.append("all_three")

    for category in categories:
        augmentation_counts[category] += 1
        if len(example_images[category]) < 4:
            example_images[category].append(image.numpy())

    # The brightness shift can push values outside [0, 1]; without
    # saturate=True the float -> uint8 cast may over/underflow instead of clipping.
    image_uint8 = tf.image.convert_image_dtype(image, tf.uint8, saturate=True)
    encoded_image = tf.io.encode_jpeg(image_uint8)
    tf.io.write_file(file_path, encoded_image)

def call_train_set(train_set, threshhold):
    """Walk `train_set` recursively and augment every file in it.

    Args:
        train_set: Root directory to walk.
        threshhold: Passed through to `augment_image` for each file.
    """
    for root, _subdirs, names in os.walk(train_set):
        for file_path in (os.path.join(root, name) for name in names):
            augment_image(file_path, threshhold)

def display_images():
    """Show a 5x4 grid of the collected samples, one row per category.

    Cells without a collected sample stay empty but still get a title.
    """
    titles = ["flip", "rotate", "brightness", "at_least_two", "all_three"]
    fig, axes = plt.subplots(5, 4, figsize=(15, 15))

    for row_axes, key in zip(axes, titles):
        samples = example_images[key]
        for column, ax in enumerate(row_axes):
            if column < len(samples):
                ax.imshow(samples[column].squeeze(), cmap='gray')
            ax.set_title(f"{key} {column+1}")
            ax.axis("off")

    plt.show()

import shutil

train_set = "/kaggle/working/pcos-detection-using-ultrasound-images/data/train"
train_40_set = "/kaggle/working/pcos-detection-using-ultrasound-images/data/train_40"
train_70_set = "/kaggle/working/pcos-detection-using-ultrasound-images/data/train_70"

# augment_image overwrites files in place, so running it on copies that were
# already augmented (by an earlier cell or a re-run of this one) would stack
# augmentations and JPEG re-compression. Rebuild each copy from the
# unaugmented training set first so every image is augmented exactly once.
for _augmented_dir, _threshold in ((train_40_set, 0.4), (train_70_set, 0.7)):
    if os.path.exists(_augmented_dir):
        shutil.rmtree(_augmented_dir)
    shutil.copytree(train_set, _augmented_dir)
    call_train_set(_augmented_dir, _threshold)

# The counters are shared, so the totals below cover both copies together.
print("Statystyki augmentacji:")
for key, value in augmentation_counts.items():
    print(f"{key}: {value}")

display_images()

In [None]:
# Sample outputs kept for display_segmented_images (at most 4).
segmented_images = {
    "segmented": []
}

def preprocess_image(image_path):
    """Blur, threshold and morphologically clean one image, then save it as PNG.

    Steps: decode as single-channel uint8, resize to 224x224, 5x5 Gaussian
    blur, histogram equalization when max - min < 50, 3-class multi-Otsu
    thresholding, then removal of small objects and small holes. The result
    is written to `image_path` with "/kaggle/input" replaced by
    "/kaggle/working".

    Side effects:
        Appends the result to `segmented_images["segmented"]` until it holds
        4 images. Only the first 4 are ever displayed, so keeping every
        processed image would just grow memory with the dataset size.

    Args:
        image_path: Path of the image file to read.
    """
    file_contents = tf.io.read_file(image_path)
    img = tf.io.decode_image(file_contents, channels=1, dtype=tf.uint8)
    img = tf.image.resize(img, [224, 224])
    img_np = img.numpy().squeeze().astype(np.uint8)

    # Gaussian Blur
    img_np = cv2.GaussianBlur(img_np, (5, 5), 0)

    # Contrast Enhancement (only for low-contrast images)
    contrast = img_np.max() - img_np.min()
    if contrast < 50:
        img_np = cv2.equalizeHist(img_np)

    # Multilevel Thresholding (directly on the processed image, skipping binarization)
    thresholds = threshold_multiotsu(img_np, classes=3)
    segmented = np.digitize(img_np, bins=thresholds)

    # Morphological Processing
    # NOTE(review): astype(bool) merges classes 1 and 2 into one foreground,
    # so the saved image only holds the values 0 and 127 - confirm intended.
    segmented = morphology.remove_small_objects(segmented.astype(bool), min_size=30)
    segmented = morphology.remove_small_holes(segmented, area_threshold=50)

    final_img_np = (segmented * 127).astype(np.uint8)
    if len(segmented_images["segmented"]) < 4:
        segmented_images["segmented"].append(final_img_np)

    final_img_tf = tf.expand_dims(tf.convert_to_tensor(final_img_np), axis=-1)
    encoded_img = tf.io.encode_png(final_img_tf)
    out_filepath = image_path.replace("/kaggle/input", "/kaggle/working")
    tf.io.write_file(out_filepath, encoded_img)

def display_segmented_images():
    """Show up to four stored segmentation results in a single row."""
    samples = segmented_images["segmented"]
    fig, axes = plt.subplots(1, 4, figsize=(15, 5))
    for column, ax in enumerate(axes):
        if column < len(samples):
            ax.imshow(samples[column].squeeze(), cmap='gray')
        ax.set_title(f"Segmented {column+1}")
        ax.axis("off")
    plt.show()

# Preprocess every non-empty input file, then preview a few results.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        in_path = os.path.join(dirname, filename)
        if os.path.getsize(in_path) != 0:
            preprocess_image(in_path)
            continue
        print(f"Skipping empty file: {in_path}")

display_segmented_images()


In [None]:
import cv2 
# Up to four sample images per pipeline stage, shown by display_segmented_images.
segmented_images = {
    stage_name: []
    for stage_name in ("blurred", "contrast_enhanced", "thresholded", "morphological")
}

def preprocess_image(image_path):
    """Preprocess one image, keep per-stage samples, and save the result as PNG.

    Stages: 5x5 Gaussian blur, histogram equalization when max - min < 50,
    3-class multi-Otsu thresholding, and removal of small objects and holes.
    The first four images seen at each stage are stored in the module-level
    `segmented_images`. The final image is written to `image_path` with
    "/kaggle/input" replaced by "/kaggle/working".

    Args:
        image_path: Path of the image file to read.
    """
    def keep_sample(stage, picture):
        # Store the picture only while the stage still has fewer than 4 samples.
        bucket = segmented_images[stage]
        if len(bucket) < 4:
            bucket.append(picture)

    raw_bytes = tf.io.read_file(image_path)
    decoded = tf.io.decode_image(raw_bytes, channels=1, dtype=tf.uint8)
    resized = tf.image.resize(decoded, [224, 224])
    img_np = resized.numpy().squeeze().astype(np.uint8)

    # Stage 1: Gaussian blur
    img_np = cv2.GaussianBlur(img_np, (5, 5), 0)
    keep_sample("blurred", img_np)

    # Stage 2: equalize only low-contrast images
    if img_np.max() - img_np.min() < 50:
        img_np = cv2.equalizeHist(img_np)
    keep_sample("contrast_enhanced", img_np)

    # Stage 3: multi-level thresholding straight on the processed image
    thresholds = threshold_multiotsu(img_np, classes=3)
    class_map = np.digitize(img_np, bins=thresholds)
    if len(segmented_images["thresholded"]) < 4:
        segmented_images["thresholded"].append((class_map * 127).astype(np.uint8))

    # Stage 4: morphological clean-up of the boolean foreground mask
    foreground = morphology.remove_small_objects(class_map.astype(bool), min_size=30)
    foreground = morphology.remove_small_holes(foreground, area_threshold=50)
    final_img_np = (foreground * 127).astype(np.uint8)
    keep_sample("morphological", final_img_np)

    # Encode as single-channel PNG and write to the mirrored output path
    png_bytes = tf.io.encode_png(
        tf.expand_dims(tf.convert_to_tensor(final_img_np), axis=-1)
    )
    tf.io.write_file(image_path.replace("/kaggle/input", "/kaggle/working"), png_bytes)

def display_segmented_images():
    """Show a 4x4 grid with one row of stored samples per pipeline stage."""
    stages = ["blurred", "contrast_enhanced", "thresholded", "morphological"]
    fig, axes = plt.subplots(4, 4, figsize=(15, 15))
    for row_axes, stage in zip(axes, stages):
        samples = segmented_images[stage]
        for column, ax in enumerate(row_axes):
            if column < len(samples):
                ax.imshow(samples[column].squeeze(), cmap='gray')
            ax.set_title(f"{stage} {column+1}")
            ax.axis("off")
    plt.show()

# Preprocess every non-empty input file, then show the per-stage samples.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        in_path = os.path.join(dirname, filename)
        if os.path.getsize(in_path) != 0:
            preprocess_image(in_path)
            continue
        print(f"Skipping empty file: {in_path}")

display_segmented_images()

In [None]:
import os
import cv2
import numpy as np
from skimage.feature import hog
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

train_dir = "/kaggle/working/pcos-detection-using-ultrasound-images/data/train_40"
test_dir = "/kaggle/working/pcos-detection-using-ultrasound-images/data/test"

def extract_features(image_path):
    """Return the HOG descriptor of the image at `image_path`.

    The image is read as grayscale and resized to 128x128, then HOG is
    computed with 8x8-pixel cells and 2x2-cell blocks.

    Args:
        image_path: Path of the image file.

    Returns:
        The flattened HOG feature vector.

    Raises:
        ValueError: If OpenCV cannot read the file as an image.
    """
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        # cv2.imread reports failure by returning None rather than raising.
        raise ValueError(f"Could not read image: {image_path}")
    image = cv2.resize(image, (128, 128))
    # The HOG visualization image was computed and thrown away; skip it.
    return hog(image, pixels_per_cell=(8, 8), cells_per_block=(2, 2), feature_vector=True)

def load_data(directory):
    """Build feature and label arrays from the class folders of `directory`.

    Every entry of `directory/infected` is labelled 1 and every entry of
    `directory/notinfected` is labelled 0; features come from
    `extract_features`.

    Args:
        directory: Folder containing the two class sub-folders.

    Returns:
        Tuple `(X, y)` of NumPy arrays: features and integer labels.
    """
    classes = {"infected": 1, "notinfected": 0}
    X, y = [], []
    for label, target in classes.items():
        folder_path = os.path.join(directory, label)
        for file in os.listdir(folder_path):
            X.append(extract_features(os.path.join(folder_path, file)))
            y.append(target)
    return np.array(X), np.array(y)

# 5-nearest-neighbour classifier on HOG features: fit on train_dir, score on test_dir.
X_train, y_train = load_data(train_dir)
X_test, y_test = load_data(test_dir)

knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)
y_pred = knn.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

print(f" Dokładność modelu k-NN40: {accuracy:.2f}")


In [None]:
train_dir = "/kaggle/working/pcos-detection-using-ultrasound-images/data/train_70"
test_dir = "/kaggle/working/pcos-detection-using-ultrasound-images/data/test"

def extract_features(image_path):
    """Return the HOG descriptor of the image at `image_path`.

    The image is read as grayscale and resized to 128x128, then HOG is
    computed with 8x8-pixel cells and 2x2-cell blocks.

    Args:
        image_path: Path of the image file.

    Returns:
        The flattened HOG feature vector.

    Raises:
        ValueError: If OpenCV cannot read the file as an image.
    """
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        # cv2.imread reports failure by returning None rather than raising.
        raise ValueError(f"Could not read image: {image_path}")
    image = cv2.resize(image, (128, 128))
    # The HOG visualization image was computed and thrown away; skip it.
    return hog(image, pixels_per_cell=(8, 8), cells_per_block=(2, 2), feature_vector=True)

def load_data(directory):
    """Build feature and label arrays from the class folders of `directory`.

    Every entry of `directory/infected` is labelled 1 and every entry of
    `directory/notinfected` is labelled 0; features come from
    `extract_features`.

    Args:
        directory: Folder containing the two class sub-folders.

    Returns:
        Tuple `(X, y)` of NumPy arrays: features and integer labels.
    """
    classes = {"infected": 1, "notinfected": 0}
    X, y = [], []
    for label, target in classes.items():
        folder_path = os.path.join(directory, label)
        for file in os.listdir(folder_path):
            X.append(extract_features(os.path.join(folder_path, file)))
            y.append(target)
    return np.array(X), np.array(y)

# 5-nearest-neighbour classifier on HOG features: fit on train_dir, score on test_dir.
X_train, y_train = load_data(train_dir)
X_test, y_test = load_data(test_dir)

knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)
y_pred = knn.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

print(f" Dokładność modelu k-NN70: {accuracy:.2f}")


In [None]:
from sklearn.naive_bayes import GaussianNB

train_dir = "/kaggle/working/pcos-detection-using-ultrasound-images/data/train_40"
test_dir = "/kaggle/working/pcos-detection-using-ultrasound-images/data/test"

def extract_features(image_path):
    """Return the HOG descriptor of the image at `image_path`.

    The image is read as grayscale and resized to 128x128, then HOG is
    computed with 8x8-pixel cells and 2x2-cell blocks.

    Args:
        image_path: Path of the image file.

    Returns:
        The flattened HOG feature vector.

    Raises:
        ValueError: If OpenCV cannot read the file as an image.
    """
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        # cv2.imread reports failure by returning None rather than raising.
        raise ValueError(f"Could not read image: {image_path}")
    image = cv2.resize(image, (128, 128))  # Rescale to a fixed size
    # The HOG visualization image was computed and thrown away; skip it.
    return hog(image, pixels_per_cell=(8, 8), cells_per_block=(2, 2), feature_vector=True)

def load_data(directory):
    """Build feature and label arrays from the class folders of `directory`.

    Every entry of `directory/infected` is labelled 1 and every entry of
    `directory/notinfected` is labelled 0; features come from
    `extract_features`.

    Args:
        directory: Folder containing the two class sub-folders.

    Returns:
        Tuple `(X, y)` of NumPy arrays: features and integer labels.
    """
    classes = {"infected": 1, "notinfected": 0}
    X, y = [], []
    for label, target in classes.items():
        folder_path = os.path.join(directory, label)
        for file in os.listdir(folder_path):
            X.append(extract_features(os.path.join(folder_path, file)))
            y.append(target)
    return np.array(X), np.array(y)

# Gaussian Naive Bayes on HOG features: fit on train_dir, score on test_dir.
X_train, y_train = load_data(train_dir)
X_test, y_test = load_data(test_dir)

nb = GaussianNB().fit(X_train, y_train)
y_pred = nb.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

print(f" Dokładność modelu Naïve Bayes 40: {accuracy:.2f}")


In [None]:
from sklearn.naive_bayes import GaussianNB

train_dir = "/kaggle/working/pcos-detection-using-ultrasound-images/data/train_70"
test_dir = "/kaggle/working/pcos-detection-using-ultrasound-images/data/test"

def extract_features(image_path):
    """Return the HOG descriptor of the image at `image_path`.

    The image is read as grayscale and resized to 128x128, then HOG is
    computed with 8x8-pixel cells and 2x2-cell blocks.

    Args:
        image_path: Path of the image file.

    Returns:
        The flattened HOG feature vector.

    Raises:
        ValueError: If OpenCV cannot read the file as an image.
    """
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        # cv2.imread reports failure by returning None rather than raising.
        raise ValueError(f"Could not read image: {image_path}")
    image = cv2.resize(image, (128, 128))  # Rescale to a fixed size
    # The HOG visualization image was computed and thrown away; skip it.
    return hog(image, pixels_per_cell=(8, 8), cells_per_block=(2, 2), feature_vector=True)

def load_data(directory):
    """Build feature and label arrays from the class folders of `directory`.

    Every entry of `directory/infected` is labelled 1 and every entry of
    `directory/notinfected` is labelled 0; features come from
    `extract_features`.

    Args:
        directory: Folder containing the two class sub-folders.

    Returns:
        Tuple `(X, y)` of NumPy arrays: features and integer labels.
    """
    classes = {"infected": 1, "notinfected": 0}
    X, y = [], []
    for label, target in classes.items():
        folder_path = os.path.join(directory, label)
        for file in os.listdir(folder_path):
            X.append(extract_features(os.path.join(folder_path, file)))
            y.append(target)
    return np.array(X), np.array(y)

# Gaussian Naive Bayes on HOG features: fit on train_dir, score on test_dir.
X_train, y_train = load_data(train_dir)
X_test, y_test = load_data(test_dir)

nb = GaussianNB().fit(X_train, y_train)
y_pred = nb.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

print(f" Dokładność modelu Naïve Bayes 70: {accuracy:.2f}")


In [None]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

train_dir = "/kaggle/working/pcos-detection-using-ultrasound-images/data/train_40"
test_dir = "/kaggle/working/pcos-detection-using-ultrasound-images/data/test"

def extract_features(image_path):
    """Return the HOG descriptor of the image at `image_path`.

    The image is read as grayscale and resized to 128x128, then HOG is
    computed with 8x8-pixel cells and 2x2-cell blocks.

    Args:
        image_path: Path of the image file.

    Returns:
        The flattened HOG feature vector.

    Raises:
        ValueError: If OpenCV cannot read the file as an image.
    """
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        # cv2.imread reports failure by returning None rather than raising.
        raise ValueError(f"Could not read image: {image_path}")
    image = cv2.resize(image, (128, 128))
    # The HOG visualization image was computed and thrown away; skip it.
    return hog(image, pixels_per_cell=(8, 8), cells_per_block=(2, 2), feature_vector=True)

# Loads images and their labels
def load_data(directory):
    """Build feature and label arrays from the class folders of `directory`.

    Every entry of `directory/infected` is labelled 1 and every entry of
    `directory/notinfected` is labelled 0; features come from
    `extract_features`.

    Args:
        directory: Folder containing the two class sub-folders.

    Returns:
        Tuple `(X, y)` of NumPy arrays: features and integer labels.
    """
    classes = {"infected": 1, "notinfected": 0}
    X, y = [], []
    for label, target in classes.items():
        folder_path = os.path.join(directory, label)
        for file in os.listdir(folder_path):
            X.append(extract_features(os.path.join(folder_path, file)))
            y.append(target)
    return np.array(X), np.array(y)

# AdaBoost over 50 depth-1 decision trees on HOG features.
# NOTE(review): no random_state is set on either estimator, so repeated runs
# are not guaranteed to reproduce the same score.
X_train, y_train = load_data(train_dir)
X_test, y_test = load_data(test_dir)

base_model = DecisionTreeClassifier(max_depth=1)
adaboost = AdaBoostClassifier(base_model, n_estimators=50, learning_rate=1.0).fit(
    X_train, y_train
)

y_pred = adaboost.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

print(f" Dokładność modelu AdaBoost 40: {accuracy:.2f}")


In [None]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

train_dir = "/kaggle/working/pcos-detection-using-ultrasound-images/data/train_70"
test_dir = "/kaggle/working/pcos-detection-using-ultrasound-images/data/test"

def extract_features(image_path):
    """Return the HOG descriptor of the image at `image_path`.

    The image is read as grayscale and resized to 128x128, then HOG is
    computed with 8x8-pixel cells and 2x2-cell blocks.

    Args:
        image_path: Path of the image file.

    Returns:
        The flattened HOG feature vector.

    Raises:
        ValueError: If OpenCV cannot read the file as an image.
    """
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        # cv2.imread reports failure by returning None rather than raising.
        raise ValueError(f"Could not read image: {image_path}")
    image = cv2.resize(image, (128, 128))
    # The HOG visualization image was computed and thrown away; skip it.
    return hog(image, pixels_per_cell=(8, 8), cells_per_block=(2, 2), feature_vector=True)

# Loads images and their labels
def load_data(directory):
    """Build feature and label arrays from the class folders of `directory`.

    Every entry of `directory/infected` is labelled 1 and every entry of
    `directory/notinfected` is labelled 0; features come from
    `extract_features`.

    Args:
        directory: Folder containing the two class sub-folders.

    Returns:
        Tuple `(X, y)` of NumPy arrays: features and integer labels.
    """
    classes = {"infected": 1, "notinfected": 0}
    X, y = [], []
    for label, target in classes.items():
        folder_path = os.path.join(directory, label)
        for file in os.listdir(folder_path):
            X.append(extract_features(os.path.join(folder_path, file)))
            y.append(target)
    return np.array(X), np.array(y)

# AdaBoost over 50 depth-1 decision trees on HOG features.
# NOTE(review): no random_state is set on either estimator, so repeated runs
# are not guaranteed to reproduce the same score.
X_train, y_train = load_data(train_dir)
X_test, y_test = load_data(test_dir)

base_model = DecisionTreeClassifier(max_depth=1)
adaboost = AdaBoostClassifier(base_model, n_estimators=50, learning_rate=1.0).fit(
    X_train, y_train
)

y_pred = adaboost.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

print(f" Dokładność modelu AdaBoost 70: {accuracy:.2f}")


In [None]:

from skimage.feature import hog, local_binary_pattern
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

def extract_color_hist(image_path, bins=(8, 8, 8)):
    """Return a normalized, flattened 3-D HSV histogram of an image.

    The image is read with OpenCV's default flags, resized to 128x128 and
    converted from BGR to HSV before the joint histogram is computed.

    Args:
        image_path: Path of the image file.
        bins: Number of histogram bins for the H, S and V channels.

    Returns:
        1-D array holding the normalized histogram.
    """
    bgr = cv2.resize(cv2.imread(image_path), (128, 128))
    hsv = cv2.cvtColor(bgr, cv2.COLOR_BGR2HSV)
    channel_ranges = [0, 180, 0, 256, 0, 256]
    hist = cv2.calcHist([hsv], [0, 1, 2], None, bins, channel_ranges)
    normalized = cv2.normalize(hist, hist)
    return normalized.flatten()

def extract_hog(image_path):
    """Return the HOG descriptor of the image at `image_path`.

    The image is read as grayscale and resized to 128x128, then HOG is
    computed with 8x8-pixel cells and 2x2-cell blocks.

    Args:
        image_path: Path of the image file.

    Returns:
        The flattened HOG feature vector.

    Raises:
        ValueError: If OpenCV cannot read the file as an image.
    """
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        # cv2.imread reports failure by returning None rather than raising.
        raise ValueError(f"Could not read image: {image_path}")
    image = cv2.resize(image, (128, 128))
    # The HOG visualization image was computed and thrown away; skip it.
    return hog(image, pixels_per_cell=(8, 8), cells_per_block=(2, 2),
               feature_vector=True)

def extract_lbp(image_path):
    """Return a normalized histogram of uniform LBP codes for an image.

    The image is read as grayscale, resized to 128x128 and encoded with
    `local_binary_pattern` (P=8, R=1, method='uniform').

    NOTE(review): the histogram uses 58 bins, but the 'uniform' method with
    P=8 should only produce P + 2 = 10 distinct codes, so most bins are
    expected to stay empty - confirm against the scikit-image docs.

    Args:
        image_path: Path of the image file.

    Returns:
        1-D float array of bin frequencies.
    """
    gray = cv2.resize(cv2.imread(image_path, cv2.IMREAD_GRAYSCALE), (128, 128))
    codes = local_binary_pattern(gray, P=8, R=1, method='uniform')
    counts, _bin_edges = np.histogram(codes.ravel(), bins=np.arange(0, 59))
    # The small epsilon guards against division by zero.
    denominator = counts.sum() + 1e-6
    return counts.astype("float") / denominator


def load_data(directory, feature_func):
    """Build feature and label arrays from the class folders of `directory`.

    Files in `directory/infected` are labelled 1 and files in
    `directory/notinfected` are labelled 0. Only files whose name ends in
    .jpg, .png or .jpeg are used; the check ignores case so that names such
    as "scan.JPG" are not silently dropped.

    Args:
        directory: Folder containing the two class sub-folders.
        feature_func: Callable that maps an image path to a feature vector.

    Returns:
        Tuple `(X, y)` of NumPy arrays: features and integer labels.
    """
    classes = {"infected": 1, "notinfected": 0}
    image_suffixes = ('.jpg', '.png', '.jpeg')
    X, y = [], []
    for label, target in classes.items():
        folder_path = os.path.join(directory, label)
        for file in os.listdir(folder_path):
            if not file.lower().endswith(image_suffixes):
                continue
            X.append(feature_func(os.path.join(folder_path, file)))
            y.append(target)
    return np.array(X), np.array(y)


# Dataset paths
train_dir = "/kaggle/working/pcos-detection-using-ultrasound-images/data/train_40"
test_dir = "/kaggle/working/pcos-detection-using-ultrasound-images/data/test"

# === Naive Bayes (colour histograms) ===
X_train_nb, y_train_nb = load_data(train_dir, extract_color_hist)
X_test_nb, y_test_nb = load_data(test_dir, extract_color_hist)

nb_model = GaussianNB().fit(X_train_nb, y_train_nb)
pred_nb = nb_model.predict(X_test_nb)
_nb_accuracy = accuracy_score(y_test_nb, pred_nb)
print(f" Naive Bayes 40 (HSV hist): {_nb_accuracy:.2f}")

# === k-NN (HoG) ===
X_train_knn, y_train_knn = load_data(train_dir, extract_hog)
X_test_knn, y_test_knn = load_data(test_dir, extract_hog)

knn_model = KNeighborsClassifier(n_neighbors=5).fit(X_train_knn, y_train_knn)
pred_knn = knn_model.predict(X_test_knn)
_knn_accuracy = accuracy_score(y_test_knn, pred_knn)
print(f" k-NN 40 (HoG): {_knn_accuracy:.2f}")

# === AdaBoost (LBP) ===
X_train_ada, y_train_ada = load_data(train_dir, extract_lbp)
X_test_ada, y_test_ada = load_data(test_dir, extract_lbp)

ada_model = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=1), n_estimators=50
).fit(X_train_ada, y_train_ada)
pred_ada = ada_model.predict(X_test_ada)
_ada_accuracy = accuracy_score(y_test_ada, pred_ada)
print(f" AdaBoost 40 (LBP): {_ada_accuracy:.2f}")



In [None]:

from skimage.feature import hog, local_binary_pattern
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

def extract_color_hist(image_path, bins=(8, 8, 8)):
    """Return a normalized, flattened 3-D HSV histogram of an image.

    The image is read with OpenCV's default flags, resized to 128x128 and
    converted from BGR to HSV before the joint histogram is computed.

    Args:
        image_path: Path of the image file.
        bins: Number of histogram bins for the H, S and V channels.

    Returns:
        1-D array holding the normalized histogram.
    """
    bgr = cv2.resize(cv2.imread(image_path), (128, 128))
    hsv = cv2.cvtColor(bgr, cv2.COLOR_BGR2HSV)
    channel_ranges = [0, 180, 0, 256, 0, 256]
    hist = cv2.calcHist([hsv], [0, 1, 2], None, bins, channel_ranges)
    normalized = cv2.normalize(hist, hist)
    return normalized.flatten()

def extract_hog(image_path):
    """Return the HOG descriptor of the image at `image_path`.

    The image is read as grayscale and resized to 128x128, then HOG is
    computed with 8x8-pixel cells and 2x2-cell blocks.

    Args:
        image_path: Path of the image file.

    Returns:
        The flattened HOG feature vector.

    Raises:
        ValueError: If OpenCV cannot read the file as an image.
    """
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        # cv2.imread reports failure by returning None rather than raising.
        raise ValueError(f"Could not read image: {image_path}")
    image = cv2.resize(image, (128, 128))
    # The HOG visualization image was computed and thrown away; skip it.
    return hog(image, pixels_per_cell=(8, 8), cells_per_block=(2, 2),
               feature_vector=True)

def extract_lbp(image_path):
    """Return a normalized histogram of uniform LBP codes for an image.

    The image is read as grayscale, resized to 128x128 and encoded with
    `local_binary_pattern` (P=8, R=1, method='uniform').

    NOTE(review): the histogram uses 58 bins, but the 'uniform' method with
    P=8 should only produce P + 2 = 10 distinct codes, so most bins are
    expected to stay empty - confirm against the scikit-image docs.

    Args:
        image_path: Path of the image file.

    Returns:
        1-D float array of bin frequencies.
    """
    gray = cv2.resize(cv2.imread(image_path, cv2.IMREAD_GRAYSCALE), (128, 128))
    codes = local_binary_pattern(gray, P=8, R=1, method='uniform')
    counts, _bin_edges = np.histogram(codes.ravel(), bins=np.arange(0, 59))
    # The small epsilon guards against division by zero.
    denominator = counts.sum() + 1e-6
    return counts.astype("float") / denominator


def load_data(directory, feature_func):
    """Build feature and label arrays from the class folders of `directory`.

    Files in `directory/infected` are labelled 1 and files in
    `directory/notinfected` are labelled 0. Only files whose name ends in
    .jpg, .png or .jpeg are used; the check ignores case so that names such
    as "scan.JPG" are not silently dropped.

    Args:
        directory: Folder containing the two class sub-folders.
        feature_func: Callable that maps an image path to a feature vector.

    Returns:
        Tuple `(X, y)` of NumPy arrays: features and integer labels.
    """
    classes = {"infected": 1, "notinfected": 0}
    image_suffixes = ('.jpg', '.png', '.jpeg')
    X, y = [], []
    for label, target in classes.items():
        folder_path = os.path.join(directory, label)
        for file in os.listdir(folder_path):
            if not file.lower().endswith(image_suffixes):
                continue
            X.append(feature_func(os.path.join(folder_path, file)))
            y.append(target)
    return np.array(X), np.array(y)


# Dataset paths
train_dir = "/kaggle/working/pcos-detection-using-ultrasound-images/data/train_70"
test_dir = "/kaggle/working/pcos-detection-using-ultrasound-images/data/test"

# === Naive Bayes (colour histograms) ===
X_train_nb, y_train_nb = load_data(train_dir, extract_color_hist)
X_test_nb, y_test_nb = load_data(test_dir, extract_color_hist)

nb_model = GaussianNB().fit(X_train_nb, y_train_nb)
pred_nb = nb_model.predict(X_test_nb)
_nb_accuracy = accuracy_score(y_test_nb, pred_nb)
print(f" Naive Bayes 70 (HSV hist): {_nb_accuracy:.2f}")

# === k-NN (HoG) ===
X_train_knn, y_train_knn = load_data(train_dir, extract_hog)
X_test_knn, y_test_knn = load_data(test_dir, extract_hog)

knn_model = KNeighborsClassifier(n_neighbors=5).fit(X_train_knn, y_train_knn)
pred_knn = knn_model.predict(X_test_knn)
_knn_accuracy = accuracy_score(y_test_knn, pred_knn)
print(f" k-NN 70 (HoG): {_knn_accuracy:.2f}")

# === AdaBoost (LBP) ===
X_train_ada, y_train_ada = load_data(train_dir, extract_lbp)
X_test_ada, y_test_ada = load_data(test_dir, extract_lbp)

ada_model = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=1), n_estimators=50
).fit(X_train_ada, y_train_ada)
pred_ada = ada_model.predict(X_test_ada)
_ada_accuracy = accuracy_score(y_test_ada, pred_ada)
print(f" AdaBoost 70 (LBP): {_ada_accuracy:.2f}")



In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

def plot_confusion_matrix(y_true, y_pred, title):
    """Draw an annotated 2x2 confusion-matrix heatmap.

    Args:
        y_true: Ground-truth labels (0 = not infected, 1 = infected).
        y_pred: Predicted labels using the same encoding.
        title: Text appended to "Confusion Matrix: " in the figure title.
    """
    class_names = ["Not Infected", "Infected"]
    # Fix the label order explicitly so the matrix stays 2x2 and lines up with
    # the tick labels even when one class is missing from y_true and y_pred.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
    plt.figure(figsize=(5, 4))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=class_names,
                yticklabels=class_names)
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
    plt.title(f"Confusion Matrix: {title}")
    plt.show()

# One confusion matrix per model, using each model's latest test predictions.
for _labels, _predictions, _caption in (
    (y_test_nb, pred_nb, "Naive Bayes (HSV Histogram)"),
    (y_test_knn, pred_knn, "k-NN (HoG)"),
    (y_test_ada, pred_ada, "AdaBoost (LBP)"),
):
    plot_confusion_matrix(_labels, _predictions, _caption)


def visualize_color_hist(image_path):
    """Plot the H, S and V channel histograms of an image side by side.

    Args:
        image_path: Path of the image file, read with OpenCV's default flags.
    """
    hsv = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2HSV)

    # One 256-bin histogram over [0, 256) for each HSV channel.
    histograms = [
        cv2.calcHist([hsv], [channel], None, [256], [0, 256])
        for channel in range(3)
    ]
    panels = (('H Channel', 'red'), ('S Channel', 'green'), ('V Channel', 'blue'))

    plt.figure(figsize=(10, 4))
    for position, (histogram, (caption, line_color)) in enumerate(
        zip(histograms, panels), start=1
    ):
        plt.subplot(1, 3, position)
        plt.plot(histogram, color=line_color)
        plt.title(caption)

    plt.suptitle("HSV Histogram")
    plt.show()

# os.listdir returns entries in arbitrary order; sort the names so the same
# sample image is selected on every run.
_infected_dir = os.path.join(train_dir, "infected")
sample_image = os.path.join(_infected_dir, sorted(os.listdir(_infected_dir))[0])
visualize_color_hist(sample_image)


def visualize_hog(image_path):
    """Show a grayscale image next to its HOG visualization.

    Args:
        image_path: Path of the image file; it is resized to 128x128.
    """
    gray = cv2.resize(cv2.imread(image_path, cv2.IMREAD_GRAYSCALE), (128, 128))
    _descriptor, hog_image = hog(gray, pixels_per_cell=(8, 8), cells_per_block=(2, 2),
                                 feature_vector=True, visualize=True)

    panels = ((gray, "gray", "Original Image"), (hog_image, "hot", "HoG Features"))
    plt.figure(figsize=(6, 3))
    for position, (picture, colormap, caption) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.imshow(picture, cmap=colormap)
        plt.title(caption)

    plt.show()

visualize_hog(sample_image)


def visualize_lbp(image_path):
    """Show a grayscale image next to its uniform LBP code map.

    Args:
        image_path: Path of the image file; it is resized to 128x128.
    """
    gray = cv2.resize(cv2.imread(image_path, cv2.IMREAD_GRAYSCALE), (128, 128))
    lbp_image = local_binary_pattern(gray, P=8, R=1, method='uniform')

    panels = ((gray, "Original Image"), (lbp_image, "LBP Features"))
    plt.figure(figsize=(6, 3))
    for position, (picture, caption) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.imshow(picture, cmap="gray")
        plt.title(caption)

    plt.show()

visualize_lbp(sample_image)


In [None]:
from sklearn.metrics import classification_report

def print_metrics(y_true, y_pred, model_name):
    """Print a per-class classification report for one model.

    Args:
        y_true: Ground-truth labels (0 = not infected, 1 = infected).
        y_pred: Predicted labels using the same encoding.
        model_name: Name shown in the report header.
    """
    print(f" Wyniki dla: {model_name}")
    # Pass labels explicitly: when a class is absent from the data,
    # classification_report would otherwise see fewer classes than
    # target_names and raise.
    print(classification_report(y_true, y_pred, labels=[0, 1],
                                target_names=["Not Infected", "Infected"]))
    print("-" * 50)

# Report precision/recall/F1 for each of the three models.
for _labels, _predictions, _model_name in (
    (y_test_nb, pred_nb, "Naive Bayes (HSV Histogram)"),
    (y_test_knn, pred_knn, "k-NN (HoG)"),
    (y_test_ada, pred_ada, "AdaBoost (LBP)"),
):
    print_metrics(_labels, _predictions, _model_name)


In [None]:
from sklearn.metrics import roc_curve, auc

def plot_roc_curve(y_true, y_scores, model_name):
    """Plot the ROC curve for one model, with its AUC in the legend.

    Args:
        y_true: Ground-truth binary labels.
        y_scores: Scores for the positive class.
        model_name: Text appended to "ROC Curve: " in the title.
    """
    fpr, tpr, _thresholds = roc_curve(y_true, y_scores)
    roc_auc = auc(fpr, tpr)

    fig, ax = plt.subplots(figsize=(6, 5))
    ax.plot(fpr, tpr, color='blue', lw=2, label=f'AUC = {roc_auc:.2f}')
    # Diagonal reference line
    ax.plot([0, 1], [0, 1], color='gray', linestyle='--')
    ax.set_xlabel("False Positive Rate")
    ax.set_ylabel("True Positive Rate")
    ax.set_title(f"ROC Curve: {model_name}")
    ax.legend(loc="lower right")
    plt.show()

# Positive-class (column 1) probabilities for each fitted model.
y_scores_nb = nb_model.predict_proba(X_test_nb)[:, 1]    # Naive Bayes
y_scores_knn = knn_model.predict_proba(X_test_knn)[:, 1]  # k-NN
y_scores_ada = ada_model.predict_proba(X_test_ada)[:, 1]  # AdaBoost

for _labels, _scores, _caption in (
    (y_test_nb, y_scores_nb, "Naive Bayes (HSV Histogram)"),
    (y_test_knn, y_scores_knn, "k-NN (HoG)"),
    (y_test_ada, y_scores_ada, "AdaBoost (LBP)"),
):
    plot_roc_curve(_labels, _scores, _caption)
