In [1]:
import os
import cv2
import pandas as pd
from skimage.feature import graycomatrix, graycoprops
from skimage.color import rgb2gray
from skimage.io import imread
import numpy as np

# Path where tiles are stored
tile_dir = "./tiles"
all_features=[]

# GLCM settings
distances = [1]  # pixel distance
angles = [0]     # 0 degrees

# Storage for features
glcm_data = []

for filename in os.listdir(tile_dir):
    if filename.endswith(".png"):
        filepath = os.path.join(tile_dir, filename)
        label = int(filename.split("_label_")[1].split(".")[0])

        # Read image and convert to grayscale
        img = imread(filepath)
        gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

        # Compute GLCM
        glcm = graycomatrix(gray, distances=distances, angles=angles, levels=256, symmetric=True, normed=True)

        # Extract texture features
        contrast = graycoprops(glcm, 'contrast')[0, 0]
        homogeneity = graycoprops(glcm, 'homogeneity')[0, 0]
        energy = graycoprops(glcm, 'energy')[0, 0]
        correlation = graycoprops(glcm, 'correlation')[0, 0]

        glcm_data.append({
            'tile': filename,
            'label': label,
            'contrast': contrast,
            'homogeneity': homogeneity,
            'energy': energy,
            'correlation': correlation
        })

# Save as DataFrame
df_glcm = pd.DataFrame(glcm_data)
df_glcm.to_csv("glcm_features.csv", index=False)

print("GLCM feature extraction complete.")
df_glcm.head()


GLCM feature extraction complete.


Unnamed: 0,tile,label,contrast,homogeneity,energy,correlation
0,S17-08918_tile_17_label_0.png,0,0.1947,0.96777,0.781524,0.795835
1,Her2Neg_Case_36_tile_10_label_0.png,0,103.543474,0.562966,0.156768,0.92336
2,Her2Pos_Case_83_tile_3_label_1.png,1,0.036964,0.981867,0.902182,0.879972
3,Her2Neg_Case_50_tile_15_label_0.png,0,0.187393,0.970007,0.882913,0.778368
4,Her2Pos_Case_38_tile_2_label_1.png,1,0.033655,0.990342,0.968358,0.704497


In [2]:
#1st feature chosen is local binary patterns of the tissue
# local binary pattern describes texture using the local contrast between pixels i.e how dark a pixel is compared to its neighbours in binary patterns
from skimage.feature import local_binary_pattern

radius = 3
n_pts = 8 * radius  #made it such that 24 neighbouring pixels scanned
lbp = local_binary_pattern(gray, n_pts, radius, method="uniform")
lbp_hist, _ = np.histogram(lbp.ravel(), bins=np.arange(0, n_pts+3), range=(0, n_pts+2))
#In image processing, we often use histograms to count how many times certain patterns, intensities, or orientations occur.
lbp_hist = lbp_hist/ lbp_hist.sum() #I normalised it to make it independent of image size thus making it a proper feature vector

In [3]:
#2nd feature I chose was the colour statisctics of each image i.e. mean and std deviation of RGB 
#did this because HER2+ often shows stronger membrane staining of brown and red
mean_r = img[:, :, 0].mean()
std_r = img[:, :, 0].std()
mean_g = img[:, :, 1].mean()
std_g = img[:, :, 1].std()
mean_b = img[:, :, 2].mean()
std_b = img[:, :, 2].std()

#mean_r: Average red intensity in the tile.
#std_r: How much the red intensity varies — higher = more variation (e.g., boundaries, gradients).
#Same for green — useful because hematoxylin (blue/purple) and eosin (pink/red) mix to create various colors.
#The blue channel often captures nuclear content stained by hematoxylin.

In [4]:
#3rd feature used is edge and structure detection using Histogram of Oriented Gradients(HOG)
#HOG gives us idea about Where are the edges in this image, and in which direction do they point?
from skimage.feature import hog

hog_features, _ = hog(
    gray,
    pixels_per_cell=(16, 16),
    cells_per_block=(2, 2),
    orientations=8,
    block_norm='L2',
    visualize=True
)


In [None]:
import os
import cv2
import pandas as pd
import numpy as np
from skimage.feature import graycomatrix, graycoprops, local_binary_pattern, hog
from skimage.io import imread

tile_dir = "./tiles"
all_features = []

# GLCM
distances = [1]
angles = [0]

# LBP
radius = 3
n_points = 8 * radius

tiles = [f for f in os.listdir(tile_dir) if f.endswith(".png")]
print(f"Total tiles to process: {len(tiles)}")

for filename in tiles:
    filepath = os.path.join(tile_dir, filename)

    try:
        # Label
        label = int(filename.split("_label_")[1].split(".")[0])

        # Image & grayscale
        img = imread(filepath)
        gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

        # ========== GLCM ==========
        glcm = graycomatrix(gray, distances=distances, angles=angles, levels=256, symmetric=True, normed=True)
        contrast = graycoprops(glcm, 'contrast')[0, 0]
        homogeneity = graycoprops(glcm, 'homogeneity')[0, 0]
        energy = graycoprops(glcm, 'energy')[0, 0]
        correlation = graycoprops(glcm, 'correlation')[0, 0]

        # ========== LBP ==========
        lbp = local_binary_pattern(gray, n_points, radius, method="uniform")
        lbp_hist, _ = np.histogram(lbp.ravel(), bins=np.arange(0, n_points + 3), range=(0, n_points + 2))
        lbp_hist = lbp_hist / lbp_hist.sum()

        # ========== Color Stats ==========
        mean_r = img[:, :, 0].mean()
        std_r = img[:, :, 0].std()
        mean_g = img[:, :, 1].mean()
        std_g = img[:, :, 1].std()
        mean_b = img[:, :, 2].mean()
        std_b = img[:, :, 2].std()

        # ========== HOG ==========
        hog_features, _ = hog(gray,
                              pixels_per_cell=(16, 16),
                              cells_per_block=(2, 2),
                              orientations=8,
                              block_norm='L2',
                              visualize=True)

        # ========== Build feature dictionary ==========
        feature_dict = {
            'tile': filename,
            'label': label,
            'contrast': contrast,
            'homogeneity': homogeneity,
            'energy': energy,
            'correlation': correlation,
            'mean_r': mean_r,
            'std_r': std_r,
            'mean_g': mean_g,
            'std_g': std_g,
            'mean_b': mean_b,
            'std_b': std_b,
        }

        # Add LBP features
        for i, val in enumerate(lbp_hist):
            feature_dict[f'lbp_{i}'] = val

        # Add HOG features
        for i, val in enumerate(hog_features):
            feature_dict[f'hog_{i}'] = val

        # Append to final list
        all_features.append(feature_dict)

    except Exception as e:
        print(f"Error in tile {filename}: {e}")

In [None]:
df_full = pd.DataFrame(all_features)
df_full.to_csv("her2_all_features.csv", index=False)
print(f"Feature dataset saved with {len(df_full)} rows.")