# 1. Implement this repository on your image dataset and extract values of dissimilarity,correlation, homogeneity,contrast and energy with 45 degree angle only. save the values in a csv file; visualize head and tail of the csv.

In [1]:
pip install kaggle



In [3]:
!kaggle datasets download -d navoneel/brain-mri-images-for-brain-tumor-detection

Dataset URL: https://www.kaggle.com/datasets/navoneel/brain-mri-images-for-brain-tumor-detection
License(s): copyright-authors
Downloading brain-mri-images-for-brain-tumor-detection.zip to /content
 60% 9.00M/15.1M [00:00<00:00, 82.3MB/s]
100% 15.1M/15.1M [00:00<00:00, 109MB/s] 


In [5]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [7]:
import os
import cv2
import numpy as np
from skimage.feature import graycomatrix, graycoprops
import pandas as pd

In [8]:
image_dataset_dir = "/content/drive/MyDrive/Medical_Dataset/brain_tumor_dataset/yes"

In [9]:
image_dataset_dir

'/content/drive/MyDrive/Medical_Dataset/brain_tumor_dataset/yes'

In [11]:
# Check if the directory exists
if not os.path.exists(image_dataset_dir):
    raise FileNotFoundError(f"The directory {image_dataset_dir} does not exist.")

# Define the angles and distances for GLCM calculation
angles = [np.pi / 4]  # 45 degrees
distances = [1]  # Default distance

# Initialize a list to store features
features = []

# Process each image in the dataset
for image_name in os.listdir(image_dataset_dir):
    image_path = os.path.join(image_dataset_dir, image_name)

    # Ensure the file is an image
    if not image_name.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.tiff')):
        continue

    # Load the image in grayscale
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        print(f"Skipping {image_name} as it could not be loaded.")
        continue

    # Compute the GLCM matrix
    glcm = graycomatrix(image, distances=distances, angles=angles, levels=256, symmetric=True, normed=True)

    # Extract required properties
    dissimilarity = graycoprops(glcm, 'dissimilarity')[0, 0]
    correlation = graycoprops(glcm, 'correlation')[0, 0]
    homogeneity = graycoprops(glcm, 'homogeneity')[0, 0]
    contrast = graycoprops(glcm, 'contrast')[0, 0]
    energy = graycoprops(glcm, 'energy')[0, 0]

    # Append the features
    features.append({
        'Image': image_name,
        'Dissimilarity': dissimilarity,
        'Correlation': correlation,
        'Homogeneity': homogeneity,
        'Contrast': contrast,
        'Energy': energy
    })

In [12]:
# Create a DataFrame and save to CSV
features_df = pd.DataFrame(features)
csv_path = "glcm_features.csv"  # Output CSV file
features_df.to_csv(csv_path, index=False)

In [14]:
features_df.head()

Unnamed: 0,Image,Dissimilarity,Correlation,Homogeneity,Contrast,Energy
0,Y111.JPG,13.666846,0.930176,0.230835,857.926199,0.082855
1,Y12.jpg,10.956946,0.941463,0.22029,540.993999,0.038418
2,Y109.JPG,15.605198,0.885831,0.164045,1187.918666,0.022295
3,Y17.jpg,11.203537,0.951209,0.293796,488.345771,0.076276
4,Y165.JPG,7.625157,0.952208,0.3447,235.12808,0.090618


In [15]:
features_df.tail()

Unnamed: 0,Image,Dissimilarity,Correlation,Homogeneity,Contrast,Energy
150,Y29.jpg,3.633263,0.980208,0.695435,133.695648,0.605748
151,Y185.jpg,9.837069,0.948661,0.174344,320.68391,0.028177
152,Y36.JPG,9.941951,0.971681,0.325616,343.225103,0.13164
153,Y58.JPG,5.743928,0.986932,0.348398,110.256543,0.073525
154,Y97.JPG,8.348104,0.928413,0.24671,446.702681,0.031733


In [17]:
features_df.shape

(155, 6)

# 2. Implement this code of Hog feature extraction on an image directory and save the values in a csv file; visualize head and tail of the csv.

In [16]:
import math
import matplotlib.pyplot as plt

In [18]:
class Hog_descriptor():
    def __init__(self, img, cell_size=16, bin_size=8):
        self.img = img
        self.img = np.sqrt(img / float(np.max(img)))
        self.img = self.img * 255
        self.cell_size = cell_size
        self.bin_size = bin_size
        self.angle_unit = 360 / self.bin_size
        assert type(self.bin_size) == int, "bin_size should be integer,"
        assert type(self.cell_size) == int, "cell_size should be integer,"

    def extract(self):
        height, width = self.img.shape
        gradient_magnitude, gradient_angle = self.global_gradient()
        gradient_magnitude = abs(gradient_magnitude)
        cell_gradient_vector = np.zeros((int(height / self.cell_size), int(width / self.cell_size), self.bin_size))
        for i in range(cell_gradient_vector.shape[0]):
            for j in range(cell_gradient_vector.shape[1]):
                cell_magnitude = gradient_magnitude[i * self.cell_size:(i + 1) * self.cell_size,
                                 j * self.cell_size:(j + 1) * self.cell_size]
                cell_angle = gradient_angle[i * self.cell_size:(i + 1) * self.cell_size,
                             j * self.cell_size:(j + 1) * self.cell_size]
                cell_gradient_vector[i][j] = self.cell_gradient(cell_magnitude, cell_angle)

        hog_vector = []
        for i in range(cell_gradient_vector.shape[0] - 1):
            for j in range(cell_gradient_vector.shape[1] - 1):
                block_vector = []
                block_vector.extend(cell_gradient_vector[i][j])
                block_vector.extend(cell_gradient_vector[i][j + 1])
                block_vector.extend(cell_gradient_vector[i + 1][j])
                block_vector.extend(cell_gradient_vector[i + 1][j + 1])
                mag = lambda vector: math.sqrt(sum(i ** 2 for i in vector))
                magnitude = mag(block_vector)
                if magnitude != 0:
                    normalize = lambda block_vector, magnitude: [element / magnitude for element in block_vector]
                    block_vector = normalize(block_vector, magnitude)
                hog_vector.append(block_vector)
        return hog_vector

    def global_gradient(self):
        gradient_values_x = cv2.Sobel(self.img, cv2.CV_64F, 1, 0, ksize=5)
        gradient_values_y = cv2.Sobel(self.img, cv2.CV_64F, 0, 1, ksize=5)
        gradient_magnitude = cv2.addWeighted(gradient_values_x, 0.5, gradient_values_y, 0.5, 0)
        gradient_angle = cv2.phase(gradient_values_x, gradient_values_y, angleInDegrees=True)
        return gradient_magnitude, gradient_angle

    def cell_gradient(self, cell_magnitude, cell_angle):
        orientation_centers = [0] * self.bin_size
        for i in range(cell_magnitude.shape[0]):
            for j in range(cell_magnitude.shape[1]):
                gradient_strength = cell_magnitude[i][j]
                gradient_angle = cell_angle[i][j]
                min_angle, max_angle, mod = self.get_closest_bins(gradient_angle)
                orientation_centers[min_angle] += (gradient_strength * (1 - (mod / self.angle_unit)))
                orientation_centers[max_angle] += (gradient_strength * (mod / self.angle_unit))
        return orientation_centers

    def get_closest_bins(self, gradient_angle):
        idx = int(gradient_angle / self.angle_unit)
        mod = gradient_angle % self.angle_unit
        if idx == self.bin_size:
            return idx - 1, (idx) % self.bin_size, mod
        return idx, (idx + 1) % self.bin_size, mod


# Process images in a directory
def process_images_in_directory(image_directory, output_csv):
    features_list = []

    for image_name in os.listdir(image_directory):
        image_path = os.path.join(image_directory, image_name)

        if not image_name.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.tiff')):
            continue

        # Load the image in grayscale
        img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

        # Skip if the image could not be loaded
        if img is None:
            print(f"Skipping {image_name} - unable to load.")
            continue

        # Extract HOG features
        hog = Hog_descriptor(img, cell_size=16, bin_size=16)
        hog_vector = hog.extract()

        # Flatten the HOG vector
        flattened_vector = [item for sublist in hog_vector for item in sublist]

        # Append features and image name
        features_list.append({
            'Image': image_name,
            'HOG_Features': flattened_vector
        })

    # Create a DataFrame and save to CSV
    features_df = pd.DataFrame(features_list)
    features_df.to_csv(output_csv, index=False)

    print(f"HOG features saved to {output_csv}")

    # Display head and tail of the CSV
    print("\nHead of the CSV:")
    print(features_df.head())
    print("\nTail of the CSV:")
    print(features_df.tail())


# Directory containing the images
image_directory = "/content/drive/MyDrive/Medical_Dataset/brain_tumor_dataset/yes"  # Replace with your folder path
output_csv = "hog_features.csv"  # Path to save the CSV

process_images_in_directory(image_directory, output_csv)


HOG features saved to hog_features.csv

Head of the CSV:
      Image                                       HOG_Features
0  Y111.JPG  [0.021021360970381674, 0.005075976683766044, 0...
1   Y12.jpg  [0.0, 0.0, 0.0013973222373475842, 0.1155762851...
2  Y109.JPG  [0.007016843045548428, 0.0018773601723243414, ...
3   Y17.jpg  [0.5076745461964433, 0.016991591826980457, 0.0...
4  Y165.JPG  [0.07518904962422938, 0.22628841168770727, 0.1...

Tail of the CSV:
        Image                                       HOG_Features
150   Y29.jpg  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...
151  Y185.jpg  [0.3682885674528978, 0.218588413192716, 0.1749...
152   Y36.JPG  [0.25049052490119994, 0.06409922154044813, 0.0...
153   Y58.JPG  [0.593615366533131, 0.10460206382183543, 0.022...
154   Y97.JPG  [0.013431924954332037, 0.007118402848766388, 0...
