<a href="https://colab.research.google.com/github/Hamda-Bahri/bfset-experiments/blob/main/notebooks/00_bfset_feature_statistics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# BFSET Feature Statistics Notebook
This notebook computes and visualizes statistical distributions of GLCM and HOG features extracted from the BFSET dataset.

## Overview
- Descriptive statistics
- Histograms for GLCM contrast, energy, homogeneity, entropy
- HOG mean distribution
- Correlation matrix
- High-resolution figure generation

In [None]:
# Import libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Location of the feature table. This is a placeholder: every user must
# point it at their own CSV before running the notebook.
FEATURES_CSV = "path/to/features.csv"

# Read the CSV into a DataFrame, then preview its first rows as the cell output
data = pd.read_csv(filepath_or_buffer=FEATURES_CSV)
data.head(n=5)

## Descriptive Statistics

In [None]:
# Summary statistics of the loaded feature table; as the last expression of
# the cell, the resulting table is rendered as the cell's output.
data.describe()

## Histograms for GLCM Features

In [None]:
# Names of the four GLCM feature columns; later cells reuse this list.
glcm_cols = ['GLCM_Contrast','GLCM_Energy','GLCM_Homogeneity','GLCM_Entropy']

# Draw one stand-alone histogram (30 bins, KDE overlay) per GLCM column,
# using explicit figure/axes handles rather than pyplot's implicit state.
for col in glcm_cols:
    fig, ax = plt.subplots(figsize=(10,6))
    sns.histplot(data[col], bins=30, kde=True, ax=ax)
    ax.set_title(f"Distribution of {col}")
    ax.set_xlabel(col)
    ax.set_ylabel("Frequency")
    ax.grid(True)
    plt.show()

## HOG Mean Distribution

In [None]:
# Histogram (30 bins, KDE overlay) of the HOG_Mean column on its own axes.
fig, ax = plt.subplots(figsize=(10,6))
sns.histplot(data['HOG_Mean'], bins=30, kde=True, ax=ax)
ax.set(
    title="Distribution of HOG Mean",
    xlabel="HOG_Mean",
    ylabel="Frequency",
)
ax.grid(True)
plt.show()

## Correlation Matrix

In [None]:
# Pairwise correlation (pandas' default method) between the GLCM columns and
# HOG_Mean. The result is kept in `corr` because a later cell re-plots it.
corr = data[glcm_cols + ['HOG_Mean']].corr()

fig, ax = plt.subplots(figsize=(10,8))
# Correlation coefficients live in [-1, 1]. Pinning the colour limits to that
# symmetric range puts the neutral colour of the diverging 'coolwarm' palette
# exactly at zero and keeps colours comparable between datasets; otherwise
# the palette is stretched to whatever min/max the data happens to contain.
sns.heatmap(corr, annot=True, cmap='coolwarm', vmin=-1, vmax=1, ax=ax)
ax.set_title("Correlation Matrix")
plt.show()

## High-Resolution Figures
The following code generates 300 dpi publication-ready figures.

In [None]:
# Render and save one 300-dpi histogram per feature column (the four GLCM
# columns plus HOG_Mean). Each PNG is written to the current working
# directory as "<column>_histogram_hd.png".
for col in glcm_cols + ['HOG_Mean']:
    # Explicit figure/axes handles: saving and closing then target this
    # figure directly instead of pyplot's implicit "current figure".
    fig, ax = plt.subplots(figsize=(12,8), dpi=300)
    sns.histplot(data[col], bins=30, kde=True, ax=ax)
    ax.set_title(f"High-Resolution Distribution of {col}", fontsize=18)
    ax.set_xlabel(col)
    ax.set_ylabel("Frequency")
    ax.grid(True)
    fig.tight_layout()
    # Save before show(): depending on the backend, show() may release the
    # figure, so writing the file first is the safe order.
    fig.savefig(f"{col}_histogram_hd.png", dpi=300)
    plt.show()
    # A 12x8 in canvas at 300 dpi is large; close it explicitly so figures
    # do not pile up in memory across loop iterations.
    plt.close(fig)

# Save correlation matrix HD
# Re-plots `corr` (computed in the "Correlation Matrix" cell) at 300 dpi and
# writes it to "correlation_matrix_hd.png" in the current working directory.
fig, ax = plt.subplots(figsize=(12,10), dpi=300)
# Correlations are bounded by [-1, 1]; fixing the colour limits to that
# symmetric range keeps the neutral 'coolwarm' colour at zero correlation.
sns.heatmap(corr, annot=True, cmap='coolwarm', vmin=-1, vmax=1, ax=ax)
ax.set_title("Correlation Matrix (HD)", fontsize=18)
fig.tight_layout()
fig.savefig("correlation_matrix_hd.png", dpi=300)
plt.show()
# Release the large high-resolution canvas once it has been saved and shown.
plt.close(fig)