In [1]:
import os
import numpy as np
import pandas as pd
import cv2
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from skimage.feature import graycomatrix, graycoprops
from tqdm import tqdm

In [2]:
csv_file = "eye_diagrams_metadata.csv"
df = pd.read_csv(csv_file)

# Define paths
train_folder = "preprocessed/train"
test_folder = "preprocessed/test"

# Change target variable to OSNR_dB
target_variable = "OSNR_dB"  # We're now predicting OSNR levels

In [3]:
train_df, test_df = train_test_split(df, test_size=0.2, stratify=df["OSNR_dB"], random_state=42)

In [4]:
def histogram_features(image, bins=16):
    image_uint8 = (image * 255).astype(np.uint8)  # Convert from float to uint8 (0-255)
    hist = cv2.calcHist([image_uint8], [0], None, [bins], [0, 256])  # Compute histogram
    hist = cv2.normalize(hist, hist).flatten()  # Normalize & flatten
    return hist


# Function to extract Fourier Transform features
def fourier_transform_features(image):
    f_transform = np.fft.fft2(image)
    f_shift = np.fft.fftshift(f_transform)
    magnitude_spectrum = 20 * np.log(np.abs(f_shift) + 1)  # Avoid log(0)
    return [np.mean(magnitude_spectrum), np.var(magnitude_spectrum)]  # Mean & variance of frequency components

# Function to extract GLCM texture features
def glcm_features(image):
    glcm = graycomatrix(image.astype(np.uint8), distances=[1], angles=[0], levels=256, symmetric=True, normed=True)
    contrast = graycoprops(glcm, 'contrast')[0, 0]
    correlation = graycoprops(glcm, 'correlation')[0, 0]
    energy = graycoprops(glcm, 'energy')[0, 0]
    entropy = -np.sum(glcm * np.log2(glcm + 1e-9))  # Compute entropy
    return [contrast, correlation, energy, entropy]

In [5]:
def extract_features_and_labels(folder, df_subset):
    features = []
    labels = []
    
    for _, row in tqdm(df_subset.iterrows(), total=len(df_subset), desc=f"Processing {folder}"):
        filename = row["Filename"].replace(".png", ".npy")  # Adjust filename for .npy
        file_path = os.path.join(folder, filename)
        
        if not os.path.exists(file_path):
            print(f"⚠️ Skipping missing file: {filename}")
            continue
        
        # Load image
        image = np.load(file_path)
        
        # Compute features
        mean_intensity = np.mean(image)
        variance = np.var(image)
        edge_density = np.sum(cv2.Canny((image * 255).astype(np.uint8), 100, 200)) / image.size
        
        hist_feats = histogram_features(image)
        fft_feats = fourier_transform_features(image)
        glcm_feats = glcm_features(image)

        # Store extracted features
        features.append([mean_intensity, variance, edge_density] + list(hist_feats) + list(fft_feats) + list(glcm_feats))
        labels.append(row[target_variable])  # OSNR_dB values
    
    return np.array(features), np.array(labels)

# Extract features from train & test sets
X_train, y_train = extract_features_and_labels(train_folder, train_df)
X_test, y_test = extract_features_and_labels(test_folder, test_df)

Processing preprocessed/train: 100%|██████████| 489/489 [00:02<00:00, 197.12it/s]
Processing preprocessed/test: 100%|██████████| 123/123 [00:00<00:00, 170.84it/s]


In [6]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [7]:
model = SVR(kernel="rbf", C=100, gamma=0.1)  # Support Vector Regression

print("\n🚀 Training OSNR Prediction Model...")
model.fit(X_train, y_train)


🚀 Training OSNR Prediction Model...


In [8]:
y_pred = model.predict(X_test)

In [9]:
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("\n✅ OSNR Prediction Model Evaluation:")
print(f"📏 Mean Absolute Error (MAE): {mae:.4f}")
print(f"📏 Mean Squared Error (MSE): {mse:.4f}")
print(f"📏 R² Score: {r2:.4f}")


✅ OSNR Prediction Model Evaluation:
📏 Mean Absolute Error (MAE): 1.2641
📏 Mean Squared Error (MSE): 3.7857
📏 R² Score: 0.9342
