<a href="https://colab.research.google.com/github/DinurakshanRavichandran/Visio-Glance/blob/Glaucoma-Model/glaucoma_V03.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Enhanced Glaucoma Detection Model


## Steps to Improve Model Accuracy
1. **Data Preprocessing**:
    - Handling class imbalance with SMOTE (requires the `imbalanced-learn` package, imported as `imblearn`).
    - Scaling features using `StandardScaler`.
2. **Model Training**:
    - Using XGBoost with hyperparameter tuning via `GridSearchCV`.
3. **Evaluation**:
    - Reporting metrics including accuracy, precision, recall, and F1-score.


In [1]:
# Attach Google Drive to this Colab runtime; the dataset is read from a folder under MyDrive.
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Mounted at /content/drive


In [None]:
import re

import pandas as pd
import xgboost as xgb
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Read the glaucoma records from the mounted Drive folder and list the columns that were loaded.
dataset_path = '/content/drive/MyDrive/DSGP PROJECT 29/DATASETS/glaucoma_dataset.csv'
data = pd.read_csv(dataset_path)
print(data.columns)

def convert_visual_acuity(value):
    """Convert a fraction-style visual acuity string to a decimal ratio.

    Example: ``'20/40'`` becomes ``0.5``.

    Args:
        value: A cell from the acuity column, expected to look like
            ``'<numerator>/<denominator>'``. Any other object is tolerated.

    Returns:
        ``numerator / denominator`` as a float, or ``None`` when the cell cannot
        be parsed: it is not a string, it does not have exactly two
        '/'-separated parts, a part is not numeric, or the denominator is zero.
    """
    try:
        numerator, denominator = map(float, value.split('/'))
        return numerator / denominator
    # Only the failures that mean "unparseable cell" are swallowed. A bare
    # `except:` would also hide KeyboardInterrupt and genuine programming errors.
    except (AttributeError, TypeError, ValueError, ZeroDivisionError):
        return None

def extract_oct_features(value, feature_name):
    """Pull one numeric measurement out of a free-text OCT results string.

    The text is searched for ``'<feature_name>: <number>'`` and the number is
    returned. Example: with ``'RNFL Thickness: 86.48 um, GCC Thickness: 64.14 um'``
    and ``'RNFL Thickness'`` the result is ``86.48``.

    Args:
        value: The OCT results cell. Non-string cells (e.g. NaN) are tolerated.
        feature_name: Literal label that precedes the value. It is escaped, so
            regex metacharacters such as parentheses are matched verbatim.

    Returns:
        The measurement as a float, or ``None`` when the label is absent or the
        cell is not text.
    """
    # The number is matched as digits with an optional fractional part (or a bare
    # ".5"), so trailing punctuation such as "86.5." is not captured with it.
    pattern = rf"{re.escape(str(feature_name))}: (\d+(?:\.\d+)?|\.\d+)"
    try:
        match = re.search(pattern, value)
        return float(match.group(1)) if match else None
    # TypeError: `value` is not a string. ValueError: defensive guard around float().
    except (TypeError, ValueError):
        return None

# Replace the acuity strings with their decimal ratios (unparseable cells become missing values).
data['Visual Acuity Measurements'] = data['Visual Acuity Measurements'].apply(convert_visual_acuity)

# Expand the free-text OCT column into one numeric column per measurement.
oct_source_column = 'Optical Coherence Tomography (OCT) Results'
oct_feature_names = ('RNFL Thickness', 'GCC Thickness', 'Retinal Volume', 'Macular Thickness')
for oct_feature in oct_feature_names:
    # The default argument binds the current feature name for this iteration's lambda.
    data[oct_feature] = data[oct_source_column].apply(
        lambda cell, name=oct_feature: extract_oct_features(cell, name))

# The identifier and the raw OCT text are no longer needed once the numeric columns exist.
data = data.drop(columns=['Patient ID', 'Optical Coherence Tomography (OCT) Results'])

# Discard every row that still has a missing value, including cells the parsers could not read.
data = data.dropna()

# Separate features and target.
X = data.drop(['Glaucoma Type'], axis=1)

# Encode the target as integers 0..n_classes-1. Current XGBoost releases reject
# class labels that are not already in that form, so raw labels cannot be passed
# to XGBClassifier.fit. `target_encoder.inverse_transform` maps predictions back.
target_encoder = LabelEncoder()
y = pd.Series(
    target_encoder.fit_transform(data['Glaucoma Type']),
    index=data.index,
    name='Glaucoma Type',
)
# Print the code -> label mapping so the numeric classes in later reports are readable.
print("Target classes:", {code: str(label) for code, label in enumerate(target_encoder.classes_)})

# Encode categorical features, keeping one fitted encoder per column so each
# mapping can be inspected or reused on new data later.
# NOTE(review): 'Diagnosis' is used as a predictor of 'Glaucoma Type'. Confirm it
# is available at prediction time and is not simply a proxy for the target.
categorical_columns = ['Gender', 'Family History', 'Medical History', 'Medication Usage',
                       'Cataract Status', 'Angle Closure Status', 'Visual Symptoms', 'Diagnosis']
feature_encoders = {}

for col in categorical_columns:
    encoder = LabelEncoder()
    X[col] = encoder.fit_transform(X[col])
    feature_encoders[col] = encoder

# Split the data. Stratifying keeps the class proportions equal in both splits.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Oversample with SMOTE on the training split only; the test split is left untouched.
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

# Standardise the features: statistics are learned from the resampled training
# data and then reused unchanged for the test split.
scaler = StandardScaler()
scaler.fit(X_train_resampled)
X_train_scaled = scaler.transform(X_train_resampled)
X_test_scaled = scaler.transform(X_test)

# Baseline XGBoost classifier with default hyper-parameters.
model = xgb.XGBClassifier(random_state=42).fit(X_train_scaled, y_train_resampled)

# Score the baseline on the held-out test split.
y_pred = model.predict(X_test_scaled)
baseline_accuracy = accuracy_score(y_test, y_pred)
baseline_report = classification_report(y_test, y_pred)
print("Accuracy Score:", baseline_accuracy)
print("Classification Report:\n", baseline_report)


In [None]:

# Hyper-parameter search for XGBoost.
#
# SMOTE runs inside an imbalanced-learn Pipeline, so every cross-validation fold
# oversamples only its own training part. Searching on data that was oversampled
# beforehand puts synthetic points derived from validation rows into the training
# folds, which inflates the CV scores.
# NOTE(review): SMOTE needs more samples per class than its k_neighbors (default 5)
# in every training fold. Confirm the rarest class is large enough.

# Parameter grid; the `xgb__` prefix routes each entry to the pipeline's XGBoost step.
param_grid = {
    'xgb__n_estimators': [100, 200, 300],
    'xgb__learning_rate': [0.01, 0.05, 0.1],
    'xgb__max_depth': [4, 6, 8],
    'xgb__subsample': [0.8, 1.0],
    'xgb__colsample_bytree': [0.8, 1.0],
}

# The seed is fixed on the estimator itself; a single-value grid axis is not a search dimension.
xgb_model = xgb.XGBClassifier(random_state=42)
search_pipeline = Pipeline(steps=[
    ('smote', SMOTE(random_state=42)),
    ('xgb', xgb_model),
])

# Search on the original (not oversampled) training rows, scaled with the already
# fitted scaler so the tuned model still matches `X_test_scaled`.
X_train_cv = scaler.transform(X_train)
grid_search = GridSearchCV(estimator=search_pipeline, param_grid=param_grid, cv=5,
                           scoring='accuracy', verbose=1, n_jobs=-1)
grid_search.fit(X_train_cv, y_train)

# Best model: the XGBoost step of the pipeline refit on the full training split with SMOTE applied.
best_xgb = grid_search.best_estimator_.named_steps['xgb']


In [None]:

# Predict the held-out test split with the tuned model.
y_pred = best_xgb.predict(X_test_scaled)

# Compute the metrics first, then print the winning parameters and the scores.
tuned_report = classification_report(y_test, y_pred)
tuned_accuracy_pct = accuracy_score(y_test, y_pred) * 100

print("Best Parameters:", grid_search.best_params_)
print("Classification Report:")
print(tuned_report)
print(f"Accuracy: {tuned_accuracy_pct:.2f}%")
