<a href="https://colab.research.google.com/github/DinurakshanRavichandran/Visio-Glance/blob/Glaucoma-Model/glaucoma_V04.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Enhanced Glaucoma Detection Model


## Steps to Improve Model Accuracy
1. **Data Preprocessing**:
    - Handling class imbalance with SMOTE (requires the `imbalanced-learn` package, imported as `imblearn`).
    - Scaling features using `StandardScaler`.
2. **Model Training**:
    - Using XGBoost with hyperparameter tuning via `GridSearchCV`.
3. **Evaluation**:
    - Reporting metrics including accuracy, precision, recall, and F1-score.


In [None]:
# Colab-only step: attach Google Drive to the runtime's filesystem so that
# files stored in Drive can be read through ordinary paths.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:

import pandas as pd
import xgboost as xgb
from imblearn.over_sampling import SMOTE
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder, OrdinalEncoder, StandardScaler

# Load the glaucoma dataset from the mounted Drive and separate the features (X)
# from the label column (y).
#
# Any failure (missing file, unreadable CSV, absent target column) is reported
# and then re-raised: later cells depend on X and y, so the run must stop here
# instead of continuing and failing later with an unrelated NameError.
DATA_PATH = '/content/drive/MyDrive/DSGP PROJECT 29/Updated Glaucoma/glaucoma_dataset.csv'
target_column = 'Diagnosis'  # name of the label column in the CSV

try:
    data = pd.read_csv(DATA_PATH)
    print("Columns in dataset:", data.columns)

    # Fail early with a clear message if the label column is missing
    if target_column not in data.columns:
        raise ValueError(f"Target column '{target_column}' not found in the dataset.")

    # NOTE(review): every non-target column is kept as a feature, including
    # 'Patient ID' (an identifier) and 'Glaucoma Type' (possibly derived from
    # the diagnosis). Confirm whether these should be dropped from X.
    X = data.drop(columns=[target_column])
    y = data[target_column]
except Exception as e:
    print("Error loading dataset:", e)
    raise


Columns in dataset: Index(['Patient ID', 'Age', 'Gender', 'Visual Acuity Measurements',
       'Intraocular Pressure (IOP)', 'Cup-to-Disc Ratio (CDR)',
       'Family History', 'Medical History', 'Medication Usage',
       'Visual Field Test Results',
       'Optical Coherence Tomography (OCT) Results', 'Pachymetry',
       'Cataract Status', 'Angle Closure Status', 'Visual Symptoms',
       'Diagnosis', 'Glaucoma Type'],
      dtype='object')


In [None]:

# Splitting data (stratified so both splits keep the original class ratio)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Work on independent copies so the column assignments below do not trigger
# pandas' SettingWithCopyWarning or touch the original feature frame X.
X_train = X_train.copy()
X_test = X_test.copy()

# Encode the target as integers 0..n_classes-1. Recent XGBoost releases reject
# string class labels; if the labels are already 0..n-1 this is a no-op.
# target_encoder.classes_ maps each integer back to the original label.
target_encoder = LabelEncoder()
y_train = pd.Series(target_encoder.fit_transform(y_train), index=y_train.index, name=y_train.name)
y_test = pd.Series(target_encoder.transform(y_test), index=y_test.index, name=y_test.name)

# Identify categorical columns in X_train
categorical_columns = X_train.select_dtypes(include=['object']).columns

# Integer-encode each categorical column, fitting on the training split only.
# LabelEncoder raises "y contains previously unseen labels" when the test split
# holds a category that never appears in training (e.g. a rare free-text
# medication list). OrdinalEncoder maps such categories to -1 instead.
# The dict name `label_encoders` is kept for compatibility; each value is the
# fitted encoder for that column.
label_encoders = {}
for col in categorical_columns:
    encoder = OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1)
    # Missing values get an explicit token so they are encoded as a category
    # of their own rather than propagating NaN into SMOTE.
    train_values = X_train[[col]].fillna('missing').astype(str)
    test_values = X_test[[col]].fillna('missing').astype(str)
    X_train[col] = encoder.fit_transform(train_values).ravel()
    X_test[col] = encoder.transform(test_values).ravel()
    label_encoders[col] = encoder

# Handle class imbalance with SMOTE (training split only; the test split is
# left at its natural class distribution).
# NOTE(review): SMOTE interpolates between samples, so synthetic rows can get
# fractional category codes; consider imblearn's SMOTENC for categorical data.
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

# Normalize features: fit the scaler on training data, reuse it for the test set
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_resampled)
X_test_scaled = scaler.transform(X_test)


ValueError: y contains previously unseen labels: 'Lisinopril, Amoxicillin, Aspirin, Omeprazole, Metformin, Atorvastatin, Ibuprofen'

In [None]:

# Hyperparameter search space for XGBoost: every combination below is tried.
# random_state is a single-value entry, so it only fixes the seed.
param_grid = dict(
    n_estimators=[100, 200, 300],
    learning_rate=[0.01, 0.05, 0.1],
    max_depth=[4, 6, 8],
    subsample=[0.8, 1.0],
    colsample_bytree=[0.8, 1.0],
    random_state=[42],
)

# Exhaustive 5-fold cross-validated search, scored by accuracy and run on all
# available cores.
# NOTE(review): the training data passed here was oversampled before the
# search, so validation folds can contain synthetic samples and the
# cross-validation accuracy may be optimistic.
xgb_model = xgb.XGBClassifier()
grid_search = GridSearchCV(
    xgb_model,
    param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
    verbose=1,
)
grid_search.fit(X_train_scaled, y_train_resampled)

# Estimator refit on the full training data with the best parameter combination
best_xgb = grid_search.best_estimator_


NameError: name 'X_train_scaled' is not defined

In [None]:

# Score the tuned model on the held-out test split
y_pred = best_xgb.predict(X_test_scaled)

# Winning hyperparameters from the grid search
print("Best Parameters:", grid_search.best_params_)

# Per-class precision / recall / F1
print("Classification Report:")
report = classification_report(y_test, y_pred)
print(report)

# Overall accuracy as a percentage
test_accuracy = accuracy_score(y_test, y_pred) * 100
print(f"Accuracy: {test_accuracy:.2f}%")


NameError: name 'best_xgb' is not defined