<a href="https://colab.research.google.com/github/DinurakshanRavichandran/Visio-Glance/blob/Cataract-Model-NLP/01_cataract_detection_model_training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Cataract Detection Model Training using Random Forest Classifier

Custom model used to detect if a patient has cataract or not.

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Importing libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from imblearn.over_sampling import SMOTE
import joblib


In [None]:

# Load the dataset
file_path = "/content/drive/MyDrive/DSGP PROJECT 29/Updated Cataract/cataract_patient_data.csv"
data = pd.read_csv(file_path)

# Exploring the dataset
print("Dataset Shape:", data.shape)
print("Columns:", data.columns)
print("Sample Data:")
print(data.head())


Dataset Shape: (10000, 16)
Columns: Index(['Patient_ID', 'Age', 'Gender', 'Family_History', 'Blurred_Vision',
       'Night_Vision_Difficulty', 'Sensitivity_to_Light', 'Glare_Halos',
       'Frequent_Rx_Changes', 'Lens_Opacity_Type', 'Lens_Opacity_Grade',
       'Visual_Acuity', 'Intraocular_Pressure', 'Co_Existing_Conditions',
       'Surgery_Recommended', 'Follow_Up_Timeline'],
      dtype='object')
Sample Data:
   Patient_ID  Age  Gender Family_History Blurred_Vision  \
0      331808   62    Male             No             No   
1      905020   78    Male            Yes            Yes   
2      416145   76    Male             No             No   
3      834518   84  Female            Yes             No   
4      634557   44  Female            Yes             No   

  Night_Vision_Difficulty Sensitivity_to_Light Glare_Halos  \
0                      No                  Yes          No   
1                      No                   No          No   
2                      No          

In [None]:

# Data Preprocessing
# Handling missing values if any (in this case, dropping rows with missing values)
data.dropna(inplace=True)

# Feature Selection and Target Encoding
# Selecting relevant features
features = [
    'Age', 'Gender', 'Family_History', 'Blurred_Vision', 'Night_Vision_Difficulty',
    'Sensitivity_to_Light', 'Glare_Halos', 'Frequent_Rx_Changes', 'Lens_Opacity_Type',
    'Lens_Opacity_Grade', 'Visual_Acuity', 'Intraocular_Pressure', 'Co_Existing_Conditions'
]
target = 'Surgery_Recommended'

# Encoding categorical features
categorical_features = ['Gender', 'Family_History', 'Blurred_Vision', 'Night_Vision_Difficulty',
                        'Sensitivity_to_Light', 'Glare_Halos', 'Frequent_Rx_Changes',
                        'Lens_Opacity_Type', 'Visual_Acuity', 'Co_Existing_Conditions']
encoder = LabelEncoder()
for feature in categorical_features:
    data[feature] = encoder.fit_transform(data[feature])

# Encoding the target variable
data[target] = encoder.fit_transform(data[target])


In [None]:

# Splitting the data
X = data[features]
y = data[target]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [None]:

# Handling class imbalances using SMOTE
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)


In [None]:

# Normalizing the features
scaler = StandardScaler()
X_train_resampled = scaler.fit_transform(X_train_resampled)
X_test = scaler.transform(X_test)


In [None]:

# Training the model
model = RandomForestClassifier(random_state=42, n_estimators=100)
model.fit(X_train_resampled, y_train_resampled)


In [None]:

# Evaluating the model
y_pred = model.predict(X_test)
print("Classification Report:")
print(classification_report(y_test, y_pred))
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))
print("Accuracy Score:", accuracy_score(y_test, y_pred))


Classification Report:
              precision    recall  f1-score   support

           0       0.54      0.55      0.54       768
           1       0.52      0.52      0.52       739

    accuracy                           0.53      1507
   macro avg       0.53      0.53      0.53      1507
weighted avg       0.53      0.53      0.53      1507

Confusion Matrix:
[[421 347]
 [358 381]]
Accuracy Score: 0.5321831453218314


In [None]:

# Saving the model
model_filename = "cataract_detection_model.pkl"
joblib.dump(model, model_filename)
print(f"Model saved as {model_filename}.")


Model saved as cataract_detection_model.pkl.
