# Mount Google Drive to save/load files

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Step 1: Import Libraries

In [2]:
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.pipeline import Pipeline
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam

In [3]:
print("TensorFlow version:", tf.__version__)

TensorFlow version: 2.17.1


# Step 2: Load the Dataset
# Update the path to the dataset in Google Drive

In [4]:
data_path = "/content/drive/My Drive/MyopiaRiskFactor/myopia_risk_dataset.csv"
df = pd.read_csv(data_path)

In [5]:
print(df.describe())

       snellen_score  estimated_cylinder_power  axis_of_astigmatism  \
count    7500.000000               7500.000000          7500.000000   
mean        0.271985                  1.129494            89.293572   
std         0.223095                  0.503307            51.818818   
min         0.100000                  0.250208             0.001673   
25%         0.125000                  0.703880            44.346472   
50%         0.181818                  1.135271            88.894064   
75%         0.333333                  1.561562           133.459723   
max         1.000000                  1.999679           179.980680   

        blur_score  daily_screen_time  daily_outdoor_activities  \
count  7500.000000        7500.000000               7500.000000   
mean     50.597218          11.922982                 12.048203   
std      28.895955           6.909596                  6.942977   
min       0.002052           0.002457                  0.002238   
25%      25.710069       

# Step 3: Separate Features and Target

In [6]:
X = df.drop(columns=['myopia_risk_factor'])
y = df['myopia_risk_factor']

# Step 4: Preprocess Data
# Define numerical and categorical features

In [7]:
categorical_cols = ['reading_posture']
numerical_cols = ['snellen_score', 'estimated_cylinder_power', 'axis_of_astigmatism',
                  'blur_score', 'daily_screen_time', 'daily_outdoor_activities',
                  'reading_distance', 'screen_breaks_per_hour']

# Create a column transformer to preprocess the data

In [8]:
numerical_transformer = StandardScaler()
categorical_transformer = OneHotEncoder(handle_unknown='ignore')

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_cols),
        ('cat', categorical_transformer, categorical_cols)
    ])

# Split the data into training and testing sets

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit and transform the training data; transform the testing data

In [10]:
X_train_preprocessed = preprocessor.fit_transform(X_train)
X_test_preprocessed = preprocessor.transform(X_test)

# Step 5: Define the ANN Model

In [11]:
def create_ann_model(input_dim):
    model = Sequential()
    model.add(Dense(128, input_dim=input_dim, activation='relu'))  # First hidden layer
    model.add(Dropout(0.2))  # Dropout to prevent overfitting
    model.add(Dense(64, activation='relu'))  # Second hidden layer
    model.add(Dropout(0.2))  # Dropout again
    model.add(Dense(32, activation='relu'))  # Third hidden layer
    model.add(Dense(1, activation='sigmoid'))  # Output layer with sigmoid to keep output in [0, 1]

    model.compile(optimizer=Adam(), loss='mean_squared_error', metrics=['mae'])
    return model

# Step 6: Train the Model

In [12]:
ann_model = create_ann_model(X_train_preprocessed.shape[1])
history = ann_model.fit(X_train_preprocessed, y_train, epochs=50, batch_size=32, validation_data=(X_test_preprocessed, y_test))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - loss: 0.0541 - mae: 0.1857 - val_loss: 0.0045 - val_mae: 0.0492
Epoch 2/50
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.0090 - mae: 0.0672 - val_loss: 0.0027 - val_mae: 0.0367
Epoch 3/50
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.0060 - mae: 0.0537 - val_loss: 0.0028 - val_mae: 0.0392
Epoch 4/50
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0045 - mae: 0.0453 - val_loss: 0.0020 - val_mae: 0.0316
Epoch 5/50
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0038 - mae: 0.0426 - val_loss: 0.0020 - val_mae: 0.0322
Epoch 6/50
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.0034 - mae: 0.0402 - val_loss: 0.0016 - val_mae: 0.0275
Epoch 7/50
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - 

# Step 7: Evaluate the Model

In [13]:
y_pred = ann_model.predict(X_test_preprocessed)

mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error (MSE): {mse:.4f}")
print(f"R-squared: {r2:.4f}")

val_loss, val_mae = history.history['val_loss'][-1], history.history['val_mae'][-1]
print(f"Validation Loss: {val_loss:.4f}")
print(f"Validation MAE: {val_mae:.4f}")

[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Mean Squared Error (MSE): 0.0019
R-squared: 0.9753
Validation Loss: 0.0019
Validation MAE: 0.0288


# Step 8: Save the Model
# Save the trained model to Google Drive

In [23]:
model_save_path = "/content/drive/My Drive/myopia_risk_model.h5"
ann_model.save(model_save_path)
print(f"Model saved to {model_save_path}")



Model saved to /content/drive/My Drive/myopia_risk_model.h5


# Step 9: Predict for a Single Record
# Example record for prediction

In [15]:
example_record = {
    "snellen_score": 0.12,
    "estimated_cylinder_power": 1.00,
    "axis_of_astigmatism": 100,
    "blur_score": 75,
    "daily_screen_time": 8,
    "daily_outdoor_activities": 2,
    "reading_distance": 30,
    "screen_breaks_per_hour": 3,
    "reading_posture": "Fair",
    "lightening_condition": "Moderate"
}

# Convert the record into a DataFrame

In [16]:
example_df = pd.DataFrame([example_record])

# Preprocess the single record

In [17]:
processed_example = preprocessor.transform(example_df)

# Make the prediction

In [18]:
risk_factor = ann_model.predict(processed_example)
print(f"Predicted Myopia Risk Factor: {risk_factor[0][0]:.2f}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
Predicted Myopia Risk Factor: 0.58


In [19]:
print(risk_factor)

[[0.5845373]]
