In [62]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn


In [63]:
df = pd.read_csv('myopia.csv', sep = ';')

In [64]:

from sklearn.model_selection import train_test_split
import pandas as pd
import torch.nn.functional as F

# Assuming your DataFrame is `df`
# Step 1: Split into training (75%) and remaining (25%)
datatrain, temp_data = train_test_split(df, test_size=0.25, stratify=df['MYOPIC'], random_state=2)

# Step 2: Split remaining data (25%) into validation (60% of remaining) and test (40% of remaining)
dataval, datatest = train_test_split(temp_data, test_size=0.4, stratify=temp_data['MYOPIC'], random_state=21)

x_train = torch.tensor(datatrain[['AGE', 'GENDER', 'DIOPTERHR', 'MOMMY', 'DADMY']].values, dtype=torch.float32)
x_val = torch.tensor(dataval[['AGE', 'GENDER', 'DIOPTERHR', 'MOMMY', 'DADMY']].values, dtype=torch.float32)
x_test = torch.tensor(datatest[['AGE', 'GENDER', 'DIOPTERHR', 'MOMMY', 'DADMY']].values, dtype=torch.float32)
y_train = torch.tensor(datatrain['MYOPIC'].values, dtype=torch.long)
y_val = torch.tensor(dataval['MYOPIC'].values, dtype=torch.long)
y_test = torch.tensor(datatest['MYOPIC'].values, dtype=torch.long)

y_train = F.one_hot(y_train, num_classes=2)
y_val = F.one_hot(y_val, num_classes=2)
y_test = F.one_hot(y_test, num_classes=2)

# Confirm the sizes
print(f"Train Data Size: {len(datatrain)} ({len(datatrain)/len(df)*100:.2f}%)")
print(f"Validation Data Size: {len(dataval)} ({len(dataval)/len(df)*100:.2f}%)")
print(f"Test Data Size: {len(datatest)} ({len(datatest)/len(df)*100:.2f}%)")


Train Data Size: 463 (74.92%)
Validation Data Size: 93 (15.05%)
Test Data Size: 62 (10.03%)


, 'SPORTHR', 'READHR', 'COMPHR', 'STUDYHR', 'TVHR'

In [65]:
input_dim = 5
hidden_dims = [5, 5]
output_dim = 2

In [66]:
df

Unnamed: 0,ID,STUDYYEAR,MYOPIC,AGE,GENDER,SPHEQ,AL,ACD,LT,VCD,SPORTHR,READHR,COMPHR,STUDYHR,TVHR,DIOPTERHR,MOMMY,DADMY
0,1,1992,1,6,1,-0.052,21.89,3.690,3.498,14.70,45,8,0,0,10,34,1,1
1,2,1995,0,6,1,0.608,22.38,3.702,3.392,15.29,4,0,1,1,7,12,1,1
2,3,1991,0,6,1,1.179,22.49,3.462,3.514,15.52,14,0,2,0,10,14,0,0
3,4,1990,1,6,1,0.525,22.20,3.862,3.612,14.73,18,11,0,0,4,37,0,1
4,5,1995,0,5,0,0.697,23.29,3.676,3.454,16.16,14,0,0,0,4,4,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
613,614,1995,1,6,0,0.678,22.40,3.663,3.803,14.93,2,0,7,3,14,37,1,0
614,615,1993,0,6,1,0.665,22.50,3.570,3.378,15.56,6,0,1,0,8,10,1,1
615,616,1995,0,6,0,1.834,22.94,3.624,3.424,15.89,8,0,0,0,4,4,1,1
616,617,1991,0,6,1,0.665,21.92,3.688,3.598,14.64,12,2,1,0,15,23,0,0


In [67]:
import torch
import torch.nn as nn

class Model(nn.Module):
    def __init__(self, input_dim, hidden_dims, output_dim):
        super(Model, self).__init__()
        layers = []
        in_dim = input_dim
        
        for h in hidden_dims:
            layer = nn.Linear(in_dim, h)
            # Initialize weights
            nn.init.normal_(layer.weight)
            layers.append(layer)
            layers.append(nn.ReLU())
            in_dim = h
        
        final_layer = nn.Linear(in_dim, output_dim)
        nn.init.normal_(final_layer.weight)
        layers.append(final_layer)
        
        # Final Softmax to get probabilities
        layers.append(nn.Softmax(dim=1))
        
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        return self.net(x)


In [68]:
def cross_entropy_one_hot(probabilities, one_hot_targets, epsilon=1e-7):
    """
    probabilities: (batch_size, num_classes) after softmax
    one_hot_targets: (batch_size, num_classes), one-hot encoded
    """
    # Clamp probabilities to avoid log(0)
    probs_clamped = torch.clamp(probabilities, min=epsilon, max=1.0 - epsilon)
    # Compute cross-entropy
    ce = -torch.sum(one_hot_targets * torch.log(probs_clamped), dim=1)
    return torch.mean(ce)


In [69]:
def compute_accuracy(model, x, y_one_hot):
    """
    x: inputs (batch_size, input_dim)
    y_one_hot: one-hot labels (batch_size, num_classes)
    """
    model.eval()
    with torch.no_grad():
        preds = model(x)  # Already probabilities
        # Argmax along dim=1 to get the predicted class index
        pred_classes = preds.argmax(dim=1)
        # Argmax along dim=1 to get the true class index from one-hot
        true_classes = y_one_hot.argmax(dim=1)
        acc = (pred_classes == true_classes).float().mean().item()
    model.train()
    return acc


In [70]:
import torch.optim as optim
from tqdm import tqdm

def train_model(model, 
                x_train, y_train, 
                x_val, y_val, 
                x_test, y_test, 
                epochs=10, lr=0.001):
    optimizer = optim.Adam(model.parameters(), lr=lr)
    
    for epoch in tqdm(range(epochs), desc="Training"):
        model.train()
        optimizer.zero_grad()
        
        # Forward pass
        output = model(x_train)               # shape: (batch_size, 2)
        loss = cross_entropy_one_hot(output, y_train)  # y_train is one-hot
        
        # Backprop
        loss.backward()
        optimizer.step()
        
        # Compute accuracies
        train_acc = compute_accuracy(model, x_train, y_train)
        val_acc   = compute_accuracy(model, x_val,   y_val)
        test_acc  = compute_accuracy(model, x_test,  y_test)
        
        tqdm.write(
            f"Epoch {epoch+1}/{epochs} | "
            f"Loss: {loss.item():.4f} | "
            f"Train Acc: {train_acc:.4f} | "
            f"Val Acc: {val_acc:.4f} | "
            f"Test Acc: {test_acc:.4f}"
        )


In [71]:
model0 = Model(input_dim = input_dim, hidden_dims = hidden_dims, output_dim = output_dim)
model0

Model(
  (net): Sequential(
    (0): Linear(in_features=5, out_features=5, bias=True)
    (1): ReLU()
    (2): Linear(in_features=5, out_features=5, bias=True)
    (3): ReLU()
    (4): Linear(in_features=5, out_features=2, bias=True)
    (5): Softmax(dim=1)
  )
)

In [72]:
train_model(model0, x_train, y_train, x_val, y_val, x_test, y_test, epochs=10, lr=0.15)

Training: 100%|██████████| 10/10 [00:00<00:00, 169.50it/s]

Epoch 1/10 | Loss: 2.0179 | Train Acc: 0.2916 | Val Acc: 0.2151 | Test Acc: 0.2258
Epoch 2/10 | Loss: 0.6946 | Train Acc: 0.8683 | Val Acc: 0.8710 | Test Acc: 0.8710
Epoch 3/10 | Loss: 0.5875 | Train Acc: 0.8683 | Val Acc: 0.8710 | Test Acc: 0.8710
Epoch 4/10 | Loss: 0.6544 | Train Acc: 0.8683 | Val Acc: 0.8710 | Test Acc: 0.8710
Epoch 5/10 | Loss: 0.4785 | Train Acc: 0.8575 | Val Acc: 0.8602 | Test Acc: 0.8710
Epoch 6/10 | Loss: 0.4906 | Train Acc: 0.8618 | Val Acc: 0.8602 | Test Acc: 0.8710
Epoch 7/10 | Loss: 0.4463 | Train Acc: 0.8683 | Val Acc: 0.8710 | Test Acc: 0.8710
Epoch 8/10 | Loss: 0.4145 | Train Acc: 0.8251 | Val Acc: 0.8495 | Test Acc: 0.7903
Epoch 9/10 | Loss: 0.4346 | Train Acc: 0.8683 | Val Acc: 0.8710 | Test Acc: 0.8710
Epoch 10/10 | Loss: 0.3914 | Train Acc: 0.8683 | Val Acc: 0.8710 | Test Acc: 0.8710





In [73]:
import torch

# Original input (dtype = long, shape = [5])
input_long = torch.tensor([7, 1, 9, 0, 1]).long()

# 1. Convert to float32
# 2. Add a batch dimension at the beginning (so shape is [1, 5])
input_float = input_long.unsqueeze(0).float()  # shape: (1, 5)

# Pass it through the model
output = model0(input_float)
print("Model output:", output)


Model output: tensor([[0.8717, 0.1283]], grad_fn=<SoftmaxBackward0>)


In [74]:
torch.save(model0.state_dict(), "model0.pth")

In [75]:
# import requests

# # Backend API URL
# url = "http://127.0.0.1:5000/survey"  # Replace 'endpoint' with your actual route

# try:
#     # Make a GET request
#     response = requests.get(url)

#     # Check if the request was successful
#     if response.status_code == 200:
#         # Parse the JSON response
#         data1 = response.json()
#         print("Data fetched successfully:", data1)
#     else:
#         print(f"Failed to fetch data. Status code: {response.status_code}")
#         print("Response:", response.text)
# except Exception as e:
#     print("Error occurred:", e)

Error occurred: HTTPConnectionPool(host='127.0.0.1', port=5000): Max retries exceeded with url: /survey (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x0000028FCBA6EDD0>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))


In [76]:
# data1

{'survey': [{'age': 5,
   'dad': 1,
   'gender': 'Male',
   'id': 1,
   'mom': 1,
   'name': 'Niteesh',
   'time_reading': 53,
   'time_sports': 21,
   'time_studying': 342,
   'time_tv': 54,
   'time_video_games': 564}]}