<a href="https://colab.research.google.com/github/PranavSingla122/Fatty-Liver-Analysis/blob/main/fattyliver.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler, LabelEncoder
from torch.utils.data import Dataset, DataLoader
import numpy as np

# Load the datasets
train_df = pd.read_csv('/content/drive/MyDrive/fatty liver/train.csv')  # Update with the actual path
test_df = pd.read_csv('/content/drive/MyDrive/fatty liver/test.csv')  # Update with the actual path

# Drop unnecessary columns; errors='ignore' skips any that a frame does not have
drop_columns = ['Unnamed: 0', 'FBID', 'SampleID', 'Fibrosis', 'group']
train_df = train_df.drop(columns=drop_columns, errors='ignore')
test_df = test_df.drop(columns=drop_columns, errors='ignore')


def _to_numeric_except_sex(column):
    """Coerce one column to numeric, leaving 'Sex' untouched for the label encoder."""
    if column.name == 'Sex':
        return column
    return pd.to_numeric(column, errors='coerce')


# Ensure all feature columns are numeric (unparseable entries become NaN).
# 'Sex' is deliberately excluded: coercing string categories would turn the whole
# column into NaN, and the dropna() below would then discard every row.
train_df = train_df.apply(_to_numeric_except_sex)
test_df = test_df.apply(_to_numeric_except_sex)

# Drop rows with missing values
train_df = train_df.dropna()
test_df = test_df.dropna()

# Encode categorical variable 'Sex' consistently across train and validation sets:
# the encoder is fitted on the training frame only and reused for the test frame.
label_encoder = LabelEncoder()
train_df['Sex'] = label_encoder.fit_transform(train_df['Sex'])
test_df['Sex'] = label_encoder.transform(test_df['Sex'])

# Check if 'label' column exists, otherwise create it
# NOTE(review): 'BMI' stays in the feature matrix below, so a label derived from it
# is directly recoverable from the inputs — confirm this fallback is intended.
if 'label' not in train_df.columns:
    train_df['label'] = (train_df['BMI'] > 25).astype(int)  # Example threshold for fatty liver
if 'label' not in test_df.columns:
    test_df['label'] = (test_df['BMI'] > 25).astype(int)  # Ensure same logic for validation set

# Extract features and labels
X_train = train_df.drop(columns=['label']).values
y_train = train_df['label'].values
X_test = test_df.drop(columns=['label']).values
y_test = test_df['label'].values

# Normalize the data: statistics come from the training set only and are then
# applied unchanged to the test set.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# PyTorch Dataset wrapping a feature matrix and its label vector
class LiverDataset(Dataset):
    """In-memory dataset that pairs each feature row with its label.

    Args:
        X: Array-like of features; row ``i`` is the ``i``-th sample.
        y: Array-like of labels, one entry per row of ``X``.

    Both inputs are converted to ``float32`` tensors once, at construction.
    """

    def __init__(self, X, y):
        self.X, self.y = (
            torch.tensor(data, dtype=torch.float32) for data in (X, y)
        )

    def __len__(self):
        """Return the number of labels held by the dataset."""
        return len(self.y)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at ``idx``."""
        sample = self.X[idx]
        target = self.y[idx]
        return sample, target

# Create DataLoaders
train_dataset = LiverDataset(X_train, y_train)
test_dataset = LiverDataset(X_test, y_test)

BATCH_SIZE = 32

# The classifier uses BatchNorm1d, which raises an error in training mode when a
# batch holds a single sample. Drop the trailing training batch only when it would
# contain exactly one sample; every other dataset size is loaded exactly as before.
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    drop_last=(len(train_dataset) % BATCH_SIZE == 1),
)
# Evaluation keeps every sample and a fixed order.
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Binary classifier: two hidden blocks followed by a single-logit output layer
class LiverClassifier(nn.Module):
    """Feed-forward network producing one raw logit per sample.

    Each hidden block applies Linear -> BatchNorm1d -> ReLU -> Dropout(0.5);
    the hidden widths are 128 and 64. No sigmoid is applied to the output.

    Args:
        input_size: Number of input features per sample.
    """

    def __init__(self, input_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, 128)
        self.bn1 = nn.BatchNorm1d(128)
        self.fc2 = nn.Linear(128, 64)
        self.bn2 = nn.BatchNorm1d(64)
        self.fc3 = nn.Linear(64, 1)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        """Return the logits for batch ``x`` with shape ``(batch, 1)``."""
        hidden_blocks = ((self.fc1, self.bn1), (self.fc2, self.bn2))
        for linear, norm in hidden_blocks:
            x = self.dropout(self.relu(norm(linear(x))))
        # Raw logits are returned; no sigmoid is applied here.
        return self.fc3(x)

# Model initialization: the network's input width is the number of feature columns
input_size = X_train.shape[1]
model = LiverClassifier(input_size)

# Loss and optimizer
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.AdamW(
    params=model.parameters(),
    lr=1e-3,
    weight_decay=5e-4,
)
# Multiply the learning rate by 0.1 once the monitored value has stopped
# decreasing for more than 3 consecutive epochs.
# NOTE(review): `verbose` is reported as deprecated in recent PyTorch releases —
# confirm against the installed version.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.1,
    patience=3,
    verbose=True,
)

# Early stopping parameters (module-level state read and updated by train_model)
patience, counter = 5, 0
best_loss = np.inf

# Training loop
def train_model(model, train_loader, test_loader, criterion, optimizer, scheduler, epochs=50):
    """Train ``model`` with per-epoch validation, LR scheduling and early stopping.

    Args:
        model: Network to optimise; expected to return one logit per sample.
        train_loader: Iterable of ``(X_batch, y_batch)`` training batches.
        test_loader: Validation batches, forwarded to ``evaluate_model``.
        criterion: Loss applied to ``(logits, targets)`` for the training batches.
        optimizer: Optimizer stepped once per training batch.
        scheduler: Scheduler stepped once per epoch with the validation loss.
        epochs: Maximum number of epochs to run.

    Raises:
        ValueError: If ``train_loader`` contains no batches.

    Side effects:
        Resets and then updates the module-level ``best_loss`` and ``counter``;
        reads the module-level ``patience``. Validation is delegated to
        ``evaluate_model(model, test_loader)``, which is not passed ``criterion``.
    """
    global best_loss, counter
    # Start every run from clean early-stopping state; otherwise a second call
    # would inherit the previous run's best loss and patience counter.
    best_loss = float("inf")
    counter = 0

    if len(train_loader) == 0:
        raise ValueError("train_loader is empty; nothing to train on")

    for epoch in range(epochs):
        model.train()
        total_loss = 0.0
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            # Squeeze only the trailing logit dimension. A bare squeeze() would
            # also collapse a batch of one sample to a 0-d tensor, which no
            # longer matches the shape of y_batch.
            outputs = model(X_batch).squeeze(-1)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        avg_loss = total_loss / len(train_loader)
        val_loss = evaluate_model(model, test_loader)
        scheduler.step(val_loss)

        print(f"Epoch {epoch+1}/{epochs}, Loss: {avg_loss:.4f}, Val Loss: {val_loss:.4f}")

        # Early stopping: stop after `patience` consecutive epochs without a
        # new best validation loss.
        if val_loss < best_loss:
            best_loss = val_loss
            counter = 0
        else:
            counter += 1
            if counter >= patience:
                print("Early stopping triggered")
                break

# Evaluation function
def evaluate_model(model, test_loader, loss_fn=None):
    """Evaluate ``model`` on ``test_loader`` and return the mean per-batch loss.

    Prints the classification accuracy, where a sample is predicted positive
    when ``sigmoid(logit) > 0.5``.

    Args:
        model: Network to evaluate; switched to eval mode by this function.
        test_loader: Sized iterable of ``(X_batch, y_batch)`` batches.
        loss_fn: Loss applied to ``(logits, targets)``. Defaults to the
            module-level ``criterion`` so existing two-argument calls keep working.

    Returns:
        The sum of the batch losses divided by the number of batches.

    Raises:
        ValueError: If ``test_loader`` yields no samples.
    """
    if loss_fn is None:
        loss_fn = criterion
    model.eval()
    total_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            # Squeeze only the trailing logit dimension. A bare squeeze() turns
            # a single-sample batch into a 0-d tensor, which mismatches y_batch.
            outputs = model(X_batch).squeeze(-1)
            loss = loss_fn(outputs, y_batch)
            total_loss += loss.item()
            predictions = (torch.sigmoid(outputs) > 0.5).float()
            correct += (predictions == y_batch).sum().item()
            total += y_batch.size(0)
    if total == 0:
        raise ValueError("test_loader yielded no samples; cannot compute validation metrics")
    val_loss = total_loss / len(test_loader)
    accuracy = correct / total
    print(f"Testing Accuracy: {accuracy:.4f}")
    return val_loss

# Kick off training; validation runs after every epoch inside train_model
train_model(
    model=model,
    train_loader=train_loader,
    test_loader=test_loader,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
)




Testing Accuracy: 0.8027
Epoch 1/50, Loss: 0.6280, Val Loss: 0.6059
Testing Accuracy: 0.9184
Epoch 2/50, Loss: 0.4848, Val Loss: 0.5067
Testing Accuracy: 0.9252
Epoch 3/50, Loss: 0.4037, Val Loss: 0.4391
Testing Accuracy: 0.9388
Epoch 4/50, Loss: 0.3454, Val Loss: 0.3871
Testing Accuracy: 0.9320
Epoch 5/50, Loss: 0.3088, Val Loss: 0.3568
Testing Accuracy: 0.9320
Epoch 6/50, Loss: 0.2662, Val Loss: 0.3369
Testing Accuracy: 0.9388
Epoch 7/50, Loss: 0.2500, Val Loss: 0.3138
Testing Accuracy: 0.9456
Epoch 8/50, Loss: 0.2336, Val Loss: 0.2888
Testing Accuracy: 0.9456
Epoch 9/50, Loss: 0.2412, Val Loss: 0.2754
Testing Accuracy: 0.9456
Epoch 10/50, Loss: 0.2122, Val Loss: 0.2763
Testing Accuracy: 0.9524
Epoch 11/50, Loss: 0.1830, Val Loss: 0.2610
Testing Accuracy: 0.9320
Epoch 12/50, Loss: 0.2071, Val Loss: 0.2525
Testing Accuracy: 0.9388
Epoch 13/50, Loss: 0.1979, Val Loss: 0.2505
Testing Accuracy: 0.9320
Epoch 14/50, Loss: 0.1692, Val Loss: 0.2470
Testing Accuracy: 0.9456
Epoch 15/50, Loss: