In [2]:
import numpy as np
import pandas as pd
import torch
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader, TensorDataset
import transformers

In [3]:
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data"
columns = ["age", "workclass", "fnlwgt", "education", "education-num", "marital-status",
           "occupation", "relationship", "race", "sex", "capital-gain", "capital-loss",
           "hours-per-week", "native-country", "income"]

df = pd.read_csv(url, header=None, names=columns, skipinitialspace=True)  # Trim spaces

# Drop unnecessary columns
df.drop(columns=["fnlwgt"], inplace=True)

# Convert categorical variables to one-hot encoding
categorical_features = ["workclass", "education", "marital-status", "occupation",
                        "relationship", "race", "native-country", "sex"]
df = pd.get_dummies(df, columns=categorical_features, dtype=float)  # Ensure float dtype

# Convert income to binary
df["income"] = df["income"].map(lambda x: 0 if x == "<=50K" else 1)

X= df.drop(columns=["income"]).values
y= df["income"].values

X= torch.tensor(X, dtype=torch.float32)
y= torch.tensor(y, dtype=torch.long)

X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.6, random_state=42)
X_finetune, X_test, y_finetune, Y_test = train_test_split(X_temp, y_temp, test_size=0.3333, random_state=42)

train_loader = DataLoader(TensorDataset(X_train, y_train),batch_size=32,shuffle=True)
finetune_loader = DataLoader(TensorDataset(X_temp, y_temp), batch_size=32, shuffle=True)
test_loader = DataLoader(TensorDataset(X_test, Y_test), batch_size=32, shuffle=False)

In [4]:
import torch.nn as nn


class LinearModel(nn.Module):
    def __init__(self, input_dim):
        super(LinearModel, self).__init__()
        self.linear = nn.Linear(input_dim, 1)
    
    def forward(self, x):
        return torch.sigmoid(self.linear(x))



In [5]:
class TwoLayerNN(nn.Module):
    def __init__(self, input_dim, hidden_dim=32):
        super(TwoLayerNN, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, 1)
        
    
    def forward(self, x):
        x = self.relu(self.fc1(x))
        return torch.sigmoid(self.fc2(x))

In [6]:
def train_model(model, train_loader, epochs=10, lr=0.001):
   optimizer = torch.optim.Adam(model.parameters(), lr=lr)
   criterion = nn.BCELoss()
   model.train()

   for epoch in range(epochs):
        total_loss = 0
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            y_pred = model(X_batch)
            loss = criterion(y_pred.squeeze(), y_batch.float())
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        
        print(f"Epoch {epoch+1}, Loss: {total_loss/len(train_loader):.4f}")


In [7]:
input_dim = X.shape[1]
output_dim = len(torch.unique(y))

linear_model = LinearModel(input_dim)
mlp_model = TwoLayerNN(input_dim, hidden_dim=32)

print("Training Linear Model")
train_model(linear_model, train_loader)

print("Training MLP Model")
train_model(mlp_model, train_loader)


Training Linear Model
Epoch 1, Loss: 3.5928
Epoch 2, Loss: 3.4600
Epoch 3, Loss: 4.5436
Epoch 4, Loss: 4.6617
Epoch 5, Loss: 4.1413
Epoch 6, Loss: 3.0217
Epoch 7, Loss: 3.0249
Epoch 8, Loss: 4.0635
Epoch 9, Loss: 4.0833
Epoch 10, Loss: 3.9214
Training MLP Model
Epoch 1, Loss: 1.1066
Epoch 2, Loss: 1.3547
Epoch 3, Loss: 1.0321
Epoch 4, Loss: 0.4341
Epoch 5, Loss: 1.8345
Epoch 6, Loss: 1.4303
Epoch 7, Loss: 0.3670
Epoch 8, Loss: 0.4139
Epoch 9, Loss: 0.3761
Epoch 10, Loss: 0.3739


In [20]:
import loralib as lora

# Modify the model to use LoRA in Linear Layers
class LoRAMLP(nn.Module):
    def __init__(self, input_dim, hidden_dim=32):
        super(LoRAMLP, self).__init__()
        self.fc1 = lora.Linear(input_dim, hidden_dim, r=8)  # Low-rank adaptation
        self.relu = nn.ReLU()
        self.fc2 = lora.Linear(hidden_dim, 1, r=8)  # LoRA applied
        
    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return torch.sigmoid(x)




In [24]:
print("Fine-tuning Linear Model with LoRA...")
train_model(linear_model, finetune_loader, lr=0.001)

print("Fine-tuning MLP Model with LoRA...")
mlp_model_lora = LoRAMLP(input_dim, hidden_dim=32)
train_model(mlp_model_lora, finetune_loader, epochs=20, lr=0.005)


Fine-tuning Linear Model with LoRA...
Epoch 1, Loss: 2.6496
Epoch 2, Loss: 2.9593
Epoch 3, Loss: 3.0333
Epoch 4, Loss: 3.0464
Epoch 5, Loss: 2.9925
Epoch 6, Loss: 3.6039
Epoch 7, Loss: 2.9815
Epoch 8, Loss: 3.3733
Epoch 9, Loss: 2.7831
Epoch 10, Loss: 5.1741
Fine-tuning MLP Model with LoRA...
Epoch 1, Loss: 1.4118
Epoch 2, Loss: 0.3808
Epoch 3, Loss: 0.3730
Epoch 4, Loss: 0.3743
Epoch 5, Loss: 0.3622
Epoch 6, Loss: 0.3822
Epoch 7, Loss: 0.3667
Epoch 8, Loss: 0.3593
Epoch 9, Loss: 0.3623
Epoch 10, Loss: 0.3704
Epoch 11, Loss: 0.4453
Epoch 12, Loss: 0.3527
Epoch 13, Loss: 0.3542
Epoch 14, Loss: 0.3564
Epoch 15, Loss: 0.3835
Epoch 16, Loss: 0.3569
Epoch 17, Loss: 0.3491
Epoch 18, Loss: 0.3524
Epoch 19, Loss: 0.3534
Epoch 20, Loss: 1.5519


In [25]:
from sklearn.metrics import accuracy_score

def evaluate_model(model, dataloader):
    model.eval()
    y_true, y_pred = [], []
    
    with torch.no_grad():
        for inputs, labels in dataloader:
            outputs = model(inputs)
            predictions = torch.argmax(outputs, dim=1)
            y_true.extend(labels.tolist())
            y_pred.extend(predictions.tolist())

    accuracy = accuracy_score(y_true, y_pred)
    return accuracy


In [26]:
print("Evaluating Linear Model...")
acc_linear = evaluate_model(linear_model, test_loader)
print(f"Linear Model Accuracy: {acc_linear:.4f}")

print("Evaluating MLP Model...")
acc_mlp = evaluate_model(mlp_model, test_loader)
print(f"MLP Model Accuracy: {acc_mlp:.4f}")

print("Evaluating MLP Model with LoRA...")
acc_mlp_lora = evaluate_model(mlp_model_lora, test_loader)
print(f"MLP Model with LoRA Accuracy: {acc_mlp_lora:.4f}")


Evaluating Linear Model...
Linear Model Accuracy: 0.7566
Evaluating MLP Model...
MLP Model Accuracy: 0.7566
Evaluating MLP Model with LoRA...
MLP Model with LoRA Accuracy: 0.7566


In [27]:
results = {
    "Linear Model": acc_linear,
    "MLP Model": acc_mlp,
    "MLP + LoRA": acc_mlp_lora
}
print("\nFinal Accuracy Comparison:")
for model, acc in results.items():
    print(f"{model}: {acc:.4%}")



Final Accuracy Comparison:
Linear Model: 75.6603%
MLP Model: 75.6603%
MLP + LoRA: 75.6603%
