In [70]:
import pandas as pd
import numpy as np
import torch 
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from torch.utils.data import Dataset, DataLoader


In [72]:
# Dataset loading and preprocessing
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data"
columns = ["age", "workclass", "fnlwgt", "education", "education-num", "marital-status",
           "occupation", "relationship", "race", "sex", "capital-gain", "capital-loss",
           "hours-per-week", "native-country", "income"]

# Column names are supplied explicitly through `names`. skipinitialspace=True
# strips whitespace after each delimiter so the string labels compare exactly
# against the mapping keys used below.
df = pd.read_csv(url, names=columns, skipinitialspace=True)

# Drop fnlwgt; it is not used as a predictor here.
# (Non-inplace drop so the statement reads as a plain rebinding.)
df = df.drop(columns=["fnlwgt"])

# One-hot encode the categorical columns.
# NOTE(review): the UCI description marks missing values as "?"; if so they
# become their own indicator columns here rather than being imputed - confirm
# that this is intended.
categorical_cols = ["workclass", "education", "marital-status", "occupation",
                    "relationship", "race", "sex", "native-country"]
df = pd.get_dummies(df, columns=categorical_cols)

# Map the target to 0/1. Series.map turns any label missing from the mapping
# into NaN, which would only surface much later as a NaN loss, so fail loudly.
income_labels = {"<=50K": 0, ">50K": 1}
income_encoded = df["income"].map(income_labels)
unmapped_rows = income_encoded.isna()
if unmapped_rows.any():
    unexpected = sorted(df.loc[unmapped_rows, "income"].astype(str).unique())
    raise ValueError(f"Unmapped income labels: {unexpected}")
df["income"] = income_encoded.astype("int64")

# Split target and features. The explicit float64 cast matters: get_dummies
# may emit bool indicator columns, and a mixed int/bool frame would otherwise
# be exported as an object-dtype array.
X = df.drop(columns=["income"]).to_numpy(dtype=np.float64)  # Features
y = df["income"].to_numpy()  # Target

# Hold out 20% of the rows for testing; the fixed random_state keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=50)

# Standardise the features. The scaler is fitted on the training split only
# and then applied to the test split, so test statistics never influence it.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [76]:
# Dataset wrapper around the preprocessed feature matrix and labels
class AdultDataset(Dataset):
    """Map-style dataset holding features and binary targets as float32 tensors.

    Args:
        X: array-like of shape (N, D) with the feature rows.
        y: array-like with N labels, either 1-D or already shaped (N, 1).

    Raises:
        ValueError: if X and y do not contain the same number of samples.
    """

    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        # Store targets as an (N, 1) column so they line up with a
        # single-output model. reshape(-1, 1) does this for 1-D labels and
        # leaves labels that are already (N, 1) untouched.
        self.y = torch.tensor(y, dtype=torch.float32).reshape(-1, 1)
        if len(self.X) != len(self.y):
            raise ValueError(
                f"X and y must have the same number of samples, "
                f"got {len(self.X)} and {len(self.y)}"
            )

    def __len__(self):
        """Return the number of samples in the dataset."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the (features, target) pair stored at position ``idx``."""
        return self.X[idx], self.y[idx]
# Create datasets
train_dataset = AdultDataset(X_train, y_train)
test_dataset = AdultDataset(X_test, y_test)

# Wrap the datasets in DataLoaders. The training and evaluation cells iterate
# over `train_loader` / `test_loader`, so they must exist on a fresh kernel.
# NOTE(review): the batch size is chosen here; the value used for the recorded
# outputs is not preserved in the notebook - adjust if it was different.
BATCH_SIZE = 64
# Reshuffle the training rows every epoch; keep the test order fixed.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)



In [78]:
import torch.nn as nn
import torch.optim as optim

class LinearModel(nn.Module):
    """Single linear layer followed by a sigmoid (logistic regression).

    Args:
        input_dim: number of input features per sample.
    """

    def __init__(self, input_dim):
        super().__init__()
        # One output unit: the layer maps (N, input_dim) -> (N, 1).
        self.linear = nn.Linear(input_dim, 1)

    def forward(self, x):
        """Return the sigmoid of the linear layer's output for batch ``x``."""
        logits = self.linear(x)
        return logits.sigmoid()

# Build the linear baseline, sized to the number of feature columns.
# NOTE(review): the following cell rebinds `model`, `criterion` and `optimizer`
# to the two-layer network, so in a top-to-bottom run this linear model is
# replaced before any training call.
input_dim = X_train.shape[1]
model = LinearModel(input_dim=input_dim)

# Binary cross-entropy on the model's sigmoid output, optimised with Adam
criterion = nn.BCELoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)



In [80]:
class TwoLayerNN(nn.Module):
    """Feed-forward network: Linear -> ReLU -> Linear -> sigmoid.

    Args:
        input_dim: number of input features per sample.
        hidden_dim: width of the hidden layer (defaults to 32).
    """

    def __init__(self, input_dim, hidden_dim=32):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, 1)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the sigmoid of the second layer's output for batch ``x``."""
        hidden = self.fc1(x)
        activated = self.relu(hidden)
        logits = self.fc2(activated)
        return logits.sigmoid()

# Swap in the two-layer network; this rebinds `model`, `criterion` and
# `optimizer`, so the training and evaluation calls below act on this network.
hidden_dim = 32
model = TwoLayerNN(input_dim=input_dim, hidden_dim=hidden_dim)

# Binary cross-entropy on the model's sigmoid output, optimised with Adam
criterion = nn.BCELoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)


In [82]:
def train_model(model, train_loader, criterion, optimizer, epochs=10):
    """Train ``model`` for ``epochs`` passes over ``train_loader``.

    Each batch runs the usual step: zero the gradients, forward pass, loss,
    backward pass, optimizer update. After every epoch the mean of the
    per-batch losses is printed.

    Args:
        model: module to train; it is switched to training mode.
        train_loader: iterable yielding ``(X_batch, y_batch)`` pairs. It does
            not need to support ``len()``; batches are counted while iterating.
        criterion: callable ``criterion(y_pred, y_batch)`` returning a scalar
            loss tensor.
        optimizer: optimizer that updates ``model``'s parameters.
        epochs: number of passes over ``train_loader``.

    Returns:
        list[float]: mean batch loss for each epoch, in order. An epoch that
        yields no batches is recorded as ``nan``.
    """
    model.train()
    epoch_losses = []

    for epoch in range(epochs):
        total_loss = 0.0
        num_batches = 0
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            y_pred = model(X_batch)
            loss = criterion(y_pred, y_batch)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            num_batches += 1

        # Unweighted mean over batches (a smaller final batch counts the same
        # as a full one). An empty loader gives nan instead of dividing by 0.
        mean_loss = total_loss / num_batches if num_batches else float("nan")
        epoch_losses.append(mean_loss)
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {mean_loss:.4f}")

    return epoch_losses

# Run 10 training epochs on the currently bound model
train_model(
    model=model,
    train_loader=train_loader,
    criterion=criterion,
    optimizer=optimizer,
    epochs=10,
)



Epoch [1/10], Loss: 0.3888
Epoch [2/10], Loss: 0.3247
Epoch [3/10], Loss: 0.3156
Epoch [4/10], Loss: 0.3114
Epoch [5/10], Loss: 0.3081
Epoch [6/10], Loss: 0.3056
Epoch [7/10], Loss: 0.3033
Epoch [8/10], Loss: 0.3009
Epoch [9/10], Loss: 0.2998
Epoch [10/10], Loss: 0.2978


In [86]:
def evaluate_model(model, test_loader):
    """Compute, print and return the classification accuracy of ``model``.

    The model is switched to evaluation mode and run without gradient
    tracking. Outputs of at least 0.5 are counted as class 1, the rest as
    class 0.

    Args:
        model: module whose output is a probability per sample.
        test_loader: iterable yielding ``(X_batch, y_batch)`` pairs.

    Returns:
        float: fraction of correctly classified samples, or ``nan`` when
        ``test_loader`` yields no samples.
    """
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            probs = model(X_batch)
            y_pred = (probs >= 0.5).float()  # Convert to binary predictions
            # Align label shape with the predictions before comparing. An
            # (N,) label tensor against (N, 1) predictions would otherwise
            # broadcast to (N, N) and silently miscount.
            targets = y_batch.reshape(y_pred.shape)
            correct += (y_pred == targets).sum().item()
            total += y_batch.size(0)

    # Guard the empty-loader case instead of raising ZeroDivisionError.
    accuracy = correct / total if total else float("nan")
    print(f"Test Accuracy: {accuracy:.4f}")
    return accuracy
    
# Report accuracy of the currently bound model on the held-out split
evaluate_model(model=model, test_loader=test_loader)


Test Accuracy: 0.8627
