In [32]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

def adult_dataset(fname):
    """Load the Adult census CSV and return standardized train/test splits.

    The file is read without a header row, "?" is treated as a missing value
    and every row with a missing value is dropped. Each categorical column
    (including the ``income`` target) is label-encoded to integers. The data
    is then split 80/20 with a fixed seed and the features are standardized
    using statistics fitted on the training split only.

    Args:
        fname: Path (or file-like object) handed to ``pandas.read_csv``.

    Returns:
        A tuple ``(X_train, X_test, y_train, y_test)`` where the ``X`` arrays
        hold the standardized features and the ``y`` arrays hold the
        label-encoded ``income`` column.
    """
    columns = [
        'age', 'workclass', 'fnlwgt', 'education', 'education_num',
        'marital_status', 'occupation', 'relationship', 'race', 'sex',
        'capital_gain', 'capital_loss', 'hours_per_week', 'native_country',
        'income',
    ]
    categorical = [
        'workclass', 'education', 'marital_status', 'occupation',
        'relationship', 'race', 'sex', 'native_country', 'income',
    ]

    # Raw string: "\s" is a regex escape, not a Python string escape. Without
    # the r-prefix newer Python versions emit an invalid-escape warning.
    # A regex separator requires the python parsing engine.
    data = pd.read_csv(fname, names=columns, sep=r',\s', na_values="?", engine='python')
    data.dropna(inplace=True)

    # A fresh encoder per column so each column gets its own integer mapping.
    for col in categorical:
        data[col] = LabelEncoder().fit_transform(data[col])

    X = data.drop('income', axis=1).values
    y = data['income'].values
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Fit the scaler on the training rows only, then reuse it for the test
    # rows so no test-set statistics leak into preprocessing.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    return X_train, X_test, y_train, y_test

class AdultDataset(Dataset):
    """Map-style dataset over parallel feature and label sequences.

    Items are converted to tensors lazily, one sample at a time, and moved to
    the module-level ``device`` on access.
    """

    def __init__(self, features, labels):
        # Stored as given; conversion to tensors happens in __getitem__.
        self.features, self.labels = features, labels

    def __len__(self):
        # The number of samples is the number of feature rows.
        return len(self.features)

    def __getitem__(self, idx):
        """Return ``(feature_tensor, label_tensor)`` for sample ``idx``."""
        sample = torch.tensor(self.features[idx], dtype=torch.float).to(device)
        target = torch.tensor(self.labels[idx], dtype=torch.long).to(device)
        return sample, target

class DNNModel(nn.Module):
    """Feed-forward classifier with three ReLU hidden layers (64, 32, 16 units).

    Args:
        in_features: Width of each input row. Defaults to 14, the value that
            was previously hard-coded.
        num_classes: Number of output logits. Defaults to 2, the value that
            was previously hard-coded.

    The forward pass returns raw logits (no softmax is applied).
    """

    def __init__(self, in_features=14, num_classes=2):
        super().__init__()
        # Attribute names and creation order are kept so state_dict keys and
        # seeded weight initialization stay the same as before.
        self.layer1 = nn.Linear(in_features, 64)
        self.layer2 = nn.Linear(64, 32)
        self.layer3 = nn.Linear(32, 16)
        self.layer4 = nn.Linear(16, num_classes)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Map a batch ``x`` with ``in_features`` columns to ``num_classes`` logits."""
        x = self.relu(self.layer1(x))
        x = self.relu(self.layer2(x))
        x = self.relu(self.layer3(x))
        # No activation on the final layer: the output is raw logits.
        return self.layer4(x)

# Load the census file and wrap the resulting train/test arrays as datasets.
X_train, X_test, y_train, y_test = adult_dataset(
    '/content/drive/My Drive/adult.data'
)
train_dataset, test_dataset = (
    AdultDataset(X_train, y_train),
    AdultDataset(X_test, y_test),
)

# Only the training batches are shuffled; the test loader keeps a fixed order.
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)

# Model, loss and optimizer; the model is placed on the selected device.
model = DNNModel().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# Train for a fixed number of passes over the training loader.
epochs = 20
for epoch in range(epochs):
    model.train()
    running_loss = 0.0  # sum of per-batch losses for this epoch
    for features, labels in train_loader:
        # Clear gradients left over from the previous step before backprop.
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss = running_loss + loss.item()
    # Report the mean of the per-batch losses.
    print(f'Epoch {epoch + 1}, Loss {running_loss/len(train_loader)}')

# Evaluate on the held-out split: count correct predictions over all batches.
model.eval()
correct = total = 0
with torch.no_grad():  # no gradients are needed for evaluation
    for features, labels in test_loader:
        outputs = model(features)
        # The index of the largest logit along dim 1 is the predicted class.
        _, predicted = outputs.data.max(dim=1)
        total += labels.shape[0]
        correct += (predicted == labels).sum().item()
accuracy = 100 * correct / total
print(f'Accuracy: {accuracy}%')




Epoch 1, Loss 0.37054824879151266
Epoch 2, Loss 0.3300261752068541
Epoch 3, Loss 0.32619711956905045
Epoch 4, Loss 0.3236861278310077
Epoch 5, Loss 0.3213376080261842
Epoch 6, Loss 0.3204278574872563
Epoch 7, Loss 0.3177044071996485
Epoch 8, Loss 0.3154513647538105
Epoch 9, Loss 0.3139807550279239
Epoch 10, Loss 0.31196566987356156
Epoch 11, Loss 0.3099340831732932
Epoch 12, Loss 0.3079038630460055
Epoch 13, Loss 0.3062124520312739
Epoch 14, Loss 0.30439794404816084
Epoch 15, Loss 0.3027121424447489
Epoch 16, Loss 0.3011080264817667
Epoch 17, Loss 0.29953231913897826
Epoch 18, Loss 0.2980346078181085
Epoch 19, Loss 0.2963746211000981
Epoch 20, Loss 0.2944387579919728
Accuracy: 85.50157487830487%
