In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
# Load the dataset
clean_df = pd.read_csv('../clean_df.csv')
clean_df.head(5)

Unnamed: 0,EDUC,MARSTAT,SERVICES,LOS,PSOURCE,NOPRIOR,ARRESTS,EMPLOY,METHUSE,PSYPROB,...,TRNQFLG,BARBFLG,SEDHPFLG,INHFLG,OTCFLG,OTHERFLG,DIVISION,REGION,IDU,ALCDRUG
0,3,1,7,7,6,1,0,2,2,1,...,0,0,0,0,0,0,9,4,0,1
1,3,4,7,8,1,1,0,2,2,1,...,0,0,0,0,0,1,9,4,0,3
2,3,4,7,7,3,1,0,1,2,2,...,0,0,0,0,0,0,9,4,0,2
3,5,1,7,4,7,0,0,1,2,2,...,0,0,0,0,0,0,9,4,0,1
4,3,4,7,3,7,1,1,4,2,1,...,0,0,0,0,0,0,9,4,0,3


In [3]:
# Set the features variable 
X_train_nn = clean_df.drop(columns='REASON')
# Set the target variable
Y_train_nn = clean_df["REASON"]

# Split the data into training and test sets
#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Split data into training and testing sets
X_train_nn, X_test_nn, y_train_nn, y_test_nn = train_test_split(X_train_nn, Y_train_nn, test_size=0.2, random_state=42)



In [4]:
# Standardize the data
scaler = StandardScaler()
X_train_nn = scaler.fit_transform(X_train_nn)
X_test_nn = scaler.transform(X_test_nn)

In [6]:
# Convert the Pandas Series to a NumPy array
y_train_array = y_train_nn.to_numpy()
y_test_array = y_test_nn.to_numpy()

# Now convert the NumPy array to a PyTorch tensor
y_train_tensor = torch.tensor(y_train_array, dtype=torch.long)
y_test_tensor = torch.tensor(y_test_array, dtype=torch.long)

# Define the neural network architecture
class NeuralNetwork(nn.Module):
    def __init__(self, input_dim):
        super(NeuralNetwork, self).__init__()
        self.fc1 = nn.Linear(input_dim, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 2)  # Assuming binary classification

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.softmax(self.fc3(x), dim=1)
        return x

# Convert data to PyTorch tensors
X_train_tensor = torch.tensor(X_train_nn, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test_nn, dtype=torch.float32)


In [7]:
# Create DataLoader for training and validation
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Initialize the model, loss function, and optimizer
input_dim = X_train_nn.shape[1]
model = NeuralNetwork(input_dim)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [10]:
# Train the model
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    
    epoch_loss = running_loss / len(train_loader)
    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {accuracy:.4f}')

Epoch [1/10], Loss: 0.4942
Epoch [2/10], Loss: 0.4926
Epoch [3/10], Loss: 0.4918
Epoch [4/10], Loss: 0.4908
Epoch [5/10], Loss: 0.4902
Epoch [6/10], Loss: 0.4897
Epoch [7/10], Loss: 0.4894
Epoch [8/10], Loss: 0.4890
Epoch [9/10], Loss: 0.4888
Epoch [10/10], Loss: 0.4885


In [1]:
# Validate the model, print the loss and accuracy
model.eval()
with torch.no_grad():
        val_loss = 0
        correct = 0
        total = 0
        for X_batch, y_batch in test_loader:
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            val_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += y_batch.size(0)
            correct += (predicted == y_batch).sum().item()

        val_loss /= len(test_loader.dataset)
        # val_loss = len(test_loader.dataset)
        validation_list.append(val_loss)
        accuracy = correct / total
        accuracy_list.append(accuracy)
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {val_loss:.4f}, Accuracy: {accuracy:.4f}')
plt.plot(validation_list)
plt.plot(accuracy_list)

NameError: name 'model' is not defined