In [13]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_regression
from sklearn.preprocessing import StandardScaler

# Synthetic regression data standing in for the housing set
# (replace make_regression with your actual housing data).
X, y = make_regression(n_samples=1000, n_features=10, noise=0.1, random_state=42)

# Hold out 20% of the samples for validation.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features and target. Both scalers are fitted on the training
# split only and then applied unchanged to the validation split.
scaler_X = StandardScaler()
scaler_y = StandardScaler()

X_train = scaler_X.fit_transform(X_train)
y_train = scaler_y.fit_transform(y_train.reshape(-1, 1))
X_val = scaler_X.transform(X_val)
y_val = scaler_y.transform(y_val.reshape(-1, 1))

# Convert to float32 tensors; targets are kept as column vectors.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
y_val_tensor = torch.tensor(y_val, dtype=torch.float32).view(-1, 1)

# Define the model
class HousingSingleLayerModel(nn.Module):
    """Regression network: Linear -> ReLU -> Linear with a single output unit.

    Args:
        input_dim: Number of input features per sample.
        hidden_dim: Width of the hidden layer.
    """

    def __init__(self, input_dim, hidden_dim):
        super().__init__()
        # Attribute names double as state_dict keys, so they are kept stable.
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        """Return one unactivated prediction per input row."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)

# Train function
def train_single_layer_housing_model(
    X_train, y_train, X_val, y_val, learning_rate, epochs, model=None
):
    """Train a regressor with full-batch SGD on MSE loss.

    Every epoch performs one optimizer step on the whole training set and then
    evaluates the whole validation set. Losses are recorded and printed on
    every 100th epoch.

    Args:
        X_train: Training features; ``X_train.shape[1]`` sizes the default model.
        y_train: Training targets, shaped like the model output.
        X_val: Validation features.
        y_val: Validation targets.
        learning_rate: Step size passed to ``optim.SGD``.
        epochs: Number of full-batch updates to run.
        model: Optional ``nn.Module`` to train instead of the default
            ``HousingSingleLayerModel`` with 8 hidden units.

    Returns:
        ``(model, train_losses, val_losses)`` where the two lists hold one
        float per logged (every 100th) epoch. Both lists are empty when
        ``epochs < 100``.
    """
    if model is None:
        model = HousingSingleLayerModel(X_train.shape[1], 8)
    optimizer = optim.SGD(model.parameters(), lr=learning_rate)
    loss_fn = nn.MSELoss()
    train_losses, val_losses = [], []
    # Losses of the most recent epoch, tracked separately from the logged
    # history so the final report works even when nothing was logged.
    final_train_loss = final_val_loss = None

    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        predictions = model(X_train)
        # Note: this loss is measured before the parameter update below,
        # whereas the validation loss is measured after it.
        train_loss = loss_fn(predictions, y_train)
        train_loss.backward()
        optimizer.step()

        # Validation
        model.eval()
        with torch.no_grad():
            val_predictions = model(X_val)
            val_loss = loss_fn(val_predictions, y_val).item()

        final_train_loss, final_val_loss = train_loss.item(), val_loss

        if (epoch + 1) % 100 == 0:
            train_losses.append(final_train_loss)
            val_losses.append(final_val_loss)
            print(f"Epoch {epoch + 1}/{epochs} - Train Loss: {final_train_loss:.4f}, Validation Loss: {final_val_loss:.4f}")

    # Previously indexed train_losses[-1], which raised IndexError for
    # epochs < 100 and reported a stale value when epochs % 100 != 0.
    if final_train_loss is not None:
        print("Final Train Loss:", final_train_loss)
        print("Final Validation Loss:", final_val_loss)
    return model, train_losses, val_losses

# Train the model
# Seed PyTorch's global RNG first so the weight initialization (and therefore
# the printed loss curve) is the same on every run of this cell.
torch.manual_seed(42)
single_layer_model, train_losses_1a, val_losses_1a = train_single_layer_housing_model(
    X_train_tensor, y_train_tensor, X_val_tensor, y_val_tensor, learning_rate=0.05, epochs=1000
)


Epoch 100/1000 - Train Loss: 0.0240, Validation Loss: 0.0229
Epoch 200/1000 - Train Loss: 0.0175, Validation Loss: 0.0174
Epoch 300/1000 - Train Loss: 0.0152, Validation Loss: 0.0154
Epoch 400/1000 - Train Loss: 0.0139, Validation Loss: 0.0143
Epoch 500/1000 - Train Loss: 0.0129, Validation Loss: 0.0137
Epoch 600/1000 - Train Loss: 0.0121, Validation Loss: 0.0131
Epoch 700/1000 - Train Loss: 0.0113, Validation Loss: 0.0126
Epoch 800/1000 - Train Loss: 0.0105, Validation Loss: 0.0120
Epoch 900/1000 - Train Loss: 0.0096, Validation Loss: 0.0113
Epoch 1000/1000 - Train Loss: 0.0085, Validation Loss: 0.0104
Final Train Loss: 0.008531803265213966
Final Validation Loss: 0.01039207261055708


In [14]:
class HousingMultiLayerModel(nn.Module):
    """Regression network with three ReLU hidden layers and a single output.

    Dropout (p=0.2) is applied after the second and third hidden activations.

    Args:
        input_dim: Number of input features per sample.
        hidden_dims: Indexable of layer widths; entries 0, 1 and 2 are used.
    """

    def __init__(self, input_dim, hidden_dims):
        super().__init__()
        first, second, third = hidden_dims[0], hidden_dims[1], hidden_dims[2]
        # Attribute names double as state_dict keys, so they are kept stable.
        self.fc1 = nn.Linear(input_dim, first)
        self.fc2 = nn.Linear(first, second)
        self.fc3 = nn.Linear(second, third)
        self.fc4 = nn.Linear(third, 1)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.2)

    def forward(self, x):
        """Return one unactivated prediction per input row."""
        h = self.relu(self.fc1(x))
        h = self.relu(self.fc2(h))
        h = self.dropout(h)
        h = self.relu(self.fc3(h))
        h = self.dropout(h)
        return self.fc4(h)

def train_multi_layer_housing_model(
    X_train, y_train, X_val, y_val, learning_rate, epochs, model=None
):
    """Train a regressor with full-batch Adam on MSE loss.

    Every epoch performs one optimizer step on the whole training set and then
    evaluates the whole validation set. Losses are recorded and printed on
    every 100th epoch.

    Args:
        X_train: Training features; ``X_train.shape[1]`` sizes the default model.
        y_train: Training targets, shaped like the model output.
        X_val: Validation features.
        y_val: Validation targets.
        learning_rate: Step size passed to ``optim.Adam``.
        epochs: Number of full-batch updates to run.
        model: Optional ``nn.Module`` to train instead of the default
            ``HousingMultiLayerModel`` with hidden widths ``[64, 32, 16]``.

    Returns:
        ``(model, train_losses, val_losses)`` where the two lists hold one
        float per logged (every 100th) epoch. Both lists are empty when
        ``epochs < 100``.
    """
    if model is None:
        model = HousingMultiLayerModel(X_train.shape[1], [64, 32, 16])
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    loss_fn = nn.MSELoss()
    train_losses, val_losses = [], []
    # Losses of the most recent epoch, tracked separately from the logged
    # history so the final report works even when nothing was logged.
    final_train_loss = final_val_loss = None

    for epoch in range(epochs):
        # Training mode: the default model's dropout layers are active here,
        # so the training loss is measured with dropout while the validation
        # loss below is measured without it.
        model.train()
        optimizer.zero_grad()
        predictions = model(X_train)
        train_loss = loss_fn(predictions, y_train)
        train_loss.backward()
        optimizer.step()

        # Validation
        model.eval()
        with torch.no_grad():
            val_predictions = model(X_val)
            val_loss = loss_fn(val_predictions, y_val).item()

        final_train_loss, final_val_loss = train_loss.item(), val_loss

        if (epoch + 1) % 100 == 0:
            train_losses.append(final_train_loss)
            val_losses.append(final_val_loss)
            print(f"Epoch {epoch + 1}/{epochs} - Train Loss: {final_train_loss:.4f}, Validation Loss: {final_val_loss:.4f}")

    # Previously indexed train_losses[-1], which raised IndexError for
    # epochs < 100 and reported a stale value when epochs % 100 != 0.
    if final_train_loss is not None:
        print("Final Train Loss:", final_train_loss)
        print("Final Validation Loss:", final_val_loss)
    return model, train_losses, val_losses

# Train the model
# Seed PyTorch's global RNG first so weight initialization and dropout masks
# (and therefore the printed loss curve) are the same on every run of this cell.
torch.manual_seed(42)
multi_layer_model, train_losses_1b, val_losses_1b = train_multi_layer_housing_model(
    X_train_tensor, y_train_tensor, X_val_tensor, y_val_tensor, learning_rate=0.001, epochs=1000
)


Epoch 100/1000 - Train Loss: 0.0975, Validation Loss: 0.0274
Epoch 200/1000 - Train Loss: 0.0584, Validation Loss: 0.0096
Epoch 300/1000 - Train Loss: 0.0475, Validation Loss: 0.0063
Epoch 400/1000 - Train Loss: 0.0480, Validation Loss: 0.0055
Epoch 500/1000 - Train Loss: 0.0522, Validation Loss: 0.0064
Epoch 600/1000 - Train Loss: 0.0445, Validation Loss: 0.0049
Epoch 700/1000 - Train Loss: 0.0482, Validation Loss: 0.0044
Epoch 800/1000 - Train Loss: 0.0465, Validation Loss: 0.0061
Epoch 900/1000 - Train Loss: 0.0506, Validation Loss: 0.0045
Epoch 1000/1000 - Train Loss: 0.0376, Validation Loss: 0.0063
Final Train Loss: 0.037569064646959305
Final Validation Loss: 0.006254489999264479


In [15]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler

# Load the cancer dataset
cancer_data = load_breast_cancer()
X, y = cancer_data.data, cancer_data.target

# Hold out 20% of the samples for validation.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features; the scaler is fitted on the training split only.
# The targets are left unscaled.
scaler_X = StandardScaler()
X_train = scaler_X.fit_transform(X_train)
X_val = scaler_X.transform(X_val)

# Convert to float32 tensors; targets become column vectors.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
y_val_tensor = torch.tensor(y_val, dtype=torch.float32).view(-1, 1)

# Define the single-hidden-layer model
class CancerSingleLayerModel(nn.Module):
    """Binary classifier: Linear -> ReLU -> Linear -> Sigmoid, one output unit.

    Args:
        input_dim: Number of input features per sample.
        hidden_dim: Width of the hidden layer.
    """

    def __init__(self, input_dim, hidden_dim):
        super().__init__()
        # Attribute names double as state_dict keys, so they are kept stable.
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return one sigmoid-activated score per input row."""
        hidden = self.relu(self.fc1(x))
        logits = self.fc2(hidden)
        return self.sigmoid(logits)

# Train function
def train_single_layer_cancer_model(
    X_train, y_train, X_val, y_val, learning_rate, epochs, model=None
):
    """Train a binary classifier with full-batch Adam on binary cross-entropy.

    Every epoch performs one optimizer step on the whole training set and then
    evaluates the whole validation set. Losses are recorded and printed on
    every 100th epoch.

    Args:
        X_train: Training features; ``X_train.shape[1]`` sizes the default model.
        y_train: Training targets, shaped like the model output. ``nn.BCELoss``
            requires float targets in [0, 1].
        X_val: Validation features.
        y_val: Validation targets.
        learning_rate: Step size passed to ``optim.Adam``.
        epochs: Number of full-batch updates to run.
        model: Optional ``nn.Module`` to train instead of the default
            ``CancerSingleLayerModel`` with 32 hidden units. Because the loss
            is ``nn.BCELoss``, its outputs must already lie in [0, 1].

    Returns:
        ``(model, train_losses, val_losses)`` where the two lists hold one
        float per logged (every 100th) epoch. Both lists are empty when
        ``epochs < 100``.
    """
    if model is None:
        model = CancerSingleLayerModel(X_train.shape[1], 32)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    loss_fn = nn.BCELoss()  # Binary Cross-Entropy Loss
    train_losses, val_losses = [], []
    # Losses of the most recent epoch, tracked separately from the logged
    # history so the final report works even when nothing was logged.
    final_train_loss = final_val_loss = None

    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        predictions = model(X_train)
        # Note: this loss is measured before the parameter update below,
        # whereas the validation loss is measured after it.
        train_loss = loss_fn(predictions, y_train)
        train_loss.backward()
        optimizer.step()

        # Validation
        model.eval()
        with torch.no_grad():
            val_predictions = model(X_val)
            val_loss = loss_fn(val_predictions, y_val).item()

        final_train_loss, final_val_loss = train_loss.item(), val_loss

        if (epoch + 1) % 100 == 0:
            train_losses.append(final_train_loss)
            val_losses.append(final_val_loss)
            print(f"Epoch {epoch + 1}/{epochs} - Train Loss: {final_train_loss:.4f}, Validation Loss: {final_val_loss:.4f}")

    # Previously indexed train_losses[-1], which raised IndexError for
    # epochs < 100 and reported a stale value when epochs % 100 != 0.
    if final_train_loss is not None:
        print("Final Train Loss:", final_train_loss)
        print("Final Validation Loss:", final_val_loss)
    return model, train_losses, val_losses

# Train the model
# Seed PyTorch's global RNG first so the weight initialization (and therefore
# the printed loss curve) is the same on every run of this cell.
torch.manual_seed(42)
single_layer_cancer_model, train_losses_2a, val_losses_2a = train_single_layer_cancer_model(
    X_train_tensor, y_train_tensor, X_val_tensor, y_val_tensor, learning_rate=0.001, epochs=1000
)


Epoch 100/1000 - Train Loss: 0.1647, Validation Loss: 0.1503
Epoch 200/1000 - Train Loss: 0.0857, Validation Loss: 0.0794
Epoch 300/1000 - Train Loss: 0.0616, Validation Loss: 0.0651
Epoch 400/1000 - Train Loss: 0.0495, Validation Loss: 0.0615
Epoch 500/1000 - Train Loss: 0.0412, Validation Loss: 0.0603
Epoch 600/1000 - Train Loss: 0.0347, Validation Loss: 0.0606
Epoch 700/1000 - Train Loss: 0.0290, Validation Loss: 0.0621
Epoch 800/1000 - Train Loss: 0.0239, Validation Loss: 0.0642
Epoch 900/1000 - Train Loss: 0.0195, Validation Loss: 0.0667
Epoch 1000/1000 - Train Loss: 0.0159, Validation Loss: 0.0697
Final Train Loss: 0.01588340476155281
Final Validation Loss: 0.06972919404506683


In [16]:
class CancerMultiLayerModel(nn.Module):
    """Binary classifier with three ReLU hidden layers and a sigmoid output unit.

    Args:
        input_dim: Number of input features per sample.
        hidden_dims: Indexable of layer widths; entries 0, 1 and 2 are used.
    """

    def __init__(self, input_dim, hidden_dims):
        super().__init__()
        first, second, third = hidden_dims[0], hidden_dims[1], hidden_dims[2]
        # Attribute names double as state_dict keys, so they are kept stable.
        self.fc1 = nn.Linear(input_dim, first)
        self.fc2 = nn.Linear(first, second)
        self.fc3 = nn.Linear(second, third)
        self.fc4 = nn.Linear(third, 1)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return one sigmoid-activated score per input row."""
        h = x
        for layer in (self.fc1, self.fc2, self.fc3):
            h = self.relu(layer(h))
        return self.sigmoid(self.fc4(h))

# Train function
def train_multi_layer_cancer_model(
    X_train, y_train, X_val, y_val, learning_rate, epochs, model=None
):
    """Train a binary classifier with full-batch Adam on binary cross-entropy.

    Every epoch performs one optimizer step on the whole training set and then
    evaluates the whole validation set. Losses are recorded and printed on
    every 100th epoch.

    Args:
        X_train: Training features; ``X_train.shape[1]`` sizes the default model.
        y_train: Training targets, shaped like the model output. ``nn.BCELoss``
            requires float targets in [0, 1].
        X_val: Validation features.
        y_val: Validation targets.
        learning_rate: Step size passed to ``optim.Adam``.
        epochs: Number of full-batch updates to run.
        model: Optional ``nn.Module`` to train instead of the default
            ``CancerMultiLayerModel`` with hidden widths ``[64, 32, 16]``.
            Because the loss is ``nn.BCELoss``, its outputs must already lie
            in [0, 1].

    Returns:
        ``(model, train_losses, val_losses)`` where the two lists hold one
        float per logged (every 100th) epoch. Both lists are empty when
        ``epochs < 100``.
    """
    if model is None:
        model = CancerMultiLayerModel(X_train.shape[1], [64, 32, 16])
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    loss_fn = nn.BCELoss()
    train_losses, val_losses = [], []
    # Losses of the most recent epoch, tracked separately from the logged
    # history so the final report works even when nothing was logged.
    final_train_loss = final_val_loss = None

    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        predictions = model(X_train)
        # Note: this loss is measured before the parameter update below,
        # whereas the validation loss is measured after it.
        train_loss = loss_fn(predictions, y_train)
        train_loss.backward()
        optimizer.step()

        # Validation
        model.eval()
        with torch.no_grad():
            val_predictions = model(X_val)
            val_loss = loss_fn(val_predictions, y_val).item()

        final_train_loss, final_val_loss = train_loss.item(), val_loss

        if (epoch + 1) % 100 == 0:
            train_losses.append(final_train_loss)
            val_losses.append(final_val_loss)
            print(f"Epoch {epoch + 1}/{epochs} - Train Loss: {final_train_loss:.4f}, Validation Loss: {final_val_loss:.4f}")

    # Previously indexed train_losses[-1], which raised IndexError for
    # epochs < 100 and reported a stale value when epochs % 100 != 0.
    if final_train_loss is not None:
        print("Final Train Loss:", final_train_loss)
        print("Final Validation Loss:", final_val_loss)
    return model, train_losses, val_losses

# Train the model
# Seed PyTorch's global RNG first so the weight initialization (and therefore
# the printed loss curve) is the same on every run of this cell.
torch.manual_seed(42)
multi_layer_cancer_model, train_losses_2b, val_losses_2b = train_multi_layer_cancer_model(
    X_train_tensor, y_train_tensor, X_val_tensor, y_val_tensor, learning_rate=0.001, epochs=1000
)


Epoch 100/1000 - Train Loss: 0.0477, Validation Loss: 0.0601
Epoch 200/1000 - Train Loss: 0.0136, Validation Loss: 0.0695
Epoch 300/1000 - Train Loss: 0.0016, Validation Loss: 0.0906
Epoch 400/1000 - Train Loss: 0.0005, Validation Loss: 0.1075
Epoch 500/1000 - Train Loss: 0.0003, Validation Loss: 0.1200
Epoch 600/1000 - Train Loss: 0.0002, Validation Loss: 0.1297
Epoch 700/1000 - Train Loss: 0.0001, Validation Loss: 0.1372
Epoch 800/1000 - Train Loss: 0.0001, Validation Loss: 0.1436
Epoch 900/1000 - Train Loss: 0.0001, Validation Loss: 0.1493
Epoch 1000/1000 - Train Loss: 0.0000, Validation Loss: 0.1543
Final Train Loss: 4.543393515632488e-05
Final Validation Loss: 0.1542828530073166


In [8]:
import time

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

# Load CIFAR-10 dataset
# Images are converted to tensors and then normalized with mean 0.5 / std 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,), std=(0.5,)),
    ]
)
train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# Only the training batches are shuffled; the test order stays fixed.
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)

# Define the model with one hidden layer of 256 nodes
class CIFARSingleLayerModel(nn.Module):
    """Classifier that flattens its input and applies Linear -> ReLU -> Linear.

    Args:
        input_dim: Number of values per sample after flattening.
        hidden_dim: Width of the hidden layer.
        output_dim: Number of output scores per sample.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        # Attribute names double as state_dict keys, so they are kept stable.
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Return unnormalized class scores, one row per sample in the batch."""
        batch_size = x.size(0)
        flat = x.view(batch_size, -1)  # collapse everything but the batch axis
        return self.fc2(self.relu(self.fc1(flat)))

# Train function
def train_single_layer_cifar_model(train_loader, test_loader, learning_rate, epochs, model=None):
    """Train a classifier with mini-batch SGD and report test accuracy per epoch.

    Each epoch runs one pass over ``train_loader`` (one optimizer step per
    batch) followed by one evaluation pass over ``test_loader``.

    Args:
        train_loader: Iterable of ``(images, labels)`` batches used for training.
        test_loader: Iterable of ``(images, labels)`` batches used for evaluation.
        learning_rate: Step size passed to ``optim.SGD``.
        epochs: Number of passes over ``train_loader``.
        model: Optional ``nn.Module`` to train instead of the default
            ``CIFARSingleLayerModel`` (32*32*3 inputs, 256 hidden units,
            10 outputs).

    Returns:
        ``(model, train_losses, test_accuracies, training_time)`` where
        ``train_losses`` holds the mean batch loss per epoch,
        ``test_accuracies`` the accuracy in percent per epoch, and
        ``training_time`` the elapsed seconds for the whole loop. An epoch
        with an empty loader records ``nan`` for the affected metric.
    """
    if model is None:
        input_dim = 32 * 32 * 3  # CIFAR-10 images are 32x32x3
        hidden_dim = 256
        output_dim = 10  # 10 classes
        model = CIFARSingleLayerModel(input_dim, hidden_dim, output_dim)
    optimizer = optim.SGD(model.parameters(), lr=learning_rate)
    loss_fn = nn.CrossEntropyLoss()

    train_losses, test_accuracies = [], []
    # perf_counter is monotonic, so the measured duration cannot be skewed
    # by system clock adjustments during a long run.
    start_time = time.perf_counter()

    for epoch in range(epochs):
        # Training step
        model.train()
        running_loss = 0.0
        num_batches = 0
        for images, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(images)
            loss = loss_fn(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            num_batches += 1

        # Validation step
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in test_loader:
                predicted = model(images).argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        # Guard both averages so an empty loader yields nan instead of
        # raising ZeroDivisionError.
        train_losses.append(running_loss / num_batches if num_batches else float("nan"))
        test_accuracies.append(100 * correct / total if total else float("nan"))

        print(f"Epoch {epoch+1}/{epochs}, Loss: {train_losses[-1]:.4f}, Accuracy: {test_accuracies[-1]:.2f}%")

    training_time = time.perf_counter() - start_time
    return model, train_losses, test_accuracies, training_time

# Train the model
# Seed PyTorch's global RNG first so weight initialization and the training
# loader's shuffling order are the same on every run of this cell.
# NOTE(review): this rebinds `single_layer_model`, which the housing cell also
# assigns; whichever cell ran last determines what the name refers to.
torch.manual_seed(42)
single_layer_model, train_losses_3a, test_accuracies_3a, training_time_3a = train_single_layer_cifar_model(
    train_loader, test_loader, learning_rate=0.01, epochs=100
)

# Display results
print("Problem 3a Results (Single Hidden Layer):")
print("Training Time:", training_time_3a, "seconds")
print("Train Losses (last epoch):", train_losses_3a[-1])
print("Test Accuracy:", test_accuracies_3a[-1], "%")


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170M/170M [00:15<00:00, 10.9MB/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified
Epoch 1/100, Loss: 1.8846, Accuracy: 40.32%
Epoch 2/100, Loss: 1.6635, Accuracy: 43.82%
Epoch 3/100, Loss: 1.5794, Accuracy: 45.95%
Epoch 4/100, Loss: 1.5239, Accuracy: 46.78%
Epoch 5/100, Loss: 1.4802, Accuracy: 48.32%
Epoch 6/100, Loss: 1.4406, Accuracy: 48.96%
Epoch 7/100, Loss: 1.4071, Accuracy: 49.48%
Epoch 8/100, Loss: 1.3752, Accuracy: 49.92%
Epoch 9/100, Loss: 1.3459, Accuracy: 50.40%
Epoch 10/100, Loss: 1.3188, Accuracy: 51.25%
Epoch 11/100, Loss: 1.2936, Accuracy: 51.00%
Epoch 12/100, Loss: 1.2684, Accuracy: 51.75%
Epoch 13/100, Loss: 1.2459, Accuracy: 52.10%
Epoch 14/100, Loss: 1.2237, Accuracy: 52.32%
Epoch 15/100, Loss: 1.2033, Accuracy: 53.08%
Epoch 16/100, Loss: 1.1840, Accuracy: 51.88%
Epoch 17/100, Loss: 1.1636, Accuracy: 52.56%
Epoch 18/100, Loss: 1.1452, Accuracy: 52.65%
Epoch 19/100, Loss: 1.1279, Accuracy: 52.96%
Epoch 20/100, Loss: 1.1095, Accuracy: 52.47%
Epoch 21/100, Loss: 

In [9]:
# Define the model with three hidden layers
class CIFARMultiLayerModel(nn.Module):
    """Classifier that flattens its input and applies three ReLU hidden layers.

    Args:
        input_dim: Number of values per sample after flattening.
        hidden_dims: Indexable of layer widths; entries 0, 1 and 2 are used.
        output_dim: Number of output scores per sample.
    """

    def __init__(self, input_dim, hidden_dims, output_dim):
        super().__init__()
        first, second, third = hidden_dims[0], hidden_dims[1], hidden_dims[2]
        # Attribute names double as state_dict keys, so they are kept stable.
        self.fc1 = nn.Linear(input_dim, first)
        self.fc2 = nn.Linear(first, second)
        self.fc3 = nn.Linear(second, third)
        self.fc4 = nn.Linear(third, output_dim)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return unnormalized class scores, one row per sample in the batch."""
        h = x.view(x.size(0), -1)  # collapse everything but the batch axis
        for layer in (self.fc1, self.fc2, self.fc3):
            h = self.relu(layer(h))
        return self.fc4(h)

# Train function for multi-layer network
def train_multi_layer_cifar_model(train_loader, test_loader, learning_rate, epochs, model=None):
    """Train a classifier with mini-batch SGD and report test accuracy per epoch.

    Each epoch runs one pass over ``train_loader`` (one optimizer step per
    batch) followed by one evaluation pass over ``test_loader``.

    Args:
        train_loader: Iterable of ``(images, labels)`` batches used for training.
        test_loader: Iterable of ``(images, labels)`` batches used for evaluation.
        learning_rate: Step size passed to ``optim.SGD``.
        epochs: Number of passes over ``train_loader``.
        model: Optional ``nn.Module`` to train instead of the default
            ``CIFARMultiLayerModel`` (32*32*3 inputs, hidden widths
            ``[512, 256, 128]``, 10 outputs).

    Returns:
        ``(model, train_losses, test_accuracies, training_time)`` where
        ``train_losses`` holds the mean batch loss per epoch,
        ``test_accuracies`` the accuracy in percent per epoch, and
        ``training_time`` the elapsed seconds for the whole loop. An epoch
        with an empty loader records ``nan`` for the affected metric.
    """
    if model is None:
        input_dim = 32 * 32 * 3
        hidden_dims = [512, 256, 128]  # Three hidden layers
        output_dim = 10
        model = CIFARMultiLayerModel(input_dim, hidden_dims, output_dim)
    optimizer = optim.SGD(model.parameters(), lr=learning_rate)
    loss_fn = nn.CrossEntropyLoss()

    train_losses, test_accuracies = [], []
    # perf_counter is monotonic, so the measured duration cannot be skewed
    # by system clock adjustments during a long run.
    start_time = time.perf_counter()

    for epoch in range(epochs):
        # Training step
        model.train()
        running_loss = 0.0
        num_batches = 0
        for images, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(images)
            loss = loss_fn(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            num_batches += 1

        # Validation step
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in test_loader:
                predicted = model(images).argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        # Guard both averages so an empty loader yields nan instead of
        # raising ZeroDivisionError.
        train_losses.append(running_loss / num_batches if num_batches else float("nan"))
        test_accuracies.append(100 * correct / total if total else float("nan"))

        print(f"Epoch {epoch+1}/{epochs}, Loss: {train_losses[-1]:.4f}, Accuracy: {test_accuracies[-1]:.2f}%")

    training_time = time.perf_counter() - start_time
    return model, train_losses, test_accuracies, training_time

# Train the model
# Seed PyTorch's global RNG first so weight initialization and the training
# loader's shuffling order are the same on every run of this cell.
# NOTE(review): this rebinds `multi_layer_model`, which the housing cell also
# assigns; whichever cell ran last determines what the name refers to.
torch.manual_seed(42)
multi_layer_model, train_losses_3b, test_accuracies_3b, training_time_3b = train_multi_layer_cifar_model(
    train_loader, test_loader, learning_rate=0.01, epochs=100
)

# Display results
print("Problem 3b Results (Three Hidden Layers):")
print("Training Time:", training_time_3b, "seconds")
print("Train Losses (last epoch):", train_losses_3b[-1])
print("Test Accuracy:", test_accuracies_3b[-1], "%")


Epoch 1/100, Loss: 2.1856, Accuracy: 29.34%
Epoch 2/100, Loss: 1.8995, Accuracy: 36.61%
Epoch 3/100, Loss: 1.7320, Accuracy: 40.86%
Epoch 4/100, Loss: 1.6304, Accuracy: 43.67%
Epoch 5/100, Loss: 1.5553, Accuracy: 46.28%
Epoch 6/100, Loss: 1.4884, Accuracy: 47.91%
Epoch 7/100, Loss: 1.4295, Accuracy: 48.12%
Epoch 8/100, Loss: 1.3773, Accuracy: 49.80%
Epoch 9/100, Loss: 1.3295, Accuracy: 50.33%
Epoch 10/100, Loss: 1.2848, Accuracy: 51.81%
Epoch 11/100, Loss: 1.2439, Accuracy: 51.54%
Epoch 12/100, Loss: 1.2039, Accuracy: 51.77%
Epoch 13/100, Loss: 1.1677, Accuracy: 52.24%
Epoch 14/100, Loss: 1.1329, Accuracy: 48.65%
Epoch 15/100, Loss: 1.0998, Accuracy: 51.94%
Epoch 16/100, Loss: 1.0651, Accuracy: 51.57%
Epoch 17/100, Loss: 1.0324, Accuracy: 52.92%
Epoch 18/100, Loss: 1.0002, Accuracy: 52.51%
Epoch 19/100, Loss: 0.9679, Accuracy: 50.49%
Epoch 20/100, Loss: 0.9386, Accuracy: 50.97%
Epoch 21/100, Loss: 0.9076, Accuracy: 52.44%
Epoch 22/100, Loss: 0.8751, Accuracy: 53.82%
Epoch 23/100, Loss: