<a href="https://colab.research.google.com/github/Cbroh03/IntroToDL/blob/main/RTML_HW1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch import cuda
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import numpy as np
from sklearn import metrics
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.notebook import tqdm
import ssl
# Run on the GPU when one is available (for example a Colab GPU runtime) and
# fall back to the CPU otherwise. Every model and batch below is moved with
# `.to(device)`, so no other change is needed to switch devices.
device = 'cuda' if cuda.is_available() else 'cpu'

In [None]:
################################    Problem 1A:
# Dataset directory, relative to the notebook.
data_path = '../../Datasets'

# First pass over the training split with a plain ToTensor transform, used
# only to measure the per-channel statistics needed for normalisation.
normalizing_set = datasets.CIFAR10(
    root=data_path,
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)

# Stack every image along a new trailing axis, then flatten everything after
# the leading (channel) axis so each of the 3 rows holds one channel's pixels.
images = torch.stack([sample[0] for sample in normalizing_set], dim=3)
per_channel = images.view(3, -1)
mean = per_channel.mean(dim=1)
std = per_channel.std(dim=1)

# Transform applied to both splits: tensor conversion followed by
# normalisation with the training-set statistics measured above.
normalizer = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])

cifar_train = datasets.CIFAR10(root=data_path, train=True, transform=normalizer)
cifar_val = datasets.CIFAR10(root=data_path, train=False, transform=normalizer)

# Only the training batches are shuffled; validation order stays fixed.
train_loader = DataLoader(cifar_train, batch_size=64, shuffle=True)
val_loader = DataLoader(cifar_val, batch_size=64, shuffle=False)
class CifarNet(nn.Module):
    """Fully connected classifier: 3072 -> 512 -> 128 -> 32 -> 10.

    The input is flattened, each hidden layer is followed by a ReLU, and the
    last layer returns its raw outputs (no activation is applied to them).
    """

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.relu = nn.ReLU()
        self.fc1 = nn.Linear(32 * 32 * 3, 512)
        self.fc2 = nn.Linear(512, 128)
        self.fc3 = nn.Linear(128, 32)
        self.fc4 = nn.Linear(32, 10)

    def forward(self, x):
        """Return the 10 output scores for each sample in the batch ``x``."""
        hidden = self.flatten(x)
        # All hidden layers share the same "linear then ReLU" pattern.
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = self.relu(layer(hidden))
        return self.fc4(hidden)

# Model, loss and optimiser for Problem 1A.
model = CifarNet().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001)

epochs = 20

# Per-epoch history, read by the summary prints and plots after the loop.
training_losses = []
training_accuracies = []
validation_losses = []
validation_accuracies = []
for epoch in range(epochs):
    model.train()

    # running_loss holds a per-SAMPLE sum: the criterion returns the batch
    # mean, so each batch is weighted by its size. Dividing by the sample
    # count then gives the true epoch mean even when the last batch is
    # smaller than the others (a plain mean of batch means over-weights it).
    running_loss = 0.0
    correct = 0
    total = 0

    with tqdm(total=len(train_loader), desc=f'Epoch {epoch + 1}/{epochs}', unit=' batch') as pbar:
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * labels.size(0)
            pbar.update()

            # Predicted class = index of the largest output score.
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        training_losses.append(running_loss / total)
        training_accuracies.append(100 * correct / total)

        # Validation pass. It stays inside the `with` block so the progress
        # bar is still open when the postfix is set below.
        model.eval()

        running_loss = 0.0
        correct = 0
        total = 0
        # Reset every epoch, so after the loop these hold the predictions
        # and targets of the final epoch only.
        predicted_labels = []
        true_labels = []

        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = model(inputs)
                loss = criterion(outputs, labels)
                running_loss += loss.item() * labels.size(0)

                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

                predicted_labels.extend(predicted.cpu().numpy())
                true_labels.extend(labels.cpu().numpy())

        validation_losses.append(running_loss / total)
        validation_accuracies.append(100 * correct / total)

        pbar.set_postfix({'Training Loss ': training_losses[-1], 'Validation Loss ': validation_losses[-1]})

# Metrics of the last completed epoch.
print("Final Training Loss:", training_losses[-1])
print("Final Validation Loss:", validation_losses[-1])
print("Final Training Accuracy:", training_accuracies[-1])
print("Final Validation Accuracy:", validation_accuracies[-1])
epochs_range = range(1, epochs + 1)

# Both learning-curve panels share ONE figure. Previously a second
# plt.figure() call came before subplot(1, 2, 2), which put each panel in its
# own figure with the other half left empty.
plt.figure(figsize=(12, 4))

# Left panel: loss per epoch.
plt.subplot(1, 2, 1)
plt.plot(epochs_range, training_losses, label='Training Loss')
plt.plot(epochs_range, validation_losses, label='Validation Loss')
plt.title('Training and Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

# Right panel: accuracy (in percent) per epoch.
plt.subplot(1, 2, 2)
plt.plot(epochs_range, training_accuracies, label='Training Accuracy')
plt.plot(epochs_range, validation_accuracies, label='Validation Accuracy')
plt.title('Training and Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

# Confusion matrix and per-class report from the validation labels collected
# by the training loop.
confusion_matrix = metrics.confusion_matrix(true_labels, predicted_labels)
print(metrics.classification_report(true_labels, predicted_labels))

# Tick labels for the heatmap. Assumed to be listed in the dataset's
# class-index order -- TODO confirm against cifar_val.classes.
class_labels = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
plt.figure(figsize=(12, 8))
sns.heatmap(confusion_matrix, annot=True, fmt='d', xticklabels=class_labels, yticklabels=class_labels)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix')
plt.show()

In [None]:
######################################Problem 1B
# Dataset directory, relative to the notebook.
data_path = '../../Datasets'

# Un-normalised view of the training split, loaded only to measure the
# per-channel mean and standard deviation.
normalizing_set = datasets.CIFAR10(
    root=data_path,
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)

# New trailing axis indexes the images; flattening all axes after the first
# leaves one row of pixel values for each of the 3 channels.
images = torch.stack([sample[0] for sample in normalizing_set], dim=3)
channel_pixels = images.view(3, -1)
mean = channel_pixels.mean(dim=1)
std = channel_pixels.std(dim=1)

# Tensor conversion + normalisation, shared by the train and validation splits.
normalizer = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])

cifar_train = datasets.CIFAR10(root=data_path, train=True, transform=normalizer)
cifar_val = datasets.CIFAR10(root=data_path, train=False, transform=normalizer)

# Shuffle training batches only; keep validation order deterministic.
train_loader = DataLoader(cifar_train, batch_size=64, shuffle=True)
val_loader = DataLoader(cifar_val, batch_size=64, shuffle=False)
class CifarNet(nn.Module):
    """Deeper fully connected classifier.

    Layer widths: 3072 -> 1024 -> 256 -> 128 -> 64 -> 16 -> 10. The input is
    flattened, each hidden layer is followed by a ReLU, and the last layer
    returns its raw outputs (no activation is applied to them).
    """

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.relu = nn.ReLU()
        self.fc1 = nn.Linear(32 * 32 * 3, 1024)
        self.fc2 = nn.Linear(1024, 256)
        self.fc3 = nn.Linear(256, 128)
        self.fc4 = nn.Linear(128, 64)
        self.fc5 = nn.Linear(64, 16)
        self.fc6 = nn.Linear(16, 10)

    def forward(self, x):
        """Return the 10 output scores for each sample in the batch ``x``."""
        hidden = self.flatten(x)
        # The five hidden layers all follow the "linear then ReLU" pattern.
        for layer in (self.fc1, self.fc2, self.fc3, self.fc4, self.fc5):
            hidden = self.relu(layer(hidden))
        return self.fc6(hidden)

# Model, loss and optimiser for Problem 1B.
model = CifarNet().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001)

epochs = 20

# Per-epoch history, read by the summary prints and plots after the loop.
training_losses = []
training_accuracies = []
validation_losses = []
validation_accuracies = []
for epoch in range(epochs):
    model.train()

    # running_loss holds a per-SAMPLE sum: the criterion returns the batch
    # mean, so each batch is weighted by its size. Dividing by the sample
    # count then gives the true epoch mean even when the last batch is
    # smaller than the others (a plain mean of batch means over-weights it).
    running_loss = 0.0
    correct = 0
    total = 0

    with tqdm(total=len(train_loader), desc=f'Epoch {epoch + 1}/{epochs}', unit=' batch') as pbar:
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * labels.size(0)
            pbar.update()

            # Predicted class = index of the largest output score.
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        training_losses.append(running_loss / total)
        training_accuracies.append(100 * correct / total)

        # Validation pass. It stays inside the `with` block so the progress
        # bar is still open when the postfix is set below.
        model.eval()

        running_loss = 0.0
        correct = 0
        total = 0
        # Reset every epoch, so after the loop these hold the predictions
        # and targets of the final epoch only.
        predicted_labels = []
        true_labels = []

        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = model(inputs)
                loss = criterion(outputs, labels)
                running_loss += loss.item() * labels.size(0)

                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

                predicted_labels.extend(predicted.cpu().numpy())
                true_labels.extend(labels.cpu().numpy())

        validation_losses.append(running_loss / total)
        validation_accuracies.append(100 * correct / total)

        pbar.set_postfix({'Training Loss ': training_losses[-1], 'Validation Loss ': validation_losses[-1]})

# Metrics of the last completed epoch.
print("Final Training Loss:", training_losses[-1])
print("Final Validation Loss:", validation_losses[-1])
print("Final Training Accuracy:", training_accuracies[-1])
print("Final Validation Accuracy:", validation_accuracies[-1])
epochs_range = range(1, epochs + 1)

# Both learning-curve panels share ONE figure. Previously a second
# plt.figure() call came before subplot(1, 2, 2), which put each panel in its
# own figure with the other half left empty.
plt.figure(figsize=(12, 4))

# Left panel: loss per epoch.
plt.subplot(1, 2, 1)
plt.plot(epochs_range, training_losses, label='Training Loss')
plt.plot(epochs_range, validation_losses, label='Validation Loss')
plt.title('Training and Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

# Right panel: accuracy (in percent) per epoch.
plt.subplot(1, 2, 2)
plt.plot(epochs_range, training_accuracies, label='Training Accuracy')
plt.plot(epochs_range, validation_accuracies, label='Validation Accuracy')
plt.title('Training and Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

# Confusion matrix and per-class report from the validation labels collected
# by the training loop.
confusion_matrix = metrics.confusion_matrix(true_labels, predicted_labels)
print(metrics.classification_report(true_labels, predicted_labels))

# Tick labels for the heatmap. Assumed to be listed in the dataset's
# class-index order -- TODO confirm against cifar_val.classes.
class_labels = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
plt.figure(figsize=(12, 8))
sns.heatmap(confusion_matrix, annot=True, fmt='d', xticklabels=class_labels, yticklabels=class_labels)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix')
plt.show()

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch import cuda
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.preprocessing import StandardScaler as SS
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm

In [None]:
######################            Problem 2A
# Raw housing table; later cells reuse `full_df` and `usefull_cols`.
full_df = pd.read_csv('../../Datasets/house-train.csv')

# Columns kept for modelling; the last entry ('SalePrice') is the target.
usefull_cols = [
    'OverallQual', 'GrLivArea', 'GarageArea', 'TotalBsmtSF', 'FullBath',
    'YearBuilt', 'YearRemodAdd', 'Fireplaces', 'LotFrontage', 'WoodDeckSF',
    'OpenPorchSF', 'ExterQual', 'Neighborhood', 'MSZoning', 'Alley', 'LotShape',
    'LandContour', 'Condition1', 'HouseStyle', 'MasVnrType', 'SaleCondition', 'SalePrice',
]

unscaled_df = full_df[usefull_cols].copy()

# Features are every selected column except the target. Values that cannot be
# parsed as numbers become NaN and are then replaced by 0 together with any
# values that were already missing.
unscaled_X = unscaled_df.drop(columns=['SalePrice'])
unscaled_X = unscaled_X.apply(pd.to_numeric, errors='coerce')
unscaled_X = unscaled_X.fillna(0)
# Target as an (n_samples, 1) column, the 2-D layout the scaler expects.
unscaled_Y = unscaled_df[['SalePrice']].values

# Standardise features and target independently.
# NOTE(review): both scalers are fitted on the full table before the
# train/validation split below -- confirm that this is intended.
X = SS().fit_transform(unscaled_X)
Y = SS().fit_transform(unscaled_Y)

x_train, x_val, y_train, y_val = train_test_split(X, Y, train_size=0.8, random_state=7)

# Convert all four splits to float32 tensors.
x_train, y_train, x_val, y_val = (
    torch.tensor(split, dtype=torch.float32)
    for split in (x_train, y_train, x_val, y_val)
)

train = TensorDataset(x_train, y_train)
val = TensorDataset(x_val, y_val)

# Shuffle training batches only; keep validation order deterministic.
train_loader = DataLoader(dataset=train, batch_size=64, shuffle=True)
val_loader = DataLoader(dataset=val, batch_size=64, shuffle=False)
class RegressNet(nn.Module):
    """Fully connected regressor: ``input`` -> 128 -> 32 -> 1.

    Args:
        input: Number of features in each sample.
    """

    def __init__(self, input):
        super().__init__()
        self.fc1 = nn.Linear(input, 128)
        self.fc2 = nn.Linear(128, 32)
        self.fc3 = nn.Linear(32, 1)

    def forward(self, x):
        """Return one prediction per row of ``x`` (last dimension of size 1)."""
        hidden = x
        # ReLU follows each hidden layer; the output layer stays linear.
        for layer in (self.fc1, self.fc2):
            hidden = torch.relu(layer(hidden))
        return self.fc3(hidden)

# Model, loss and optimiser for Problem 2A.
model = RegressNet(input=x_train.shape[1]).to(device)
criterion = nn.MSELoss()
optimizer = optim.AdamW(model.parameters(), lr=0.0008)

epochs = 100
# Per-epoch mean losses, read by the summary prints and plot after the loop.
training_losses = []
validation_losses = []
for epoch in range(epochs):
    model.train()

    # Sum of per-sample losses: the criterion returns the batch mean, so it
    # is scaled back up by the batch size before being accumulated.
    running_loss = 0.0

    with tqdm(total=len(train_loader), desc=f'Epoch {epoch + 1}/{epochs}', unit=' batch') as pbar:
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * inputs.size(0)
            pbar.update()

        # Bug fix: running_loss is a per-sample sum, so it must be divided by
        # the number of SAMPLES. Dividing by the number of batches inflated
        # the reported loss by roughly the batch size.
        training_losses.append(running_loss / len(train_loader.dataset))

        # Validation pass. It stays inside the `with` block so the progress
        # bar is still open when the postfix is set below.
        model.eval()

        running_loss = 0.0
        validation_total = 0.0

        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = model(inputs)
                loss = criterion(outputs, labels)
                running_loss += loss.item() * inputs.size(0)
                # Raw sum of squared errors, used for the RMSE below.
                validation_total += ((outputs - labels) ** 2).sum().item()

        validation_losses.append(running_loss / len(val_loader.dataset))
        # RMSE is in the units of the labels served by val_loader.
        rmse = np.sqrt(validation_total / len(val_loader.dataset))

        pbar.set_postfix({'Training Loss ': training_losses[-1], 'Validation Loss ': validation_losses[-1], 'Validation RMSE ': rmse})


# Metrics of the last completed epoch.
final_metrics = (
    ("Final Training Loss:", training_losses[-1]),
    ("Final Validation Loss:", validation_losses[-1]),
    ("Final Validation RMSE:", rmse),
)
for caption, value in final_metrics:
    print(caption, value)

# Learning curves: one line per loss history, x-axis is the 0-based epoch index.
loss_curves = (
    (training_losses, 'Training Loss'),
    (validation_losses, 'Validation Loss'),
)
for history, curve_label in loss_curves:
    plt.plot(history, label=curve_label)
plt.title('Training and Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
##### Problem 2B
unscaled_df = full_df[usefull_cols].copy()

# Columns expanded into one-hot indicator columns.
# NOTE(review): 'ExterQual' is selected in usefull_cols but is not listed
# here; if it holds non-numeric values it is coerced to NaN and zero-filled
# below -- confirm whether it should be encoded as well.
categorical_cols = ['Neighborhood', 'MSZoning', 'Alley', 'LotShape', 'LandContour',
                    'Condition1', 'HouseStyle', 'MasVnrType', 'SaleCondition']

# Bug fix: remove the target BEFORE building the feature matrix. Previously
# the whole frame (including 'SalePrice') was encoded and scaled into X, so
# the network received the value it was supposed to predict as an input.
unencoded_unscaled_X = unscaled_df.drop(columns=['SalePrice'])

# dtype=int yields 0/1 indicator columns directly, which replaces the former
# True/False -> 1/0 `.replace` pass.
encoded_unscaled_X = pd.get_dummies(unencoded_unscaled_X, columns=categorical_cols, dtype=int)
# Anything still non-numeric becomes NaN; every NaN is then replaced by 0.
encoded_unscaled_X = encoded_unscaled_X.apply(pd.to_numeric, errors='coerce').fillna(0)

# Target as an (n_samples, 1) column, the 2-D layout the scaler expects.
unscaled_Y = unscaled_df['SalePrice'].values.reshape(-1, 1)

# Standardise features and target independently.
# NOTE(review): both scalers are fitted on the full table before the
# train/validation split below -- confirm that this is intended.
X = SS().fit_transform(encoded_unscaled_X)
Y = SS().fit_transform(unscaled_Y)

x_train, x_val, y_train, y_val = train_test_split(X, Y, train_size=0.8, random_state=7)

x_train = torch.tensor(x_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
x_val = torch.tensor(x_val, dtype=torch.float32)
y_val = torch.tensor(y_val, dtype=torch.float32)

train = TensorDataset(x_train, y_train)
val = TensorDataset(x_val, y_val)

# Shuffle training batches only; keep validation order deterministic.
train_loader = DataLoader(dataset=train, batch_size=64, shuffle=True)
val_loader = DataLoader(dataset=val, batch_size=64, shuffle=False)
class RegressNet(nn.Module):
    """Fully connected regressor: ``input`` -> 128 -> 32 -> 1.

    Args:
        input: Number of features in each sample.
    """

    def __init__(self, input):
        super().__init__()
        self.fc1 = nn.Linear(input, 128)
        self.fc2 = nn.Linear(128, 32)
        self.fc3 = nn.Linear(32, 1)

    def forward(self, x):
        """Return one prediction per row of ``x`` (last dimension of size 1)."""
        first_hidden = torch.relu(self.fc1(x))
        second_hidden = torch.relu(self.fc2(first_hidden))
        # The output layer is left linear.
        return self.fc3(second_hidden)

# Model, loss and optimiser for Problem 2B.
model = RegressNet(input=x_train.shape[1]).to(device)
criterion = nn.MSELoss()
optimizer = optim.AdamW(model.parameters(), lr=0.0008)

epochs = 100
# Per-epoch mean losses, read by the summary prints and plot after the loop.
training_losses = []
validation_losses = []
for epoch in range(epochs):
    model.train()

    # Sum of per-sample losses: the criterion returns the batch mean, so it
    # is scaled back up by the batch size before being accumulated.
    running_loss = 0.0

    with tqdm(total=len(train_loader), desc=f'Epoch {epoch + 1}/{epochs}', unit=' batch') as pbar:
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * inputs.size(0)
            pbar.update()

        # Bug fix: running_loss is a per-sample sum, so it must be divided by
        # the number of SAMPLES. Dividing by the number of batches inflated
        # the reported loss by roughly the batch size.
        training_losses.append(running_loss / len(train_loader.dataset))

        # Validation pass. It stays inside the `with` block so the progress
        # bar is still open when the postfix is set below.
        model.eval()

        running_loss = 0.0
        validation_total = 0.0

        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = model(inputs)
                loss = criterion(outputs, labels)
                running_loss += loss.item() * inputs.size(0)
                # Raw sum of squared errors, used for the RMSE below.
                validation_total += ((outputs - labels) ** 2).sum().item()

        validation_losses.append(running_loss / len(val_loader.dataset))
        # Bug fix: the squared-error sum covers every validation sample, so
        # the mean is taken over the dataset size. This cell previously
        # divided by the number of batches, overstating the RMSE.
        rmse = np.sqrt(validation_total / len(val_loader.dataset))

        pbar.set_postfix({'Training Loss ': training_losses[-1], 'Validation Loss ': validation_losses[-1], 'Validation RMSE ': rmse})

# Metrics of the last completed epoch.
final_metrics = (
    ("Final Training Loss:", training_losses[-1]),
    ("Final Validation Loss:", validation_losses[-1]),
    ("Final Validation RMSE:", rmse),
)
for caption, value in final_metrics:
    print(caption, value)

# Learning curves: one line per loss history, x-axis is the 0-based epoch index.
loss_curves = (
    (training_losses, 'Training Loss'),
    (validation_losses, 'Validation Loss'),
)
for history, curve_label in loss_curves:
    plt.plot(history, label=curve_label)
plt.title('Training and Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
#####   problem 2C

unscaled_df = full_df[usefull_cols].copy()

# Columns expanded into one-hot indicator columns.
# NOTE(review): 'ExterQual' is selected in usefull_cols but is not listed
# here; if it holds non-numeric values it is coerced to NaN and zero-filled
# below -- confirm whether it should be encoded as well.
categorical_cols = ['Neighborhood', 'MSZoning', 'Alley', 'LotShape', 'LandContour',
                    'Condition1', 'HouseStyle', 'MasVnrType', 'SaleCondition']

# Bug fix: remove the target BEFORE building the feature matrix. Previously
# the whole frame (including 'SalePrice') was encoded and scaled into X, so
# the network received the value it was supposed to predict as an input.
unencoded_unscaled_X = unscaled_df.drop(columns=['SalePrice'])

# dtype=int yields 0/1 indicator columns directly, which replaces the former
# True/False -> 1/0 `.replace` pass.
encoded_unscaled_X = pd.get_dummies(unencoded_unscaled_X, columns=categorical_cols, dtype=int)
# Anything still non-numeric becomes NaN; every NaN is then replaced by 0.
encoded_unscaled_X = encoded_unscaled_X.apply(pd.to_numeric, errors='coerce').fillna(0)

# Target as an (n_samples, 1) column, the 2-D layout the scaler expects.
unscaled_Y = unscaled_df['SalePrice'].values.reshape(-1, 1)

# Standardise features and target independently.
# NOTE(review): both scalers are fitted on the full table before the
# train/validation split below -- confirm that this is intended.
X = SS().fit_transform(encoded_unscaled_X)
Y = SS().fit_transform(unscaled_Y)

x_train, x_val, y_train, y_val = train_test_split(X, Y, train_size=0.8, random_state=7)

x_train = torch.tensor(x_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
x_val = torch.tensor(x_val, dtype=torch.float32)
y_val = torch.tensor(y_val, dtype=torch.float32)

train = TensorDataset(x_train, y_train)
val = TensorDataset(x_val, y_val)

# Shuffle training batches only; keep validation order deterministic.
train_loader = DataLoader(dataset=train, batch_size=64, shuffle=True)
val_loader = DataLoader(dataset=val, batch_size=64, shuffle=False)

class RegressNet(nn.Module):
    """Deeper fully connected regressor: ``input`` -> 512 -> 128 -> 64 -> 16 -> 1.

    Args:
        input: Number of features in each sample.
    """

    def __init__(self, input):
        super().__init__()
        self.fc1 = nn.Linear(input, 512)
        self.fc2 = nn.Linear(512, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, 16)
        self.fc5 = nn.Linear(16, 1)

    def forward(self, x):
        """Return one prediction per row of ``x`` (last dimension of size 1)."""
        hidden = x
        # ReLU follows each of the four hidden layers; the output stays linear.
        for layer in (self.fc1, self.fc2, self.fc3, self.fc4):
            hidden = torch.relu(layer(hidden))
        return self.fc5(hidden)

# Model, loss and optimiser for Problem 2C.
model = RegressNet(input=x_train.shape[1]).to(device)
criterion = nn.MSELoss()
optimizer = optim.AdamW(model.parameters(), lr=0.0008)

epochs = 100
# Per-epoch mean losses, read by the summary prints and plot after the loop.
training_losses = []
validation_losses = []
for epoch in range(epochs):
    model.train()

    # Sum of per-sample losses: the criterion returns the batch mean, so it
    # is scaled back up by the batch size before being accumulated.
    running_loss = 0.0

    with tqdm(total=len(train_loader), desc=f'Epoch {epoch + 1}/{epochs}', unit=' batch') as pbar:
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * inputs.size(0)
            pbar.update()

        # Bug fix: running_loss is a per-sample sum, so it must be divided by
        # the number of SAMPLES. Dividing by the number of batches inflated
        # the reported loss by roughly the batch size.
        training_losses.append(running_loss / len(train_loader.dataset))

        # Validation pass. It stays inside the `with` block so the progress
        # bar is still open when the postfix is set below.
        model.eval()

        running_loss = 0.0
        validation_total = 0.0

        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = model(inputs)
                loss = criterion(outputs, labels)
                running_loss += loss.item() * inputs.size(0)
                # Raw sum of squared errors, used for the RMSE below.
                validation_total += ((outputs - labels) ** 2).sum().item()

        validation_losses.append(running_loss / len(val_loader.dataset))
        # RMSE is in the units of the labels served by val_loader.
        rmse = np.sqrt(validation_total / len(val_loader.dataset))

        pbar.set_postfix({'Training Loss ': training_losses[-1], 'Validation Loss ': validation_losses[-1], 'Validation RMSE ': rmse})

# Metrics of the last completed epoch.
final_metrics = (
    ("Final Training Loss:", training_losses[-1]),
    ("Final Validation Loss:", validation_losses[-1]),
    ("Final Validation RMSE:", rmse),
)
for caption, value in final_metrics:
    print(caption, value)

# Learning curves: one line per loss history, x-axis is the 0-based epoch index.
loss_curves = (
    (training_losses, 'Training Loss'),
    (validation_losses, 'Validation Loss'),
)
for history, curve_label in loss_curves:
    plt.plot(history, label=curve_label)
plt.title('Training and Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()