In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
from torchvision.models import vit_b_16, ViT_L_16_Weights, ViT_H_14_Weights

In [34]:
device = torch.device("mps")

transform = transforms.Compose([transforms.Resize((224, 224)), transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])

train_dataset = datasets.MNIST(root='./data', train=True, transform=transform, download=True)
test_dataset = datasets.MNIST(root='./data', train=False, transform=transform, download=True)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [8]:
device = torch.device("mps")

transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])

train_dataset = datasets.CIFAR10(root='./data', train=True, transform=transform)
test_dataset = datasets.CIFAR10(root='./data', train=False, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [17]:
def train_and_evaluate(model, epochs=5):
    model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=6, gamma=0.3)

    # Training loop
    for epoch in range(epochs):
        model.train()
        running_loss = 0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        # scheduler.step()
        print(f"Epoch {epoch + 1}, Loss: {running_loss / len(train_loader):.4f}")

    # Evaluation
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print(f"Accuracy: {100 * correct / total:.2f}%")

In [9]:
class LinearClassifier(nn.Module):
    def __init__(self):
        super(LinearClassifier, self).__init__()
        self.fc = nn.Linear(28 * 28, 10)

    def forward(self, x):
        x = x.view(-1, 28 * 28)  # Flatten input
        return self.fc(x)


# Train and evaluate
model = LinearClassifier()
train_and_evaluate(model, 10)

RuntimeError: shape '[-1, 784]' is invalid for input of size 196608

In [25]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 10)
        self.pool = nn.MaxPool2d(2, 2)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = x.view(-1, 64 * 7 * 7)
        x = self.relu(self.fc1(x))
        return self.fc2(x)


# Train and evaluate
model = CNN()
train_and_evaluate(model, 10)

Epoch 1, Loss: 0.1672
Epoch 2, Loss: 0.0482
Epoch 3, Loss: 0.0335
Epoch 4, Loss: 0.0229
Epoch 5, Loss: 0.0180
Epoch 6, Loss: 0.0148
Epoch 7, Loss: 0.0048
Epoch 8, Loss: 0.0022
Epoch 9, Loss: 0.0019
Epoch 10, Loss: 0.0012
Accuracy: 99.27%


In [13]:
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1)  # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

model = Net()
train_and_evaluate(model, 25)

Epoch 1, Loss: 1.6626
Epoch 2, Loss: 1.3536
Epoch 3, Loss: 1.2238
Epoch 4, Loss: 1.1379
Epoch 5, Loss: 1.0740
Epoch 6, Loss: 1.0213
Epoch 7, Loss: 0.9191
Epoch 8, Loss: 0.8979
Epoch 9, Loss: 0.8777
Epoch 10, Loss: 0.8601
Epoch 11, Loss: 0.8452
Epoch 12, Loss: 0.8303
Epoch 13, Loss: 0.7929
Epoch 14, Loss: 0.7857
Epoch 15, Loss: 0.7803
Epoch 16, Loss: 0.7756
Epoch 17, Loss: 0.7709
Epoch 18, Loss: 0.7663
Epoch 19, Loss: 0.7536
Epoch 20, Loss: 0.7518
Epoch 21, Loss: 0.7501
Epoch 22, Loss: 0.7492
Epoch 23, Loss: 0.7472
Epoch 24, Loss: 0.7457
Epoch 25, Loss: 0.7420
Accuracy: 65.36%


In [36]:
class MNISTViT(nn.Module):
    def __init__(self):
        super(MNISTViT, self).__init__()
        # Load a pretrained ViT model
        self.vit = vit_b_16(pretrained=True)

        # Modify the classifier to match the number of classes in MNIST
        self.vit.heads = nn.Linear(self.vit.hidden_dim, 10)

    def forward(self, x):
        return self.vit(x)

# Instantiate and train
model = MNISTViT()
model.to(device)
train_and_evaluate(model, 10)

Downloading: "https://download.pytorch.org/models/vit_b_16-c867db91.pth" to /Users/nicolae.mogage/.cache/torch/hub/checkpoints/vit_b_16-c867db91.pth
10.4%


KeyboardInterrupt: 

In [26]:
class CNNTransformer(nn.Module):
    def __init__(self):
        super(CNNTransformer, self).__init__()
        # CNN feature extractor
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.relu = nn.ReLU()

        # Transformer encoder
        self.embedding_dim = 64 * 7 * 7  # Output size of CNN layers after pooling
        self.embedding = nn.Linear(self.embedding_dim, 128)
        encoder_layer = nn.TransformerEncoderLayer(d_model=128, nhead=4)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=2)

        # Classification head
        self.fc = nn.Linear(128, 10)

    def forward(self, x):
        # CNN feature extraction
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))

        # Flatten and transform to embedding
        x = x.view(x.size(0), -1)  # Flattening into [batch_size, embedding_dim]
        x = self.embedding(x).unsqueeze(1)  # [batch_size, seq_len=1, embedding_dim=128]

        # Transformer and classification
        x = self.transformer(x)  # Transformer expects [batch_size, seq_len, embedding_dim]
        x = x.mean(dim=1)  # Aggregate sequence dimension (global average pooling)
        return self.fc(x)


# Train and evaluate
model = CNNTransformer()
train_and_evaluate(model, 10)

Epoch 1, Loss: 0.2780
Epoch 2, Loss: 0.0693
Epoch 3, Loss: 0.0522
Epoch 4, Loss: 0.0418
Epoch 5, Loss: 0.0382
Epoch 6, Loss: 0.0307
Epoch 7, Loss: 0.0129
Epoch 8, Loss: 0.0090
Epoch 9, Loss: 0.0074
Epoch 10, Loss: 0.0071
Accuracy: 99.11%


In [18]:
class CNNTransformer(nn.Module):
    def __init__(self, num_classes=10):
        super(CNNTransformer, self).__init__()

        # CNN feature extractor
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.relu = nn.ReLU()

        # Transformer encoder
        self.embedding_dim = 64 * 8 * 8  # Output size of CNN layers after pooling
        self.embedding = nn.Linear(self.embedding_dim, 128)  # Project to Transformer dim
        encoder_layer = nn.TransformerEncoderLayer(d_model=128, nhead=4, dim_feedforward=256)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=2)

        # Classification head
        self.fc = nn.Linear(128, num_classes)

    def forward(self, x):
        # CNN feature extraction
        x = self.pool(self.relu(self.conv1(x)))  # [batch_size, 32, 16, 16]
        x = self.pool(self.relu(self.conv2(x)))  # [batch_size, 64, 8, 8]

        # Flatten and transform to embedding
        x = x.view(x.size(0), -1)  # Flatten to [batch_size, embedding_dim]
        x = self.embedding(x).unsqueeze(1)  # [batch_size, seq_len=1, embedding_dim=128]

        # Transformer encoder
        x = self.transformer(x)  # [batch_size, seq_len, embedding_dim]
        x = x.mean(dim=1)  # Aggregate sequence dimension (global average pooling)

        # Classification
        return self.fc(x)

model = CNNTransformer()
train_and_evaluate(model, 25)

Epoch 1, Loss: 1.2988
Epoch 2, Loss: 0.8997
Epoch 3, Loss: 0.7632
Epoch 4, Loss: 0.6596
Epoch 5, Loss: 0.5803
Epoch 6, Loss: 0.5122
Epoch 7, Loss: 0.4459
Epoch 8, Loss: 0.3847
Epoch 9, Loss: 0.3328
Epoch 10, Loss: 0.2929
Epoch 11, Loss: 0.2520
Epoch 12, Loss: 0.2163
Epoch 13, Loss: 0.1905
Epoch 14, Loss: 0.1803
Epoch 15, Loss: 0.1619
Epoch 16, Loss: 0.1454
Epoch 17, Loss: 0.1373
Epoch 18, Loss: 0.1259
Epoch 19, Loss: 0.1241
Epoch 20, Loss: 0.1119
Epoch 21, Loss: 0.1187
Epoch 22, Loss: 0.1041
Epoch 23, Loss: 0.0990
Epoch 24, Loss: 0.1040
Epoch 25, Loss: 0.1071
Accuracy: 70.81%


In [3]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from torchvision import datasets, transforms
from sklearn.model_selection import train_test_split
import numpy as np

# Load the MNIST dataset
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])

train_dataset = datasets.MNIST(root="./data", train=True, transform=transform, download=True)
test_dataset = datasets.MNIST(root="./data", train=False, transform=transform, download=True)

# Convert datasets to numpy arrays
X_train = train_dataset.data.numpy().reshape(-1, 28 * 28)  # Flatten 28x28 images into 1D arrays
y_train = train_dataset.targets.numpy()
X_test = test_dataset.data.numpy().reshape(-1, 28 * 28)
y_test = test_dataset.targets.numpy()

# Train the Random Forest model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Evaluate on the test set
y_test_pred = rf_model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_test_pred)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

# Display classification report
print("\nClassification Report:")
print(classification_report(y_test, y_test_pred))

Test Accuracy: 96.91%

Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.99      0.98       980
           1       0.99      0.99      0.99      1135
           2       0.96      0.96      0.96      1032
           3       0.96      0.96      0.96      1010
           4       0.98      0.97      0.97       982
           5       0.97      0.96      0.97       892
           6       0.98      0.97      0.98       958
           7       0.97      0.96      0.97      1028
           8       0.96      0.96      0.96       974
           9       0.96      0.96      0.96      1009

    accuracy                           0.97     10000
   macro avg       0.97      0.97      0.97     10000
weighted avg       0.97      0.97      0.97     10000



In [20]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader

# Device configuration
device = torch.device("mps")

# CIFAR-10 Dataset and DataLoader
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # Normalize to [-1, 1]
])

train_dataset = datasets.CIFAR10(root="./data", train=True, transform=transform, download=True)
test_dataset = datasets.CIFAR10(root="./data", train=False, transform=transform, download=True)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)


# Load Pretrained ResNet Model
class ResNetCIFAR10(nn.Module):
    def __init__(self, num_classes=10):
        super(ResNetCIFAR10, self).__init__()
        # Load a pretrained ResNet18 model
        self.resnet = models.resnet18(pretrained=True)

        # Replace the final fully connected layer to match CIFAR-10 classes
        self.resnet.fc = nn.Linear(self.resnet.fc.in_features, num_classes)

    def forward(self, x):
        return self.resnet(x)


# Initialize the model
model = ResNetCIFAR10().to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


# Training loop
def train(model, train_loader, criterion, optimizer, epochs=10):
    for epoch in range(epochs):
        model.train()
        running_loss = 0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
        print(f"Epoch [{epoch + 1}/{epochs}], Loss: {running_loss / len(train_loader):.4f}")


# Evaluation function
def evaluate(model, test_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print(f"Test Accuracy: {100 * correct / total:.2f}%")


# Train and evaluate the model
train(model, train_loader, criterion, optimizer, epochs=15)
evaluate(model, test_loader)

Files already downloaded and verified
Files already downloaded and verified
Epoch [1/10], Loss: 0.9558
Epoch [2/10], Loss: 0.6538
Epoch [3/10], Loss: 0.5331
Epoch [4/10], Loss: 0.4252
Epoch [5/10], Loss: 0.3339
Epoch [6/10], Loss: 0.2649
Epoch [7/10], Loss: 0.2458
Epoch [8/10], Loss: 0.1864
Epoch [9/10], Loss: 0.1485
Epoch [10/10], Loss: 0.1212
Test Accuracy: 79.88%


In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader

# Device configuration
device = torch.device("mps")

# CIFAR-10 Dataset and DataLoader
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # Normalize to [-1, 1]
])

train_dataset = datasets.CIFAR10(root="./data", train=True, transform=transform, download=True)
test_dataset = datasets.CIFAR10(root="./data", train=False, transform=transform, download=True)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)


# Load Pretrained ViT Model and Adapt It for CIFAR-10
class ViTLargeCIFAR10(nn.Module):
    def __init__(self, num_classes=10):
        super(ViTLargeCIFAR10, self).__init__()
        # Load pretrained ViT-B-32
        self.vit = models.vit_b_32(weights=models.ViT_B_32_Weights.DEFAULT)
        self.vit.heads = nn.Linear(self.vit.hidden_dim, num_classes)

    def forward(self, x):
        x = F.interpolate(x, size=(224, 224), mode='bilinear', align_corners=False)
        return self.vit(x)


# Initialize the model
model = ViTLargeCIFAR10().to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


# Training loop
def train(model, train_loader, criterion, optimizer, epochs=10):
    for epoch in range(epochs):
        model.train()
        running_loss = 0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
        print(f"Epoch [{epoch + 1}/{epochs}], Loss: {running_loss / len(train_loader):.4f}")


# Evaluation function
def evaluate(model, test_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print(f"Test Accuracy: {100 * correct / total:.2f}%")


# Train and evaluate the model
train(model, train_loader, criterion, optimizer, epochs=10)
evaluate(model, test_loader)

Files already downloaded and verified
Files already downloaded and verified


RuntimeError: view size is not compatible with input tensor's size and stride (at least one dimension spans across two contiguous subspaces). Use .reshape(...) instead.