In [242]:
# Step 1: Import Libraries
import pickle
import gzip
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import torch.nn.functional as F
import matplotlib.pyplot as plt

# Step 2: Load the Training Data
path_to_flatland_train_data = '/content/sample_data/flatland_train.data'
with gzip.open(path_to_flatland_train_data, 'rb') as f:
    X_train, y_train = pickle.load(f)

# Check the shapes of the data
print(f"Training data shape: {X_train.shape}, Training labels shape: {y_train.shape}")

# Step 3: Normalize and Prepare Training Data
X_train = X_train.astype(np.float32) / 255.0  # Normalize pixel values to [0, 1]
X_train_tensor = torch.tensor(X_train).unsqueeze(1)  # Add channel dimension

# Convert labels to LongTensor
y_train_tensor = torch.tensor(y_train, dtype=torch.long)  # Ensure correct data type

# Create DataLoader for training data
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Step 4: Load the Test Data
path_to_flatland_test_data = '/content/sample_data/flatland_test.data'
with gzip.open(path_to_flatland_test_data, 'rb') as f:
    X_test = np.load(f, allow_pickle=True)

# Since X_test is a tuple, extract the first element
X_test = X_test[0]  # Shape should be (10000, 50, 50)

# Normalize the test data
X_test = X_test.astype(np.float32) / 255.0
X_test_tensor = torch.tensor(X_test).unsqueeze(1)  # Shape will now be (10000, 1, 50, 50)


Training data shape: (10000, 50, 50), Training labels shape: (10000,)


In [111]:
import pickle
import gzip
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import torch.nn.functional as F
import torchvision.transforms as transforms
import matplotlib.pyplot as plt

# Step 2: Load the Training Data
path_to_flatland_train_data = '/content/sample_data/flatland_train.data'
with gzip.open(path_to_flatland_train_data, 'rb') as f:
    X_train, y_train = pickle.load(f)

# Step 3:  Augment and Prepare Training Data
# Define transformations for data augmentation
transform = transforms.Compose([
    transforms.ToPILImage(),  # Convert to PIL Image
    transforms.RandomRotation(15), # Random rotation
    transforms.RandomAffine(0, translate=(0.1,0.1)), #Random translation
    transforms.ToTensor(),  # Convert to tensor
])

X_train = X_train.astype(np.float32) / 255.0  # Normalize pixel values

# Apply augmentations
augmented_X_train = []
for img in X_train:
    augmented_X_train.append(transform(img))
X_train = np.stack(augmented_X_train)
X_train_tensor = torch.tensor(X_train)

# Convert labels to LongTensor
y_train_tensor = torch.tensor(y_train, dtype=torch.long)  # Ensure correct data type

# Create DataLoader for training data
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Step 4: Load the Test Data
path_to_flatland_test_data = '/content/sample_data/flatland_test.data'
with gzip.open(path_to_flatland_test_data, 'rb') as f:
    X_test = np.load(f, allow_pickle=True)

# Since X_test is a tuple, extract the first element
X_test = X_test[0]  # Shape should be (10000, 50, 50)

# Normalize the test data
X_test = X_test.astype(np.float32) / 255.0
X_test_tensor = torch.tensor(X_test).unsqueeze(1)  # Shape will now be (10000, 1, 50, 50)


# Step 5: Define the CNN Model (Improved architecture)
class ImprovedCNN(nn.Module):
    def __init__(self):
        super(ImprovedCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32) # Batch normalization
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1) # Add another convolutional layer
        self.bn3 = nn.BatchNorm2d(128)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(128 * 6 * 6, 256)  # Adjust input size for the first FC layer
        self.dropout = nn.Dropout(0.5)  # Dropout for regularization
        self.fc2 = nn.Linear(256, 7)  # 7 classes

    def forward(self, x):
        x = self.pool(F.relu(self.bn1(self.conv1(x))))
        x = self.pool(F.relu(self.bn2(self.conv2(x))))
        x = self.pool(F.relu(self.bn3(self.conv3(x))))
        x = x.view(-1, 128 * 6 * 6)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)  # Apply dropout
        x = self.fc2(x)
        return x

# Instantiate the model, define the loss function and optimizer
model = ImprovedCNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [32]:
# Step 6: Train the Model (Increased Epochs and potentially adjusted learning rate)
num_epochs = 25  # Increased number of epochs
optimizer = optim.Adam(model.parameters(), lr=0.0005)  # Slightly reduced learning rate

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for images, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    epoch_loss = running_loss / len(train_loader)
    epoch_accuracy = correct / total
    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.4f}')

Epoch [1/25], Loss: 0.2129, Accuracy: 0.9355
Epoch [2/25], Loss: 0.2020, Accuracy: 0.9419
Epoch [3/25], Loss: 0.1843, Accuracy: 0.9427
Epoch [4/25], Loss: 0.1840, Accuracy: 0.9441
Epoch [5/25], Loss: 0.1784, Accuracy: 0.9451
Epoch [6/25], Loss: 0.1672, Accuracy: 0.9449
Epoch [7/25], Loss: 0.1612, Accuracy: 0.9488
Epoch [8/25], Loss: 0.1697, Accuracy: 0.9440
Epoch [9/25], Loss: 0.1610, Accuracy: 0.9459
Epoch [10/25], Loss: 0.1523, Accuracy: 0.9507
Epoch [11/25], Loss: 0.1433, Accuracy: 0.9512
Epoch [12/25], Loss: 0.1433, Accuracy: 0.9526
Epoch [13/25], Loss: 0.1331, Accuracy: 0.9526
Epoch [14/25], Loss: 0.1270, Accuracy: 0.9574
Epoch [15/25], Loss: 0.1389, Accuracy: 0.9510
Epoch [16/25], Loss: 0.1410, Accuracy: 0.9491
Epoch [17/25], Loss: 0.1288, Accuracy: 0.9520
Epoch [18/25], Loss: 0.1150, Accuracy: 0.9600
Epoch [19/25], Loss: 0.1167, Accuracy: 0.9584
Epoch [20/25], Loss: 0.1098, Accuracy: 0.9588
Epoch [21/25], Loss: 0.1114, Accuracy: 0.9580
Epoch [22/25], Loss: 0.1231, Accuracy: 0.95

In [34]:
model.eval()
with torch.no_grad():
    test_outputs = model(X_test_tensor)
    _, test_predictions = torch.max(test_outputs, 1)

# Step 8: Format Predictions
predictions = ''.join([str(round(p.item())) for p in test_predictions])
print(f'Predictions: {predictions[:10000]}...')  # Print the first 50 predictions
print(f'Total predictions length: {len(predictions)}')

# Print distinct value count
distinct_values = len(set(predictions))
print(f'Distinct value count: {distinct_values}')

Predictions: 654553334340005334054556564554045306035506033300430336435666405465330365550305335053060400035535646353040335635535505003536055000455553044545554365435504654563563553654630353665636666036030560546353533635333633546536330605356555336045005550006354506034655350045303346544555635450334030034455365505353330645405530344340303000630000465550633456054433005435650044304435064546334600030655530555364505635553333663633065064534555435453505365443305330405555450450534565305066430030344554460403030634500656433350050653654433033403564343534365040333036355343560356640344055004340343045056560530343453665435543503500445305656456603540333046345536440656304355544334535056305355666066463333635033435634535335306033304354343360454643333403563006445054450364360060335563355630055330303303046344450064646343366050560435645563640333003550546405666443065303335534054453063354363565336503003543044035663534406355003303643335456063503554050464005535435036063065565640436363564643540400056635530535550303530

Šitas modelis su test data gavo visai neblogą tikslumą 86%. Tačiau norisi dar didesnio tiklsumo.


Bandome dar labiau stiprinti modelį, pridedame conv sluoksnį, transformuojame paveiklsiukus dar kartą, kad geriau treniruotųsi modelis.

In [162]:
import torch.optim.lr_scheduler as lr_scheduler
import torchvision.transforms as transforms

# Improved Model Definition
class EnhancedCNN(nn.Module):
    def __init__(self):
        super(EnhancedCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5, padding=2)  # Larger kernel size
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.conv4 = nn.Conv2d(128, 256, kernel_size=3, padding=1)  # Extra layer
        self.bn4 = nn.BatchNorm2d(256)
        self.pool = nn.MaxPool2d(2, 2)

        self.fc1 = nn.Linear(256 * 3 * 3, 256)  # Adjust for additional layer
        self.dropout = nn.Dropout(0.6)  # Increased dropout
        self.fc2 = nn.Linear(256, 7)  # Final output layer for 7 classes

    def forward(self, x):
        x = self.pool(F.relu(self.bn1(self.conv1(x))))
        x = self.pool(F.relu(self.bn2(self.conv2(x))))
        x = self.pool(F.relu(self.bn3(self.conv3(x))))
        x = self.pool(F.relu(self.bn4(self.conv4(x))))
        x = x.view(-1, 256 * 3 * 3)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

# Instantiate the improved model
model = EnhancedCNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)  # Added weight decay
scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)  # Learning rate scheduler

# Improved data augmentation
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomRotation(20),  # Increased rotation for variation
    transforms.RandomAffine(0, translate=(0.2, 0.2)),  # Increased translation
    transforms.RandomHorizontalFlip(),  # Added horizontal flip
    transforms.ColorJitter(brightness=0.2, contrast=0.2),  # Slight brightness/contrast
    transforms.ToTensor(),
])

# Apply the updated transformations to training data
augmented_X_train = [transform(img) for img in X_train]
X_train = torch.stack(augmented_X_train)

# Redefine the DataLoader with updated data
train_dataset = TensorDataset(X_train, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Training Loop with Learning Rate Scheduler
num_epochs = 20
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct_train = 0
    total_train = 0

    for images, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Calculate accuracy on the training batch
        _, predicted = torch.max(outputs, 1)
        correct_train += (predicted == labels).sum().item()
        total_train += labels.size(0)

    # Step the scheduler after each epoch
    scheduler.step()

    # Calculate epoch accuracy
    train_accuracy = 100 * correct_train / total_train

    # Step the scheduler after each epoch
    scheduler.step()
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}, Train Accuracy: {train_accuracy:.2f}%")



Epoch [1/20], Loss: 1.0935, Train Accuracy: 53.61%
Epoch [2/20], Loss: 0.6921, Train Accuracy: 73.80%
Epoch [3/20], Loss: 0.5392, Train Accuracy: 82.01%
Epoch [4/20], Loss: 0.4850, Train Accuracy: 84.65%
Epoch [5/20], Loss: 0.4577, Train Accuracy: 85.55%
Epoch [6/20], Loss: 0.3635, Train Accuracy: 90.13%
Epoch [7/20], Loss: 0.3284, Train Accuracy: 91.48%
Epoch [8/20], Loss: 0.3106, Train Accuracy: 92.26%
Epoch [9/20], Loss: 0.2810, Train Accuracy: 92.90%
Epoch [10/20], Loss: 0.2598, Train Accuracy: 93.72%
Epoch [11/20], Loss: 0.2220, Train Accuracy: 95.07%
Epoch [12/20], Loss: 0.2068, Train Accuracy: 95.40%
Epoch [13/20], Loss: 0.2007, Train Accuracy: 95.55%
Epoch [14/20], Loss: 0.1903, Train Accuracy: 95.52%
Epoch [15/20], Loss: 0.1760, Train Accuracy: 95.92%
Epoch [16/20], Loss: 0.1598, Train Accuracy: 96.43%
Epoch [17/20], Loss: 0.1501, Train Accuracy: 96.56%
Epoch [18/20], Loss: 0.1369, Train Accuracy: 96.65%
Epoch [19/20], Loss: 0.1251, Train Accuracy: 96.82%
Epoch [20/20], Loss: 

In [163]:
model.eval()
with torch.no_grad():
    test_outputs = model(X_test_tensor)
    _, test_predictions = torch.max(test_outputs, 1)

# Step 8: Format Predictions
predictions = ''.join([str(round(p.item())) for p in test_predictions])
print(f'Predictions: {predictions[:10000]}...')  # Print the first 50 predictions
print(f'Total predictions length: {len(predictions)}')

# Print distinct value count
distinct_values = len(set(predictions))
print(f'Distinct value count: {distinct_values}')

Predictions: 604553334340005334054546564554040305035555033300430335430665405455330365550304335053065450035535546353040330534534504003536055000455553044545554365434504654553553553654630353655636565035030550546353533535333633046535330605355545336045005450006344006034554340045303345544554635450334530034455365505353330644405530344340303050530005405550633450054433005435450044304435064546334600030650430505364504535053333663633065054434550435453500364443305330405554450450534555305066430030344454450403030634400655433340050603654433033403564343434365040333036354343560355645344055054340343045056550530343453655435543503500445300546450503040333046345035440656304355444334535055305355560056453333635033435634435334306033304344343350454643333403553005445054450364360050335563355630055330303303046344440054645343366050560434545553640333003550546405656443055303335534054453063354363565336403003543044035653434400355503303643335455063503544050464005534434035063064565640436363564643540400056035530535450303530

Su šituo modeliu gauname 90% tikslumą, vadinas pridėjus conv sluoksnį, transformuojant paveiksliukus, galime gerinti modelio tikslumą.

*Modelio efektyvumas: 90% tikslumas rodo,
kad modelis gerai atpažįsta ir klasifikuoja
figūras.

* Overfitting: reikia atsargiai su conv sluoksniais.

* Klasės balansavimas: turime įvertinti, ar duomenų rinkinys yra subalansuotas. 90% tikslumas gali būti klaidingas, jei modelis gerai klasifikuoja tik dažniausiai pasitaikančias klases, tačiau prastai atpažįsta retesnes klases.

* Tolimesni patobulinimai: Nors 90% yra geras pasiekimas, visada galima ieškoti būdų, kaip pagerinti modelio veikimą. Galima išbandyti skirtingas architektūras, optimizavimo metodus, duomenų didinimą (data augmentation) ar hiperparametrų derinimą. Arba naudoti jau ištreniruotus modelius, bandyt juos taikyt savo duomenims.


In [224]:
import pickle
import gzip
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torchvision import models, transforms
import torch.nn.functional as F

In [225]:
# Step 4: Data Augmentation
data_transforms = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomRotation(20),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
])

# Reshape X_train_tensor to (num_images, channels, height, width)
# Assuming grayscale images, channels = 1
X_train_tensor = X_train_tensor.reshape(-1, 1, 50, 50)

# Augment training data
augmented_X_train = [data_transforms(img) for img in X_train_tensor]
X_train_tensor = torch.stack(augmented_X_train)

In [228]:
# Step 4: Define a CNN Model using Transfer Learning
class CNNTransferLearning(nn.Module):
    def __init__(self):
        super(CNNTransferLearning, self).__init__()
        # Load a pre-trained ResNet model
        self.resnet = models.resnet18(pretrained=True)
        # Modify the first convolutional layer to accept 1 channel input
        self.resnet.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False) #changed input channel to 1
        # Modify the last layer to fit our number of classes
        num_ftrs = self.resnet.fc.in_features
        self.resnet.fc = nn.Linear(num_ftrs, 7)  # Assuming 7 classes

    def forward(self, x):
        x = x.view(-1, 1, 50, 50)  # Reshape for CNN input
        x = self.resnet(x)  # Forward pass through ResNet
        return x

# Step 5: Initialize the Model, Loss Function, and Optimizer
model = CNNTransferLearning()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Step 6: Training Loop with Early Stopping
num_epochs = 5
best_accuracy = 0
patience = 5  # Early stopping patience
counter = 0

# Move the model to the GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

for epoch in range(num_epochs):
    model.train()  # Set the model to training mode
    running_loss = 0.0
    correct_train = 0
    total_train = 0

    for images, labels in DataLoader(train_dataset, batch_size=32, shuffle=True):
        images, labels = images.to(device), labels.to(device)  # Move to GPU if available
        optimizer.zero_grad()  # Zero the gradients
        outputs = model(images)  # Forward pass
        loss = criterion(outputs, labels)  # Compute loss
        loss.backward()  # Backward pass
        optimizer.step()  # Update weights

        running_loss += loss.item()

        # Calculate accuracy on the training batch
        _, predicted = torch.max(outputs, 1)
        correct_train += (predicted == labels).sum().item()
        total_train += labels.size(0)

    # Calculate epoch accuracy
    train_accuracy = 100 * correct_train / total_train
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}, Train Accuracy: {train_accuracy:.2f}%")

Epoch [1/5], Loss: 0.3373, Train Accuracy: 90.22%
Epoch [2/5], Loss: 0.1465, Train Accuracy: 97.34%
Epoch [3/5], Loss: 0.1431, Train Accuracy: 97.46%
Epoch [4/5], Loss: 0.1273, Train Accuracy: 98.04%
Epoch [5/5], Loss: 0.1415, Train Accuracy: 97.62%


In [229]:
# prompt: evaluate and print prediction cnntranderlearning model

model.eval()
with torch.no_grad():
    X_test_tensor = X_test_tensor.to(device)  # Move test data to the same device as the model
    test_outputs = model(X_test_tensor)
    _, test_predictions = torch.max(test_outputs, 1)

predictions = ''.join([str(round(p.item())) for p in test_predictions])
print(f'Predictions: {predictions[:10000]}...')  # Print the first 1000 predictions
print(f'Total predictions length: {len(predictions)}')

# Print distinct value count
distinct_values = len(set(predictions))
print(f'Distinct value count: {distinct_values}')

Predictions: 654553334340006334054556564553045305035566033300436336436666405455330365550303335053065460035535546353040335533535505003535065000455553044545554365435504654563563563655630353665636566036030565646353533635333633546535330605356635336045005550006334506034665330045353345544555635450334530035455365505353330645405535343340303060530005355550633456054433005435350034304435064546334605030655530555364505535553333663633065064334555435453605365443366330405555450450534555305066430030345554450503030634500655433350050653654433033403564353335365040333036355343560355655344055054340343045056550530353453665435543503500345305656456603540343046345536450656304355554334535055305356665056453333635033435634535335306033304355343350454644333403663005445054450364360050335563365630055330303303646344450053645343366050560435545653640333003550546406656343055303335534054453063364363565336503003553044035653533406355603303643335455063503634050464005534433036063065565640436363564643540500056635530536550303530

In [250]:
import pickle
import gzip
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torchvision import models, transforms
import torch.nn.functional as F
from torchvision.transforms import ToPILImage # Import ToPILImage
from PIL import Image

# Step 2: Load the Training Data
path_to_flatland_train_data = '/content/sample_data/flatland_train.data'
with gzip.open(path_to_flatland_train_data, 'rb') as f:
    X_train, y_train = pickle.load(f)

# Check the shapes of the data
print(f"Training data shape: {X_train.shape}, Training labels shape: {y_train.shape}")

# Step 3: Normalize and Prepare Training Data
X_train = X_train.astype(np.float32) / 255.0  # Normalize pixel values to [0, 1]
#X_train_tensor = torch.tensor(X_train).unsqueeze(1)  # Add channel dimension

# Convert labels to LongTensor
y_train_tensor = torch.tensor(y_train, dtype=torch.long)  # Ensure correct data type

# Data Augmentation
data_transforms = transforms.Compose([
    ToPILImage(), # Convert to PIL Image before other transformations
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
    transforms.ToTensor(),
])

# Custom Dataset Class for Data Augmentation
class CustomDataset(TensorDataset):
    def __init__(self, data, labels, transform=None):
        self.data = data
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        image = self.data[idx]
        # Convert to PIL Image if it's not already a PIL Image
        # Removed unnecessary ToPILImage() conversion
        #if not isinstance(image, Image.Image): # Check if it's a PIL Image
        #    image = ToPILImage()(image) # Convert to PIL Image

        label = self.labels[idx]
        if self.transform:
            image = self.transform(image)
        return image, label

# Create DataLoader for training data with Data Augmentation
train_dataset = CustomDataset(X_train_tensor, y_train_tensor, transform=data_transforms)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Step 4: Define a CNN Model using DenseNet
class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()
        # Load a pre-trained DenseNet model
        self.densenet = models.densenet121(pretrained=True)
        # Modify the first convolutional layer to accept 1 channel input
        self.densenet.features.conv0 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False) #changed input channel to 1
        # Modify the last layer to fit our number of classes
        num_ftrs = self.densenet.classifier.in_features
        self.densenet.classifier = nn.Linear(num_ftrs, 7)  # Assuming 7 classes
        self.dropout = nn.Dropout(0.5)  # Adding dropout for regularization

    def forward(self, x):
        x = x.view(-1, 1, 50, 50)  # Reshape for CNN input
        x = self.densenet(x)  # Forward pass through DenseNet
        return x

# Step 5: Initialize the Model, Loss Function, and Optimizer
model = CNNModel()
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=1e-4)  # AdamW for better convergence

# Step 6: Training Loop with Early Stopping and Validation
num_epochs = 7
best_accuracy = 0
patience = 5  # Early stopping patience
counter = 0

# Move the model to the GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

for epoch in range(num_epochs):
    model.train()  # Set the model to training mode
    running_loss = 0.0
    correct_train = 0
    total_train = 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)  # Move to GPU if available
        optimizer.zero_grad()  # Zero the gradients
        outputs = model(images)  # Forward pass
        loss = criterion(outputs, labels)  # Compute loss
        loss.backward()  # Backward pass
        optimizer.step()  # Update weights

        running_loss += loss.item()

        # Calculate accuracy on the training batch
        _, predicted = torch.max(outputs, 1)
        correct_train += (predicted == labels).sum().item()
        total_train += labels.size(0)

    # Calculate epoch accuracy
    train_accuracy = 100 * correct_train / total_train
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}, Train Accuracy: {train_accuracy:.2f}%")


Training data shape: (10000, 50, 50), Training labels shape: (10000,)
Epoch [1/7], Loss: 0.3671, Train Accuracy: 89.01%
Epoch [2/7], Loss: 0.1774, Train Accuracy: 96.37%
Epoch [3/7], Loss: 0.1376, Train Accuracy: 97.67%
Epoch [4/7], Loss: 0.1255, Train Accuracy: 98.15%
Epoch [5/7], Loss: 0.1369, Train Accuracy: 97.68%
Epoch [6/7], Loss: 0.1237, Train Accuracy: 98.22%
Epoch [7/7], Loss: 0.1226, Train Accuracy: 97.98%


In [251]:
model.eval()
with torch.no_grad():
    X_test_tensor = X_test_tensor.to(device)  # Move test data to the same device as the model
    test_outputs = model(X_test_tensor)
    _, test_predictions = torch.max(test_outputs, 1)

predictions = ''.join([str(round(p.item())) for p in test_predictions])
print(f'Predictions: {predictions[:10000]}...')  # Print the first 1000 predictions
print(f'Total predictions length: {len(predictions)}')

# Print distinct value count
distinct_values = len(set(predictions))
print(f'Distinct value count: {distinct_values}')

Predictions: 664553334340006334054546564554046305035566033300436336436666405455330365550304335053066450035535546353040336534534504003536065000465553044545554365434504654563563563654630353665636566036030566646353633636333633646535330605356645336045005450006344606034664340045363345644654635450334630034455365505353330644405536344340303060530005465550633456054433005436450044304435064546334606030656430555364504535553333663633066064434556435453600364443366330405554450450534555305066430030344454450403030634400655433340050603654433033403564343434365040333036354343560355640344055064340343045056550530343453665435543503500445306646456603640333046345636440656304355444334535056305356666056453333635033435634435334306033304344343350454643333403663005445054450364360050335563365630055330303303646344445054645343366060560434545653640333003550546406656443055303336534054453063364363565336403003543044035653434406355603303643335456063503644050464006534434036063064565640436363564643540400056635530536460303530

Su šiuo modeliu gavome 99 % accuracy.
* Sukurta CustomDataset klasė plečia TensorDataset, leidžiančią taikyti transformacijas tiesiogiai gaunant duomenis.
* CNN modelis, paremtas iš anksto apmokyta DenseNet architektūra.
* Svarbu stabdyti modelį, kai kas epochą tikslumas pradeda mažėti.
