<a href="https://colab.research.google.com/github/GIDMaxymKrasnykov/Maks_Krasnikov_ml_engineering_lab/blob/main/Maks_Krasnikov_ml_engineering_lab1_ipynb_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Підготовка даних**

In [None]:
import os
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split

# Settings shared by the data-preparation code below
CONFIG = dict(
    data_dir="./data",         # root folder passed to the CIFAR-10 dataset
    batch_size=64,             # batch size used by every DataLoader
    num_workers=4,             # worker processes per DataLoader
    validation_split=0.1,      # fraction of the training set held out for validation
    test_split=0.1,            # NOTE(review): not read by any code visible in this cell
    seed=42,                   # seed for PyTorch's global random generator
)

# Seed PyTorch's global random generator so runs are repeatable
torch.manual_seed(CONFIG["seed"])

# Data Preparation
def prepare_data():
    """Build the CIFAR-10 training, validation and test data loaders.

    Every image is converted to a tensor and normalized per channel with
    mean 0.5 and std 0.5. The CIFAR-10 training set is split at random into
    a training part and a validation part sized by
    ``CONFIG["validation_split"]``; the CIFAR-10 test set is used unchanged.

    Returns:
        tuple: ``(train_loader, val_loader, test_loader)``. Only the training
        loader shuffles its samples.
    """
    root = CONFIG["data_dir"]
    preprocessing = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])

    # Both official CIFAR-10 subsets are downloaded on demand.
    full_train = torchvision.datasets.CIFAR10(root=root, train=True, download=True, transform=preprocessing)
    held_out_test = torchvision.datasets.CIFAR10(root=root, train=False, download=True, transform=preprocessing)

    # The validation part gets the truncated share; training keeps the rest.
    n_val = int(CONFIG["validation_split"] * len(full_train))
    n_train = len(full_train) - n_val
    train_part, val_part = random_split(full_train, [n_train, n_val])

    def make_loader(data, shuffle):
        """Wrap ``data`` in a DataLoader using the shared batch settings."""
        return DataLoader(data, batch_size=CONFIG["batch_size"], shuffle=shuffle, num_workers=CONFIG["num_workers"])

    return (
        make_loader(train_part, True),
        make_loader(val_part, False),
        make_loader(held_out_test, False),
    )

# Prepare the data
if __name__ == "__main__":
    # Build the three loaders, then report how many samples each one serves.
    train_loader, val_loader, test_loader = prepare_data()
    print("Data preparation complete.\n")
    print(
        "\n".join(
            f"{split} samples: {len(loader.dataset)}"
            for split, loader in (
                ("Training", train_loader),
                ("Validation", val_loader),
                ("Test", test_loader),
            )
        )
    )


100%|██████████| 170M/170M [00:10<00:00, 16.4MB/s]


Data preparation complete.

Training samples: 45000
Validation samples: 5000
Test samples: 10000




**Створення простої нейронної мережі, реалізація циклів навчання та валідації, а також оновлення аугментації даних**

In [None]:
import os
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split
import torch.nn as nn
import torch.optim as optim

# Settings shared by data preparation and training
CONFIG = dict(
    data_dir="./data",         # root folder passed to the CIFAR-10 dataset
    batch_size=64,             # batch size used by every DataLoader
    num_workers=4,             # worker processes per DataLoader
    validation_split=0.1,      # fraction of the training set held out for validation
    test_split=0.1,            # NOTE(review): not read by any code visible in this cell
    seed=42,                   # seed for PyTorch's global random generator
    learning_rate=0.001,       # learning rate handed to the Adam optimizer
    num_epochs=10,             # number of passes over the training loader
)

# Seed PyTorch's global random generator so runs are repeatable
torch.manual_seed(CONFIG["seed"])

# Data Preparation
def prepare_data():
    """Download, preprocess, and split CIFAR-10 into three data loaders.

    Random augmentation (horizontal flip and padded random crop) is applied
    to the training samples only. Validation and test samples are just
    converted to tensors and normalized, so evaluation metrics are
    deterministic and measured on unmodified images.

    Returns:
        tuple: ``(train_loader, val_loader, test_loader)``. Only the training
        loader shuffles its samples.
    """
    normalize = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    train_transform = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(32, padding=4),
        transforms.ToTensor(),
        normalize,
    ])
    eval_transform = transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])

    # The training split is opened twice: once with augmentation (for training)
    # and once without (for validation). Both views index the same images, so
    # the split indices below are valid for either one. This keeps two copies
    # of the training images in memory.
    dataset = torchvision.datasets.CIFAR10(root=CONFIG["data_dir"], train=True, download=True, transform=train_transform)
    eval_view = torchvision.datasets.CIFAR10(root=CONFIG["data_dir"], train=True, download=False, transform=eval_transform)
    test_dataset = torchvision.datasets.CIFAR10(root=CONFIG["data_dir"], train=False, download=True, transform=eval_transform)

    # Calculate split sizes
    total_size = len(dataset)
    val_size = int(CONFIG["validation_split"] * total_size)
    train_size = total_size - val_size

    # Split once, then re-point the validation indices at the un-augmented view
    train_dataset, val_split = random_split(dataset, [train_size, val_size])
    val_dataset = torch.utils.data.Subset(eval_view, val_split.indices)

    # Create data loaders
    train_loader = DataLoader(train_dataset, batch_size=CONFIG["batch_size"], shuffle=True, num_workers=CONFIG["num_workers"])
    val_loader = DataLoader(val_dataset, batch_size=CONFIG["batch_size"], shuffle=False, num_workers=CONFIG["num_workers"])
    test_loader = DataLoader(test_dataset, batch_size=CONFIG["batch_size"], shuffle=False, num_workers=CONFIG["num_workers"])

    return train_loader, val_loader, test_loader

# Define Simple Neural Network
class SimpleNN(nn.Module):
    """Fully connected classifier with two hidden layers.

    The input is flattened to 32 * 32 * 3 features, passed through hidden
    layers of 512 and 256 units (each followed by ReLU), and mapped to 10
    output scores by a final linear layer.
    """

    def __init__(self):
        super().__init__()
        in_features = 32 * 32 * 3
        layers = [
            nn.Flatten(),
            nn.Linear(in_features, 512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 10),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Return the 10 output scores for each sample in batch ``x``."""
        scores = self.model(x)
        return scores

# Training Loop
def train_model(train_loader, val_loader):
    """Train a fresh ``SimpleNN`` and validate it after every epoch.

    The network is optimized with Adam on a cross-entropy loss for
    ``CONFIG["num_epochs"]`` epochs. After each epoch the mean batch loss is
    printed and ``validate_model`` is run on ``val_loader``.

    Args:
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        val_loader: Loader handed to ``validate_model`` after each epoch.

    Returns:
        The trained ``SimpleNN`` instance.
    """
    net = SimpleNN()
    loss_fn = nn.CrossEntropyLoss()
    opt = optim.Adam(net.parameters(), lr=CONFIG["learning_rate"])

    for epoch_idx in range(CONFIG["num_epochs"]):
        # validate_model leaves the network in eval mode, so re-enable training.
        net.train()
        batch_losses = []

        for images, targets in train_loader:
            opt.zero_grad()
            batch_loss = loss_fn(net(images), targets)
            batch_loss.backward()
            opt.step()
            batch_losses.append(batch_loss.item())

        mean_loss = sum(batch_losses) / len(train_loader)
        print(f"Epoch [{epoch_idx+1}/{CONFIG['num_epochs']}], Loss: {mean_loss:.4f}")

        # Report accuracy on the held-out validation data.
        validate_model(net, val_loader)

    return net

# Validation Loop
def validate_model(model, val_loader):
    """Evaluate ``model`` on ``val_loader`` and print its accuracy.

    Args:
        model: Network to evaluate. It is switched to eval mode and left
            there; the caller must call ``model.train()`` before training
            further.
        val_loader: Iterable yielding ``(inputs, labels)`` batches.

    Returns:
        float: Accuracy in percent, or ``0.0`` when the loader yields no
        samples (instead of raising ``ZeroDivisionError``).
    """
    model.eval()
    correct = 0
    total = 0

    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for inputs, labels in val_loader:
            predicted = model(inputs).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    if total == 0:
        # Nothing to score: report that explicitly rather than dividing by zero.
        print("Validation Accuracy: n/a (no validation samples)")
        return 0.0

    accuracy = 100 * correct / total
    print(f"Validation Accuracy: {accuracy:.2f}%")
    return accuracy

# Main Workflow
if __name__ == "__main__":
    # Build the three loaders and report how many samples each one serves.
    train_loader, val_loader, test_loader = prepare_data()
    print("Data preparation complete.\n")
    print(
        "\n".join(
            f"{split} samples: {len(loader.dataset)}"
            for split, loader in (
                ("Training", train_loader),
                ("Validation", val_loader),
                ("Test", test_loader),
            )
        )
    )

    # Train the network; validation accuracy is printed after every epoch.
    print("Starting training...")
    trained_model = train_model(train_loader, val_loader)
    print("Training complete.")


Data preparation complete.

Training samples: 45000
Validation samples: 5000
Test samples: 10000
Starting training...
Epoch [1/10], Loss: 1.8049
Validation Accuracy: 37.86%
Epoch [2/10], Loss: 1.6426
Validation Accuracy: 42.80%
Epoch [3/10], Loss: 1.5806
Validation Accuracy: 45.00%
Epoch [4/10], Loss: 1.5356
Validation Accuracy: 45.48%
Epoch [5/10], Loss: 1.5086
Validation Accuracy: 45.76%
Epoch [6/10], Loss: 1.4789
Validation Accuracy: 46.64%
Epoch [7/10], Loss: 1.4607
Validation Accuracy: 47.00%
Epoch [8/10], Loss: 1.4439
Validation Accuracy: 47.72%
Epoch [9/10], Loss: 1.4287
Validation Accuracy: 47.62%
Epoch [10/10], Loss: 1.4207
Validation Accuracy: 48.66%
Training complete.


**Тестування моделі та обчислення додаткових метрик (precision, recall, F1-score). Тепер модель може бути протестована після навчання, а результати будуть представлені у вигляді метрик.**

In [None]:
import os
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import precision_score, recall_score, f1_score
import numpy as np

# Settings shared by data preparation, training and testing
CONFIG = dict(
    data_dir="./data",         # root folder passed to the CIFAR-10 dataset
    batch_size=64,             # batch size used by every DataLoader
    num_workers=4,             # worker processes per DataLoader
    validation_split=0.1,      # fraction of the training set held out for validation
    test_split=0.1,            # NOTE(review): not read by any code visible in this cell
    seed=42,                   # seed for PyTorch's global random generator
    learning_rate=0.001,       # learning rate handed to the Adam optimizer
    num_epochs=10,             # number of passes over the training loader
)

# Seed PyTorch's global random generator so runs are repeatable
torch.manual_seed(CONFIG["seed"])

# Data Preparation
def prepare_data():
    """Download, preprocess, and split CIFAR-10 into three data loaders.

    Random augmentation (horizontal flip and padded random crop) is applied
    to the training samples only. Validation and test samples are just
    converted to tensors and normalized, so the reported accuracy, precision,
    recall and F1 are deterministic and measured on unmodified images.

    Returns:
        tuple: ``(train_loader, val_loader, test_loader)``. Only the training
        loader shuffles its samples.
    """
    normalize = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    train_transform = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(32, padding=4),
        transforms.ToTensor(),
        normalize,
    ])
    eval_transform = transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])

    # The training split is opened twice: once with augmentation (for training)
    # and once without (for validation). Both views index the same images, so
    # the split indices below are valid for either one. This keeps two copies
    # of the training images in memory.
    dataset = torchvision.datasets.CIFAR10(root=CONFIG["data_dir"], train=True, download=True, transform=train_transform)
    eval_view = torchvision.datasets.CIFAR10(root=CONFIG["data_dir"], train=True, download=False, transform=eval_transform)
    test_dataset = torchvision.datasets.CIFAR10(root=CONFIG["data_dir"], train=False, download=True, transform=eval_transform)

    # Calculate split sizes
    total_size = len(dataset)
    val_size = int(CONFIG["validation_split"] * total_size)
    train_size = total_size - val_size

    # Split once, then re-point the validation indices at the un-augmented view
    train_dataset, val_split = random_split(dataset, [train_size, val_size])
    val_dataset = torch.utils.data.Subset(eval_view, val_split.indices)

    # Create data loaders
    train_loader = DataLoader(train_dataset, batch_size=CONFIG["batch_size"], shuffle=True, num_workers=CONFIG["num_workers"])
    val_loader = DataLoader(val_dataset, batch_size=CONFIG["batch_size"], shuffle=False, num_workers=CONFIG["num_workers"])
    test_loader = DataLoader(test_dataset, batch_size=CONFIG["batch_size"], shuffle=False, num_workers=CONFIG["num_workers"])

    return train_loader, val_loader, test_loader

# Define Simple Neural Network
class SimpleNN(nn.Module):
    """Small multilayer perceptron producing 10 output scores per sample.

    Layout: flatten -> Linear(3072, 512) -> ReLU -> Linear(512, 256) -> ReLU
    -> Linear(256, 10). No activation is applied to the final layer.
    """

    def __init__(self):
        super().__init__()
        # Feature width at the input and after each hidden layer.
        widths = (32 * 32 * 3, 512, 256)

        stack = [nn.Flatten()]
        for n_in, n_out in zip(widths, widths[1:]):
            stack += [nn.Linear(n_in, n_out), nn.ReLU()]
        stack.append(nn.Linear(widths[-1], 10))

        self.model = nn.Sequential(*stack)

    def forward(self, x):
        """Run batch ``x`` through the layer stack and return its output."""
        return self.model(x)

# Training Loop
def train_model(train_loader, val_loader):
    """Fit a new ``SimpleNN`` on ``train_loader``, validating once per epoch.

    Uses cross-entropy loss and the Adam optimizer with
    ``CONFIG["learning_rate"]`` for ``CONFIG["num_epochs"]`` epochs. Each
    epoch prints the loss averaged over batches and then calls
    ``validate_model`` on ``val_loader``.

    Args:
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        val_loader: Loader handed to ``validate_model`` after each epoch.

    Returns:
        The trained ``SimpleNN`` instance.
    """
    classifier = SimpleNN()
    objective = nn.CrossEntropyLoss()
    adam = optim.Adam(classifier.parameters(), lr=CONFIG["learning_rate"])

    for epoch_no in range(1, CONFIG["num_epochs"] + 1):
        # validate_model switches to eval mode; switch back before training.
        classifier.train()
        loss_sum = 0.0

        for batch_inputs, batch_labels in train_loader:
            adam.zero_grad()
            predictions = classifier(batch_inputs)
            batch_loss = objective(predictions, batch_labels)
            batch_loss.backward()
            adam.step()
            loss_sum += batch_loss.item()

        epoch_loss = loss_sum / len(train_loader)
        print(f"Epoch [{epoch_no}/{CONFIG['num_epochs']}], Loss: {epoch_loss:.4f}")

        # Report metrics on the held-out validation data.
        validate_model(classifier, val_loader)

    return classifier

# Validation Loop
def _evaluate_model(model, loader, split_name):
    """Score ``model`` on ``loader``; print and return the metrics.

    Shared implementation behind ``validate_model`` and ``test_model``.

    Args:
        model: Network to evaluate. It is switched to eval mode and left
            there.
        loader: Iterable yielding ``(inputs, labels)`` batches.
        split_name: Label used in the printed accuracy line
            (e.g. ``"Validation"`` or ``"Test"``).

    Returns:
        dict: ``{"accuracy", "precision", "recall", "f1"}``. Accuracy is in
        percent; the other three are weighted averages in ``[0, 1]``. All
        values are ``0.0`` when the loader yields no samples.
    """
    model.eval()
    correct = 0
    total = 0

    all_labels = []
    all_predictions = []

    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for inputs, labels in loader:
            predicted = model(inputs).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            all_labels.extend(labels.cpu().tolist())
            all_predictions.extend(predicted.cpu().tolist())

    if total == 0:
        # Nothing to score: report that explicitly rather than dividing by zero.
        print(f"{split_name} Accuracy: n/a (no samples)")
        return {"accuracy": 0.0, "precision": 0.0, "recall": 0.0, "f1": 0.0}

    accuracy = 100 * correct / total
    # zero_division=0 scores a class that is never predicted as 0 without
    # emitting an undefined-metric warning on every call.
    precision = precision_score(all_labels, all_predictions, average='weighted', zero_division=0)
    recall = recall_score(all_labels, all_predictions, average='weighted', zero_division=0)
    f1 = f1_score(all_labels, all_predictions, average='weighted', zero_division=0)

    print(f"{split_name} Accuracy: {accuracy:.2f}%")
    print(f"Precision: {precision:.4f}, Recall: {recall:.4f}, F1-Score: {f1:.4f}")

    return {"accuracy": accuracy, "precision": precision, "recall": recall, "f1": f1}


def validate_model(model, val_loader):
    """Validate the neural network and report its metrics.

    Args:
        model: Network to evaluate (left in eval mode afterwards).
        val_loader: Iterable yielding ``(inputs, labels)`` validation batches.

    Returns:
        dict: Accuracy (percent) plus weighted precision, recall and F1.
    """
    return _evaluate_model(model, val_loader, "Validation")


# Test Model
def test_model(model, test_loader):
    """Test the trained model and report its metrics.

    Args:
        model: Network to evaluate (left in eval mode afterwards).
        test_loader: Iterable yielding ``(inputs, labels)`` test batches.

    Returns:
        dict: Accuracy (percent) plus weighted precision, recall and F1.
    """
    return _evaluate_model(model, test_loader, "Test")

# Main Workflow
if __name__ == "__main__":
    # Build the three loaders and report how many samples each one serves.
    train_loader, val_loader, test_loader = prepare_data()
    print("Data preparation complete.\n")
    print(
        "\n".join(
            f"{split} samples: {len(loader.dataset)}"
            for split, loader in (
                ("Training", train_loader),
                ("Validation", val_loader),
                ("Test", test_loader),
            )
        )
    )

    # Train the network; validation metrics are printed after every epoch.
    print("Starting training...")
    trained_model = train_model(train_loader, val_loader)
    print("Training complete.")

    # Final evaluation on the CIFAR-10 test set.
    print("Starting testing...")
    test_model(trained_model, test_loader)
    print("Testing complete.")


Data preparation complete.

Training samples: 45000
Validation samples: 5000
Test samples: 10000
Starting training...




Epoch [1/10], Loss: 1.8049
Validation Accuracy: 37.86%
Precision: 0.3940, Recall: 0.3786, F1-Score: 0.3677




Epoch [2/10], Loss: 1.6426
Validation Accuracy: 42.80%
Precision: 0.4333, Recall: 0.4280, F1-Score: 0.4186




Epoch [3/10], Loss: 1.5806
Validation Accuracy: 45.00%
Precision: 0.4571, Recall: 0.4500, F1-Score: 0.4471




Epoch [4/10], Loss: 1.5356
Validation Accuracy: 45.48%
Precision: 0.4568, Recall: 0.4548, F1-Score: 0.4517




Epoch [5/10], Loss: 1.5086
Validation Accuracy: 45.76%
Precision: 0.4575, Recall: 0.4576, F1-Score: 0.4492




Epoch [6/10], Loss: 1.4789
Validation Accuracy: 46.64%
Precision: 0.4709, Recall: 0.4664, F1-Score: 0.4613




Epoch [7/10], Loss: 1.4607
Validation Accuracy: 47.00%
Precision: 0.4696, Recall: 0.4700, F1-Score: 0.4610




Epoch [8/10], Loss: 1.4439
Validation Accuracy: 47.72%
Precision: 0.4896, Recall: 0.4772, F1-Score: 0.4777




Epoch [9/10], Loss: 1.4287
Validation Accuracy: 47.62%
Precision: 0.4707, Recall: 0.4762, F1-Score: 0.4619




Epoch [10/10], Loss: 1.4207
Validation Accuracy: 48.66%
Precision: 0.4834, Recall: 0.4866, F1-Score: 0.4763
Training complete.
Starting testing...




Test Accuracy: 48.64%
Precision: 0.4826, Recall: 0.4864, F1-Score: 0.4745
Testing complete.


**Автоматичне створення графіків**

In [None]:
import matplotlib.pyplot as plt

def plot_metrics(metrics, save_path="metrics.png"):
    """Plot per-epoch accuracy and F1-score, save the figure and display it.

    Args:
        metrics: Mapping with ``"accuracy"`` and ``"f1"`` sequences holding
            one value per epoch. Both must have the same length.
        save_path: Path of the image file to write. Pass ``None`` to display
            the plot without saving it.
    """
    epochs = range(1, len(metrics["accuracy"]) + 1)

    # Draw on a dedicated figure so repeated calls never add lines to a plot
    # left over from an earlier call.
    fig, ax = plt.subplots()
    ax.plot(epochs, metrics["accuracy"], label="Accuracy")
    ax.plot(epochs, metrics["f1"], label="F1-Score")
    ax.set_xlabel("Epochs")
    ax.set_ylabel("Metric Value")
    ax.legend()

    if save_path is not None:
        fig.savefig(save_path)
    plt.show()

    # Release the figure; otherwise figures pile up across calls.
    plt.close(fig)