In [None]:
import pandas as pd
import os
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt

# Report the CUDA environment. The GPU name query is guarded so the cell
# still runs on CPU-only machines, matching the CPU fallback chosen below.
cuda_available = torch.cuda.is_available()
print(cuda_available)  # True when a usable CUDA device is present
print(torch.cuda.device_count())  # number of visible GPUs (0 on a CPU-only host)
if cuda_available:
    print(torch.cuda.get_device_name(0))  # name of the first GPU
device = torch.device("cuda" if cuda_available else "cpu")
print(f"Training on device {device}")

# Constants
CSV_PATH = "hpt_dataset.csv"
IMAGE_SIZE = (64, 64)
BATCH_SIZE = 256
EPOCHS = 50
LEARNING_RATE = 0.001
SEED = 42  # single seed shared by the data splits and the torch RNG

# Seed torch's global RNG so that torch-driven randomness (weight
# initialisation, DataLoader shuffling, dropout) is repeatable between runs.
# NOTE(review): bit-exact GPU reproducibility may need further backend
# settings - confirm if that matters for this experiment.
torch.manual_seed(SEED)

# Load the data and encode the string labels in column 'word' as integers
df = pd.read_csv(CSV_PATH)
label_encoder = LabelEncoder()
df['label'] = label_encoder.fit_transform(df['word'])  # integer class ids

# Split the data: 20% is held out for testing, then 12.5% of the remaining
# 80% (i.e. 10% of the whole) becomes the validation set -> 70/10/20.
train_df, test_df = train_test_split(df, test_size=0.2, random_state=SEED)
train_df, val_df = train_test_split(train_df, test_size=0.125, random_state=SEED)

# Image transforms.
# Training pipeline: resize plus light augmentation (small rotation / shift).
train_transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.RandomRotation(5),
    transforms.RandomAffine(degrees=0, translate=(0.05, 0.05)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])
])

# Evaluation pipeline: same resize/normalisation but no random ops, so the
# validation and test images are not randomly distorted before scoring.
eval_transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])
])

# Backward-compatible alias: `transform` keeps referring to the augmenting
# training pipeline.
transform = train_transform


def load_images_and_labels(dataframe, image_transform=None):
    """Load every image listed in ``dataframe`` into a single tensor.

    Args:
        dataframe: DataFrame with a 'path' column (image file paths) and a
            'label' column (integer class ids).
        image_transform: Callable applied to each grayscale PIL image and
            returning a tensor. Defaults to the module-level ``transform``.

    Returns:
        A tuple ``(images, labels)`` where ``images`` is the stacked tensor
        of all transformed images and ``labels`` is a list of the values in
        the 'label' column, in row order.

    Raises:
        ValueError: If ``dataframe`` has no rows.
    """
    if image_transform is None:
        image_transform = transform

    images = []
    for img_path in dataframe['path']:
        # The context manager closes the underlying file as soon as the
        # pixels have been converted to a single-channel ('L') image.
        with Image.open(img_path) as handle:
            images.append(image_transform(handle.convert('L')))

    if not images:
        raise ValueError("dataframe contains no rows; cannot build an image tensor")

    labels = dataframe['label'].tolist()
    return torch.stack(images), labels


# Prepare the data.
# Images are transformed eagerly here, so the random augmentation of the
# training set is sampled once per image rather than re-sampled every epoch.
train_images, train_labels = load_images_and_labels(train_df, train_transform)
val_images, val_labels = load_images_and_labels(val_df, eval_transform)
test_images, test_labels = load_images_and_labels(test_df, eval_transform)

# Convert the labels to tensors and build the datasets
dataset_train = TensorDataset(train_images, torch.tensor(train_labels, dtype=torch.long))
dataset_val = TensorDataset(val_images, torch.tensor(val_labels, dtype=torch.long))
dataset_test = TensorDataset(test_images, torch.tensor(test_labels, dtype=torch.long))

# Only the training loader shuffles; evaluation order is kept fixed.
dataloader_train = DataLoader(dataset_train, batch_size=BATCH_SIZE, shuffle=True)
dataloader_val = DataLoader(dataset_val, batch_size=BATCH_SIZE, shuffle=False)
dataloader_test = DataLoader(dataset_test, batch_size=BATCH_SIZE, shuffle=False)

# Model
num_classes = len(label_encoder.classes_)

# Each of the three 2x2 max-pool layers halves both spatial dimensions, so the
# feature map entering the classifier is IMAGE_SIZE // 8 per side. Deriving the
# flattened size from IMAGE_SIZE keeps the first Linear layer valid if the
# constant is changed (it evaluates to 128 * 8 * 8 for 64x64 inputs).
pooled_height = IMAGE_SIZE[0] // 8
pooled_width = IMAGE_SIZE[1] // 8
flattened_features = 128 * pooled_height * pooled_width

model = nn.Sequential(
    # Convolutional feature extractor: 1 -> 32 -> 64 -> 128 channels
    nn.Conv2d(1, 32, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(2, 2),
    nn.Conv2d(32, 64, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(2, 2),
    nn.Conv2d(64, 128, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(2, 2),
    # Classifier head producing one logit per class
    nn.Flatten(),
    nn.Linear(flattened_features, 256),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(256, num_classes)
)

# CrossEntropyLoss takes raw logits, so the model has no final softmax.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# Accuracy helper
def calculate_accuracy(model, dataloader):
    """Return the fraction of samples in ``dataloader`` that ``model`` classifies correctly.

    The model is switched to evaluation mode and is left in that mode when
    the function returns. Batches are moved to the device of the model's
    first parameter, so the function does not rely on a global device.

    Args:
        model: Module mapping an image batch to per-class scores.
        dataloader: Iterable yielding ``(images, labels)`` tensor pairs.

    Returns:
        ``correct / total`` as a float, or ``0.0`` when the dataloader
        yields no samples.
    """
    model.eval()
    # A parameter-free model has no device of its own; in that case the
    # batches are used on whatever device they already live on.
    first_param = next(model.parameters(), None)
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in dataloader:
            if first_param is not None:
                images = images.to(first_param.device)
                labels = labels.to(first_param.device)
            outputs = model(images)
            # Index of the highest score along the class dimension
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    # Guard against an empty dataloader instead of dividing by zero
    return correct / total if total else 0.0

# Per-epoch history, filled in while the model trains
train_losses, val_accuracies, test_accuracies = [], [], []

# Model training
def train_model():
    """Train the module-level ``model`` for ``EPOCHS`` epochs.

    Uses the module-level ``dataloader_train``, ``criterion`` and
    ``optimizer``. After every epoch the mean training loss and the
    validation and test accuracy are appended to ``train_losses``,
    ``val_accuracies`` and ``test_accuracies`` and printed.

    The history lists are emptied first, so calling this function again
    leaves exactly one entry per epoch of the latest run rather than
    appending to the previous run's entries.
    """
    train_losses.clear()
    val_accuracies.clear()
    test_accuracies.clear()

    model.to(device)  # move the model to the selected device
    for epoch in range(EPOCHS):
        model.train()
        running_loss = 0.0
        samples_seen = 0
        for images, labels in dataloader_train:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # The criterion returns the batch mean; weight it by the batch
            # size so a shorter final batch is not over-represented.
            batch_size = labels.size(0)
            running_loss += loss.item() * batch_size
            samples_seen += batch_size

        # Mean loss per sample; NaN when the loader produced no batches
        avg_loss = running_loss / samples_seen if samples_seen else float("nan")
        train_losses.append(avg_loss)

        # Accuracy on the validation and test sets.
        # The test accuracy is recorded for monitoring only and should not
        # drive model or hyper-parameter selection.
        val_accuracy = calculate_accuracy(model, dataloader_val)
        test_accuracy = calculate_accuracy(model, dataloader_test)
        val_accuracies.append(val_accuracy)
        test_accuracies.append(test_accuracy)

        print(f"Epoch {epoch+1}/{EPOCHS}, Loss: {avg_loss:.4f}, "
              f"Val Accuracy: {val_accuracy:.4f}, Test Accuracy: {test_accuracy:.4f}")
# Preview the first training image (channel 0 of the stored tensor)
preview_fig, preview_ax = plt.subplots()
preview_ax.imshow(train_images[0][0].cpu().numpy(), cmap="gray")
plt.show()

train_model()

# The x-axes are built from the recorded history rather than from EPOCHS, so
# the plots still work when the history length differs from EPOCHS.
loss_epochs = range(1, len(train_losses) + 1)
val_epochs = range(1, len(val_accuracies) + 1)
test_epochs = range(1, len(test_accuracies) + 1)

# Training-loss curve
loss_fig, loss_ax = plt.subplots(figsize=(10, 5))
loss_ax.plot(loss_epochs, train_losses, label="Training Loss")
loss_ax.set_xlabel("Epoch")
loss_ax.set_ylabel("Loss")
loss_ax.set_title("Training Loss Over Epochs")
loss_ax.legend()
plt.show()

# Validation and test accuracy curves
acc_fig, acc_ax = plt.subplots(figsize=(10, 5))
acc_ax.plot(val_epochs, val_accuracies, label="Validation Accuracy")
acc_ax.plot(test_epochs, test_accuracies, label="Test Accuracy")
acc_ax.set_xlabel("Epoch")
acc_ax.set_ylabel("Accuracy")
acc_ax.set_title("Validation and Test Accuracy Over Epochs")
acc_ax.legend()
plt.show()

Wczytano 14105 obrazów.
Liczba unikalnych słów: 4501
Podsumowanie modelu:


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Rozpoczęcie trenowania...
Epoch 1/100
[1m142/142[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 74ms/step - accuracy: 0.0213 - loss: 12.0576 - val_accuracy: 0.0208 - val_loss: 9.0875
Epoch 2/100
[1m142/142[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 71ms/step - accuracy: 0.0224 - loss: 8.5263 - val_accuracy: 0.0208 - val_loss: 8.1687
Epoch 3/100
[1m142/142[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 81ms/step - accuracy: 0.0233 - loss: 7.7453 - val_accuracy: 0.0208 - val_loss: 7.9634
Epoch 4/100
[1m 37/142[0m [32m━━━━━[0m[37m━━━━━━━━━━━━━━━[0m [1m10s[0m 103ms/step - accuracy: 0.0255 - loss: 7.5013

KeyboardInterrupt: 