In [None]:
!pip install dropbox pandas 

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import requests

# Load one human signature sample for a quick visual inspection.
# The path uses forward slashes: inside a regular string literal a backslash
# starts an escape sequence, so "\001" would be read as the control character
# chr(1) instead of the file name "001g01.csv".
data = pd.read_csv('Signature-by-AI/data/Signature_classification/signatures/signatures/human/001g01.csv', delimiter=' ')



In [None]:
# Build a scatter plot of the loaded sample's x/y points
plt.figure(figsize=(12, 8))
plt.scatter(data['x'], data['y'], s=10, color='blue', alpha=0.7)
plt.title('Wizualizacja danych x-y', fontsize=16)
plt.xlabel('Wartości x', fontsize=12)
plt.ylabel('Wartości y', fontsize=12)
plt.grid(True, linestyle='--', alpha=0.6)
plt.tight_layout()

# Display the figure
plt.show()

In [None]:
import os
import torch
import pandas as pd
import numpy as np
from torch.utils.data import Dataset, DataLoader, random_split
from sklearn.preprocessing import StandardScaler
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
from sklearn.metrics import classification_report

# 1. Configuration
class Config:
    """Static settings for the signature-classification experiment."""

    # Class name -> directory holding that class's CSV files.
    # Forward slashes are used on purpose: with backslashes in a regular
    # string literal, "\v" (as in "...\vae") is a vertical-tab escape and the
    # directory would never be found. Forward slashes are also accepted by
    # Python's file APIs on Windows.
    # NOTE(review): the 'SDT' label points at a directory named 'std' --
    # confirm which spelling is intended.
    data_dirs = {
        'GAN': 'Signature-by-AI/data/Signature_classification/signatures/signatures/gan',
        'Human': 'Signature-by-AI/data/Signature_classification/signatures/signatures/human',
        'SDT': 'Signature-by-AI/data/Signature_classification/signatures/signatures/std',
        'VAE': 'Signature-by-AI/data/Signature_classification/signatures/signatures/vae',
    }
    batch_size = 64
    max_seq_len = 1000  # Maximum sequence length
    hidden_size = 128   # LSTM hidden size
    num_epochs = 60
    learning_rate = 0.001
    # Use the GPU when one is available, otherwise fall back to the CPU.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# 2. Data preparation
class SignatureDataset(Dataset):
    """Signature trajectories read from whitespace-separated CSV files.

    Every ``.csv`` file found in the directories listed in
    ``config.data_dirs`` becomes one sample. Files must provide ``x`` and
    ``y`` columns. Each item is a tuple ``(sequence, label, length)`` where
    ``sequence`` is a float tensor of shape ``[config.max_seq_len, 2]``
    (normalized, truncated and zero-padded), ``label`` is a scalar long
    tensor and ``length`` is the number of valid rows as a plain int.

    NOTE(review): normalization statistics are computed over every file of
    the dataset, i.e. before any train/validation split is made.
    """

    def __init__(self, config):
        """Index all CSV files and compute the normalization statistics.

        Args:
            config: object exposing ``data_dirs`` (label name -> directory)
                and ``max_seq_len``.

        Raises:
            KeyError: if a key of ``config.data_dirs`` is not in ``label_map``.
        """
        self.config = config
        self.samples = []
        self.labels = []
        self.label_map = {'GAN': 0, 'Human': 1, 'SDT': 2, 'VAE': 3}

        # Collect every CSV file. os.listdir returns entries in arbitrary
        # order, so sort them to keep sample indices reproducible.
        for label, dir_path in config.data_dirs.items():
            for file_name in sorted(os.listdir(dir_path)):
                if file_name.endswith('.csv'):
                    self.samples.append(os.path.join(dir_path, file_name))
                    self.labels.append(self.label_map[label])

        # Normalization statistics (computed on the whole dataset)
        self._calculate_normalization_stats()

    @staticmethod
    def _read_points(path):
        """Read one whitespace-separated signature file into a DataFrame."""
        # sep=r'\s+' is the documented replacement for the deprecated
        # delim_whitespace=True option.
        return pd.read_csv(path, sep=r'\s+')

    @staticmethod
    def _mean_std(values):
        """Return ``(mean, std)`` of ``values`` with safe fallbacks.

        An empty array yields ``(0.0, 1.0)`` and a zero standard deviation
        is replaced by ``1.0`` so that normalization never divides by zero.
        """
        if values.size == 0:
            return 0.0, 1.0
        std = float(np.std(values))
        return float(np.mean(values)), (std if std > 0 else 1.0)

    def _calculate_normalization_stats(self):
        """Compute the global mean and standard deviation of x and y."""
        xs, ys = [], []
        for path in self.samples:
            frame = self._read_points(path)
            xs.append(frame['x'].to_numpy(dtype=np.float64))
            ys.append(frame['y'].to_numpy(dtype=np.float64))

        all_x = np.concatenate(xs) if xs else np.empty(0)
        all_y = np.concatenate(ys) if ys else np.empty(0)
        self.x_mean, self.x_std = self._mean_std(all_x)
        self.y_mean, self.y_std = self._mean_std(all_y)

    def __len__(self):
        """Return the number of indexed signature files."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Load, normalize and pad the sample at position ``idx``."""
        frame = self._read_points(self.samples[idx])

        # Normalization with the dataset-wide statistics
        x = ((frame['x'] - self.x_mean) / self.x_std).to_numpy(dtype=np.float32)
        y = ((frame['y'] - self.y_mean) / self.y_std).to_numpy(dtype=np.float32)

        # Tensor of shape [seq_len, 2]
        sequence = torch.from_numpy(np.stack([x, y], axis=1))

        # Zero-pad (or truncate) to max_seq_len
        padded_seq = torch.zeros((self.config.max_seq_len, 2))
        length = min(sequence.size(0), self.config.max_seq_len)
        padded_seq[:length] = sequence[:length]

        return padded_seq, torch.tensor(self.labels[idx], dtype=torch.long), length

# 3. Neural network model
class CNNModel(nn.Module):
    """1-D convolutional classifier for padded coordinate sequences.

    Args:
        in_channels: number of features per time step (2 for x and y).
        num_classes: number of output logits.

    The defaults reproduce the original fixed architecture; the layer layout
    (and therefore the ``state_dict`` keys) is unchanged.
    """

    def __init__(self, in_channels=2, num_classes=4):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv1d(in_channels=in_channels, out_channels=32, kernel_size=5, stride=2),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=3),
            nn.Conv1d(32, 64, kernel_size=3),
            nn.ReLU(),
            nn.AdaptiveAvgPool1d(10)  # Fixed output length regardless of input length
        )
        self.classifier = nn.Sequential(
            nn.Linear(64*10, 128),
            nn.ReLU(),
            nn.Linear(128, num_classes)
        )

    def forward(self, x, lengths=None):
        """Return class logits of shape ``[batch, num_classes]``.

        Args:
            x: tensor laid out as ``[batch, seq_len, in_channels]``.
            lengths: accepted for signature compatibility with ``LSTMModel``;
                not used by this model.
        """
        x = x.permute(0, 2, 1)  # [batch, channels, seq_len]
        x = self.features(x)
        # flatten() does not require a contiguous tensor, unlike view().
        return self.classifier(x.flatten(start_dim=1))
# Alternative to the CNN
class LSTMModel(nn.Module):
    """LSTM classifier that consumes padded sequences with their true lengths.

    Args:
        input_size: number of features per time step (2 for x and y).
        hidden_size: size of the LSTM hidden state.
        num_layers: number of stacked LSTM layers.
        num_classes: number of output logits.
    """

    def __init__(self, input_size=2, hidden_size=128, num_layers=2, num_classes=4):
        super().__init__()

        # Recurrent encoder
        self.lstm = nn.LSTM(
            input_size=input_size,       # features per step (x, y)
            hidden_size=hidden_size,     # size of the hidden representation
            num_layers=num_layers,       # number of LSTM layers
            batch_first=True,            # data layout [batch, seq_len, features]
            dropout=0.3 if num_layers > 1 else 0  # dropout only with several layers
        )

        # Classification head
        self.classifier = nn.Sequential(
            nn.Linear(hidden_size, 64),  # intermediate layer
            nn.ReLU(),                   # activation
            nn.Dropout(0.2),             # regularization
            nn.Linear(64, num_classes)   # one logit per class
        )

    def forward(self, x, lengths):
        """Return class logits of shape ``[batch_size, num_classes]``.

        Args:
            x: padded tensor laid out as ``[batch_size, seq_len, input_size]``.
            lengths: true sequence lengths, one per batch element; a tensor
                on any device or a plain sequence of ints.
        """
        # pack_padded_sequence needs a 1-D int64 CPU tensor of lengths and
        # rejects lengths of 0. An empty sequence is therefore treated as a
        # single (zero-padded) step instead of aborting the whole batch.
        lengths = torch.as_tensor(lengths, dtype=torch.int64, device='cpu').clamp(min=1)

        # Pack the batch so the LSTM skips the padding
        packed = nn.utils.rnn.pack_padded_sequence(
            input=x,
            lengths=lengths,
            batch_first=True,
            enforce_sorted=False    # sequences need not be sorted by length
        )

        # Run the LSTM; only the final hidden state is needed
        _, (hidden, _) = self.lstm(packed)

        # Final hidden state of the top layer: [batch_size, hidden_size]
        last_hidden = hidden[-1]

        # Classification
        logits = self.classifier(last_hidden)
        return logits
    
    

# 4. Helper functions
def create_dataloaders(dataset, config):
    """Split ``dataset`` 80/20 at random and wrap both parts in DataLoaders.

    Args:
        dataset: indexable dataset whose items are
            ``(features, label, length)`` triples.
        config: object exposing ``batch_size``.

    Returns:
        ``(train_loader, val_loader)``; only the training loader shuffles.
    """
    total = len(dataset)
    n_train = int(0.8 * total)
    n_val = total - n_train
    train_part, val_part = random_split(dataset, [n_train, n_val])

    def collate_fn(batch):
        # Transpose the list of triples into three parallel columns.
        features, labels, lengths = map(list, zip(*batch))
        stacked_features = torch.stack(features)
        stacked_labels = torch.stack(labels)
        return stacked_features, stacked_labels, torch.tensor(lengths)

    shared = {'batch_size': config.batch_size, 'collate_fn': collate_fn}
    train_loader = DataLoader(train_part, shuffle=True, **shared)
    val_loader = DataLoader(val_part, shuffle=False, **shared)

    return train_loader, val_loader

def train_model(model, train_loader, val_loader, config):
    """Train ``model`` and checkpoint the weights with the best validation accuracy.

    Each epoch runs one pass over ``train_loader``, evaluates on
    ``val_loader``, steps the learning-rate scheduler on validation accuracy,
    saves the state dict to ``'best_model.pth'`` whenever accuracy improves,
    and prints the losses plus a per-class classification report.

    Args:
        model: module called as ``model(features, lengths)`` returning logits.
        train_loader: iterable of ``(features, labels, lengths)`` batches.
        val_loader: iterable of ``(features, labels, lengths)`` batches.
        config: object exposing ``learning_rate``, ``num_epochs``, ``device``
            and ``data_dirs`` (its keys are used as class names).

    Returns:
        float: the best validation accuracy observed.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=config.learning_rate)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max', patience=3)

    # Class names and their integer ids for the report.
    # Assumes label ids are 0..n-1 in the order of config.data_dirs keys --
    # TODO confirm this matches the dataset's label mapping.
    class_names = list(config.data_dirs.keys())
    class_ids = list(range(len(class_names)))

    best_acc = 0.0
    for epoch in range(config.num_epochs):
        # Training
        model.train()
        train_loss = 0.0
        for features, labels, lengths in tqdm(train_loader, desc=f'Epoch {epoch+1}'):
            features = features.to(config.device)
            labels = labels.to(config.device)

            optimizer.zero_grad()
            outputs = model(features, lengths)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()

        # Validation
        model.eval()
        val_loss = 0.0
        all_preds = []
        all_labels = []

        with torch.no_grad():
            for features, labels, lengths in val_loader:
                features = features.to(config.device)
                labels = labels.to(config.device)

                outputs = model(features, lengths)
                loss = criterion(outputs, labels)
                val_loss += loss.item()

                _, preds = torch.max(outputs, 1)
                all_preds.extend(preds.cpu().tolist())
                all_labels.extend(labels.cpu().tolist())

        # Metrics; max(..., 1) guards against an empty loader
        train_loss /= max(len(train_loader), 1)
        val_loss /= max(len(val_loader), 1)
        if all_labels:
            val_acc = float(np.mean(np.array(all_preds) == np.array(all_labels)))
        else:
            val_acc = 0.0

        # Learning-rate adjustment
        scheduler.step(val_acc)

        # Keep the best model
        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(model.state_dict(), 'best_model.pth')

        # Classification report
        print(f"\nEpoch {epoch+1}/{config.num_epochs}")
        print(f"Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f}")
        # Passing labels explicitly keeps the report working when a class is
        # absent from the validation data; without it the number of observed
        # classes would not match the number of target names.
        print(classification_report(
            all_labels, all_preds,
            labels=class_ids,
            target_names=class_names,
            zero_division=0
        ))

    return best_acc

# 5. Main flow
if __name__ == "__main__":
    config = Config()
    
    # Data preparation: index the files and build the train/validation loaders
    dataset = SignatureDataset(config)
    train_loader, val_loader = create_dataloaders(dataset, config)
    
    # Model initialization (the CNN variant is the one trained here)
    model = CNNModel().to(config.device)
    print(f"Model architecture:\n{model}")
    print(f"Training on {config.device}")
    
    # Training
    train_model(model, train_loader, val_loader, config)
    
    # Final message
    print("Training completed. Best model saved to 'best_model.pth'")