In [1]:
import torch
import json
import numpy as np
from torch.utils.data import Dataset, DataLoader, random_split
from datasets import load_dataset
from sklearn.preprocessing import StandardScaler, LabelEncoder
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
from tqdm import tqdm

class DurakDataset(Dataset):
    def __init__(self, games, max_hand_size=6):
        self.max_hand_size = max_hand_size
        self.scaler = StandardScaler()
        self.label_encoder = LabelEncoder()
        
        # Сначала соберем все возможные карты для кодирования
        all_cards = set()
        valid_games = []
        
        for game in games:
            try:
                if isinstance(game['snapshot'], str):
                    snapshot = json.loads(game['snapshot'])
                else:
                    snapshot = game['snapshot']
                    
                if 'players' in snapshot and len(snapshot['players']) > 0:
                    for player in snapshot['players']:
                        if 'hand' in player:
                            all_cards.update(player['hand'])
                    valid_games.append(snapshot)
            except (json.JSONDecodeError, KeyError) as e:
                continue
        
        if not valid_games:
            raise ValueError("Не найдено ни одной валидной игры в датасете")
        
        # Создаем кодировщик карт
        self.card_encoder = {card: idx+1 for idx, card in enumerate(all_cards)}
        self.card_encoder['PAD'] = 0
        
        # Подготовка фичей и меток
        features = []
        labels = []
        
        # Определим фиксированное количество признаков
        num_features = self.max_hand_size + 4  # карты + 4 дополнительных признака
        
        for game in valid_games:
            trump_suit = game.get('trump', '')[-1] if game.get('trump') else ''
            player = game['players'][0]
            
            # Кодируем карты в руке
            hand_encoded = [self.card_encoder.get(card, 0) for card in player.get('hand', [])]
            hand_encoded += [0] * (self.max_hand_size - len(hand_encoded))
            
            # Собираем все признаки
            game_features = hand_encoded.copy()
            game_features.extend([
                game.get('game_rules', {}).get('game_type', 0),
                sum(1 for card in player.get('hand', []) if card[-1] == trump_suit),
                len(player.get('hand', [])),
                len(game.get('deck', []))
            ])
            
            # Проверяем, что количество признаков соответствует ожидаемому
            if len(game_features) != num_features:
                continue
                
            features.append(game_features)
            labels.append(player.get('state', 'unknown'))
        
        # Преобразуем в numpy array с проверкой размерностей
        try:
            features_array = np.vstack(features).astype(np.float32)
        except ValueError as e:
            print("Ошибка при создании массива признаков:", e)
            print("Пример features[0]:", features[0] if features else "Нет данных")
            raise
        
        # Нормализация
        self.features = self.scaler.fit_transform(features_array)
        self.labels = self.label_encoder.fit_transform(labels)
        
        print(f"Успешно загружено {len(self.labels)} примеров")
        print(f"Размерность признаков: {self.features.shape}")

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return (
            torch.FloatTensor(self.features[idx]),
            torch.LongTensor([self.labels[idx]])
        )

class DurakNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes, num_layers=2):
        super().__init__()
        # Увеличиваем размер словаря для embedding
        self.embedding = nn.Embedding(num_embeddings=1000, embedding_dim=16)  # Было 100
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=True
        )
        self.attention = nn.Sequential(
            nn.Linear(hidden_size * 2, hidden_size),
            nn.Tanh(),
            nn.Linear(hidden_size, 1, bias=False)
        )
        self.fc = nn.Linear(hidden_size * 2, num_classes)

    def forward(self, x):
        cards = x[:, :6].long()
        # Добавляем ограничение значений для embedding
        cards = torch.clamp(cards, 0, self.embedding.num_embeddings - 1)
        
        other_features = x[:, 6:]
        
        cards_embedded = self.embedding(cards)
        lstm_input = torch.cat([
            cards_embedded,
            other_features.unsqueeze(1).expand(-1, 6, -1)
        ], dim=2)
        
        lstm_out, _ = self.lstm(lstm_input)
        attention_weights = F.softmax(self.attention(lstm_out), dim=1)
        context = torch.sum(attention_weights * lstm_out, dim=1)
        
        return self.fc(context)

if __name__ == "__main__":
    # Параметры
    BATCH_SIZE = 64
    EPOCHS = 20
    HIDDEN_SIZE = 128

    # Загрузка данных
    dataset = load_dataset("neuronetties/durak")
    
    try:
        full_dataset = DurakDataset(dataset["train"])
        
        # Проверка индексов карт
        max_card_idx = max(full_dataset.card_encoder.values())
        print(f"Максимальный индекс карты: {max_card_idx}")
        
        # Разделение данных
        train_size = int(0.95 * len(full_dataset))
        test_size = len(full_dataset) - train_size

        train_dataset, test_dataset = random_split(
            full_dataset,
            [train_size, test_size],
            generator=torch.Generator().manual_seed(42)
        )

        train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
        test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE)

        # Инициализация модели с учетом размера словаря
        model = DurakNN(
            input_size=16 + (full_dataset.features.shape[1] - 6),
            hidden_size=HIDDEN_SIZE,
            num_classes=len(full_dataset.label_encoder.classes_)
        )
        
        # Проверка и корректировка embedding
        if max_card_idx >= model.embedding.num_embeddings:
            print(f"Корректируем размер embedding слоя с {model.embedding.num_embeddings} до {max_card_idx + 100}")
            model.embedding = nn.Embedding(
                num_embeddings=max_card_idx + 100,
                embedding_dim=16
            )

        optimizer = Adam(model.parameters(), lr=0.001)
        criterion = nn.CrossEntropyLoss()

        # Цикл обучения
        for epoch in range(EPOCHS):
            model.train()
            total_loss = 0
            for batch in tqdm(train_loader, desc=f"Epoch {epoch+1}"):
                x, y = batch
                optimizer.zero_grad()
                outputs = model(x)
                loss = criterion(outputs, y.squeeze())
                loss.backward()
                optimizer.step()
                total_loss += loss.item()
            
            print(f"Epoch {epoch+1}, Loss: {total_loss/len(train_loader):.4f}")

        # Сохранение модели
        torch.save({
            'model_state_dict': model.state_dict(),
            'scaler': full_dataset.scaler,
            'card_encoder': full_dataset.card_encoder,
            'label_encoder': full_dataset.label_encoder,
            'input_size': model.lstm.input_size
        }, 'durak_deep_model.pt')
        
    except Exception as e:
        print(f"Произошла ошибка: {str(e)}")
        print("Проверьте структуру данных и попробуйте снова")

  from .autonotebook import tqdm as notebook_tqdm


Успешно загружено 244303 примеров
Размерность признаков: (244303, 10)
Максимальный индекс карты: 24


Epoch 1: 100%|██████████| 3627/3627 [00:21<00:00, 170.85it/s]


Epoch 1, Loss: 1.1622


Epoch 2: 100%|██████████| 3627/3627 [00:22<00:00, 162.88it/s]


Epoch 2, Loss: 1.1417


Epoch 3: 100%|██████████| 3627/3627 [00:23<00:00, 156.86it/s]


Epoch 3, Loss: 1.1376


Epoch 4: 100%|██████████| 3627/3627 [00:23<00:00, 153.02it/s]


Epoch 4, Loss: 1.1352


Epoch 5: 100%|██████████| 3627/3627 [00:23<00:00, 156.58it/s]


Epoch 5, Loss: 1.1329


Epoch 6: 100%|██████████| 3627/3627 [00:23<00:00, 153.15it/s]


Epoch 6, Loss: 1.1315


Epoch 7: 100%|██████████| 3627/3627 [00:24<00:00, 148.65it/s]


Epoch 7, Loss: 1.1305


Epoch 8: 100%|██████████| 3627/3627 [00:24<00:00, 145.72it/s]


Epoch 8, Loss: 1.1291


Epoch 9: 100%|██████████| 3627/3627 [00:24<00:00, 147.82it/s]


Epoch 9, Loss: 1.1279


Epoch 10: 100%|██████████| 3627/3627 [00:24<00:00, 145.36it/s]


Epoch 10, Loss: 1.1264


Epoch 11: 100%|██████████| 3627/3627 [00:24<00:00, 146.16it/s]


Epoch 11, Loss: 1.1259


Epoch 12: 100%|██████████| 3627/3627 [00:24<00:00, 146.04it/s]


Epoch 12, Loss: 1.1244


Epoch 13: 100%|██████████| 3627/3627 [00:24<00:00, 145.84it/s]


Epoch 13, Loss: 1.1220


Epoch 14: 100%|██████████| 3627/3627 [00:24<00:00, 145.34it/s]


Epoch 14, Loss: 1.1201


Epoch 15: 100%|██████████| 3627/3627 [00:24<00:00, 145.29it/s]


Epoch 15, Loss: 1.1185


Epoch 16: 100%|██████████| 3627/3627 [00:24<00:00, 146.64it/s]


Epoch 16, Loss: 1.1160


Epoch 17: 100%|██████████| 3627/3627 [00:24<00:00, 145.63it/s]


Epoch 17, Loss: 1.1135


Epoch 18: 100%|██████████| 3627/3627 [00:24<00:00, 147.20it/s]


Epoch 18, Loss: 1.1112


Epoch 19: 100%|██████████| 3627/3627 [00:24<00:00, 145.51it/s]


Epoch 19, Loss: 1.1092


Epoch 20: 100%|██████████| 3627/3627 [00:24<00:00, 146.32it/s]

Epoch 20, Loss: 1.1073





# Prediction

In [7]:
import torch
import numpy as np
from torch.serialization import add_safe_globals
from sklearn.preprocessing import StandardScaler, LabelEncoder
import json
from collections import defaultdict
import torch.nn as nn
import torch.nn.functional as F

class DurakNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes, num_layers=2):
        super().__init__()
        self.embedding = nn.Embedding(num_embeddings=1000, embedding_dim=16)
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=True
        )
        self.attention = nn.Sequential(
            nn.Linear(hidden_size * 2, hidden_size),
            nn.Tanh(),
            nn.Linear(hidden_size, 1, bias=False)
        )
        self.fc = nn.Linear(hidden_size * 2, num_classes)

    def forward(self, x):
        cards = x[:, :6].long()
        cards = torch.clamp(cards, 0, self.embedding.num_embeddings - 1)
        other_features = x[:, 6:]
        
        cards_embedded = self.embedding(cards)
        lstm_input = torch.cat([
            cards_embedded,
            other_features.unsqueeze(1).expand(-1, 6, -1)
        ], dim=2)
        
        lstm_out, _ = self.lstm(lstm_input)
        attention_weights = F.softmax(self.attention(lstm_out), dim=1)
        context = torch.sum(attention_weights * lstm_out, dim=1)
        
        return self.fc(context)

class DurakModel:
    def __init__(self, model_path='durak_deep_model.pt'):
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        
        # Альтернативный способ загрузки с обработкой sklearn объектов
        try:
            # Попробуем загрузить с weights_only=False
            checkpoint = torch.load(model_path, map_location=self.device, weights_only=False)
        except Exception as e:
            print(f"Ошибка загрузки модели: {str(e)}")
            print("Пробуем альтернативный способ загрузки...")
            checkpoint = self._load_model_alternative(model_path)
        
        # Инициализация модели
        self.model = DurakNN(
            input_size=checkpoint['input_size'],
            hidden_size=128,
            num_classes=len(checkpoint['label_encoder_classes'])
        ).to(self.device)
        self.model.load_state_dict(checkpoint['model_state_dict'])
        self.model.eval()
        
        # Восстанавливаем scaler
        self.scaler = StandardScaler()
        self.scaler.mean_ = checkpoint['scaler_mean']
        self.scaler.scale_ = checkpoint['scaler_var'] ** 0.5  # scale = sqrt(var)
        
        # Восстанавливаем кодировщики
        self.card_encoder = checkpoint['card_encoder']
        self.label_encoder = LabelEncoder()
        self.label_encoder.classes_ = checkpoint['label_encoder_classes']
        
        self.reverse_card_encoder = {v:k for k,v in self.card_encoder.items()}

    def _load_model_alternative(self, model_path):
        """Альтернативный способ загрузки для старых версий PyTorch"""
        import pickle
        
        with open(model_path, 'rb') as f:
            checkpoint = pickle.load(f)
        
        # Преобразуем тензоры если нужно
        if 'model_state_dict' in checkpoint:
            for k, v in checkpoint['model_state_dict'].items():
                if isinstance(v, torch.Tensor):
                    checkpoint['model_state_dict'][k] = v.to(self.device)
        
        return checkpoint

    def prepare_features(self, game_state, max_hand_size=6):
        """Подготавливает признаки из состояния игры"""
        # Кодируем карты в руке
        hand_encoded = [self.card_encoder.get(card, 0) for card in game_state['player_hand']]
        hand_encoded += [0] * (max_hand_size - len(hand_encoded))
        
        # Определяем козырную масть
        trump_suit = game_state['trump'][-1] if game_state.get('trump') else ''
        
        # Собираем признаки
        features = [
            *hand_encoded,
            game_state.get('game_type', 0),
            sum(1 for card in game_state['player_hand'] if card[-1] == trump_suit),
            len(game_state['player_hand']),
            len(game_state.get('deck', [])),
        ]
        
        return np.array(features, dtype=np.float32)

    def predict_action(self, game_state):
        """Предсказывает лучшее действие для текущего состояния"""
        features = self.prepare_features(game_state)
        features = self.scaler.transform([features])
        features_tensor = torch.FloatTensor(features).to(self.device)
        
        with torch.no_grad():
            output = self.model(features_tensor)
            action_idx = torch.argmax(output).item()
            q_values = torch.softmax(output, dim=1).cpu().numpy()[0]
        
        action = self.label_encoder.inverse_transform([action_idx])[0]
        return action, q_values

    def choose_card(self, game_state, action_type):
        """Выбирает конкретную карту для действия"""
        valid_cards = self.get_valid_cards(game_state, action_type)
        if not valid_cards:
            return None
            
        # Простейшая стратегия: выбираем карту с наименьшим номиналом
        if action_type == 'attack':
            return min(valid_cards, key=lambda x: (x[:-1], x[-1]))
        else:  # defend
            trump_suit = game_state['trump'][-1]
            attack_card = game_state['table'][-1]['attack_card']['card']
            
            # Сначала пытаемся побить той же мастью
            same_suit = [c for c in valid_cards if c[-1] == attack_card[-1] and c[:-1] > attack_card[:-1]]
            if same_suit:
                return min(same_suit, key=lambda x: x[:-1])
            
            # Если нет - побить козырем
            trumps = [c for c in valid_cards if c[-1] == trump_suit]
            if trumps:
                return min(trumps, key=lambda x: x[:-1])
            
            return None  # Не можем побить

    def get_valid_cards(self, game_state, action_type):
        """Возвращает допустимые карты для действия"""
        if action_type == 'attack':
            if not game_state['table']:
                return game_state['player_hand']  # Первая атака - любые карты
            else:
                # Можно подкидывать только карты того же номинала, что уже на столе
                table_ranks = {card['attack_card']['card'][:-1] for card in game_state['table']}
                return [card for card in game_state['player_hand'] if card[:-1] in table_ranks]
        elif action_type == 'defend':
            if not game_state['table']:
                return []
            attack_card = game_state['table'][-1]['attack_card']['card']
            trump_suit = game_state['trump'][-1]
            
            valid = []
            for card in game_state['player_hand']:
                # Можно побить той же мастью и старше
                if card[-1] == attack_card[-1] and card[:-1] > attack_card[:-1]:
                    valid.append(card)
                # Или любым козырем (если атака не козырь)
                elif card[-1] == trump_suit and attack_card[-1] != trump_suit:
                    valid.append(card)
            return valid
        return []

    def make_decision(self, game_state):
        """Основной метод для принятия решения"""
        action, q_values = self.predict_action(game_state)
        
        if action in ['attack', 'defend']:
            card = self.choose_card(game_state, action)
            if card:
                return {'type': action, 'move': card}
            else:
                return {'type': 'take'}  # Если не можем побить - берем
        else:
            return {'type': action}
        
def save_model(model, scaler, card_encoder, label_encoder, input_size, path='durak_deep_model_new.pt'):
    """Новый способ сохранения модели, совместимый с PyTorch 2.6+"""
    checkpoint = {
        'model_state_dict': model.state_dict(),
        'scaler_mean': scaler.mean_,
        'scaler_var': scaler.var_,  # Сохраняем дисперсию вместо scale_
        'card_encoder': dict(card_encoder),
        'label_encoder_classes': label_encoder.classes_,
        'input_size': input_size,
        'device': next(model.parameters()).device
    }
        
    torch.save(checkpoint, path)
    print(f"Модель сохранена в {path}")

# Пример использования:
if __name__ == "__main__":
    # 1. Сначала пересохраните модель в новом формате (если старая модель есть)
    try:
        # Загрузка старой модели
        old_checkpoint = torch.load('durak_deep_model.pt', map_location='cpu', weights_only=False)
        
        # Создание новых объектов
        new_scaler = StandardScaler()
        new_scaler.mean_ = old_checkpoint['scaler'].mean_
        new_scaler.var_ = old_checkpoint['scaler'].var_
        
        # Сохранение в новом формате
        save_model(
            model=old_checkpoint['model'],
            scaler=new_scaler,
            card_encoder=old_checkpoint['card_encoder'],
            label_encoder=old_checkpoint['label_encoder'],
            input_size=old_checkpoint['input_size'],
            path='durak_deep_model_new.pt'
        )
    except Exception as e:
        print(f"Ошибка при конвертации модели: {e}")

    # 2. Теперь загружаем новую модель
    try:
        durak_ai = DurakModel('durak_deep_model.pt')
        
        test_state = {
            'trump': '10H',
            'player_hand': ['9S', '10D', '14H', '12C', '13D'],
            'game_type': 0,
            'deck': ['11S', '11C', '10C', '12H'],
            'table': [],
            'opponent_hand_count': 4
        }
        
        decision = durak_ai.make_decision(test_state)
        print("Принято решение:", decision)
        
    except Exception as e:
        print(f"Ошибка при работе модели: {e}")

Ошибка при конвертации модели: 'model'
Ошибка при работе модели: 'label_encoder_classes'
