In [1]:
# Импорт необходимых библиотек
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, f1_score
from transformers import BertConfig, BertModel

# Загрузка данных Iris
data = load_iris()
X, y = data.data, data.target

# Разделение на train/test (80/20)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Нормализация данных
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Конвертация в тензоры PyTorch
X_train = torch.FloatTensor(X_train)
y_train = torch.LongTensor(y_train)
X_test = torch.FloatTensor(X_test)
y_test = torch.LongTensor(y_test)

# Создание DataLoader
train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)

# Определение модели Transformer для классификации
class IrisTransformer(nn.Module):
    def __init__(self, input_dim, num_classes):
        super().__init__()
        # Конфигурация BERT (упрощенная)
        self.config = BertConfig(
            hidden_size=64,
            num_attention_heads=4,
            num_hidden_layers=2,
            max_position_embeddings=1  # Для табличных данных
        )
        # Слои модели
        self.embedding = nn.Linear(input_dim, self.config.hidden_size)
        self.bert = BertModel(self.config)
        self.classifier = nn.Linear(self.config.hidden_size, num_classes)
    
    def forward(self, x):
        # Преобразование входных данных
        x = self.embedding(x).unsqueeze(1)  # Добавляем dimension для последовательности
        # Пропускаем через BERT
        outputs = self.bert(inputs_embeds=x)
        # Берем представление первого токена
        pooled_output = outputs.last_hidden_state[:, 0, :]
        # Классификация
        return self.classifier(pooled_output)

# Инициализация модели
model = IrisTransformer(input_dim=X_train.shape[1], num_classes=3)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# Обучение модели
epochs = 50
for epoch in range(epochs):
    model.train()
    for batch_X, batch_y in train_loader:
        optimizer.zero_grad()
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()
    
    # Оценка на тестовых данных
    model.eval()
    with torch.no_grad():
        test_outputs = model(X_test)
        _, predicted = torch.max(test_outputs, 1)
        acc = accuracy_score(y_test, predicted)
        f1 = f1_score(y_test, predicted, average='weighted')
    
    if (epoch+1) % 10 == 0:
        print(f'Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}, '
              f'Test Accuracy: {acc:.4f}, Test F1: {f1:.4f}')

# Финальная оценка
model.eval()
with torch.no_grad():
    test_outputs = model(X_test)
    _, predicted = torch.max(test_outputs, 1)
    final_acc = accuracy_score(y_test, predicted)
    final_f1 = f1_score(y_test, predicted, average='weighted')

print('\nFinal Evaluation:')
print(f'Accuracy: {final_acc:.4f}')
print(f'F1 Score: {final_f1:.4f}')

  from .autonotebook import tqdm as notebook_tqdm


Epoch 10/50, Loss: 0.1256, Test Accuracy: 1.0000, Test F1: 1.0000
Epoch 20/50, Loss: 0.0064, Test Accuracy: 0.9667, Test F1: 0.9664
Epoch 30/50, Loss: 0.0056, Test Accuracy: 0.9667, Test F1: 0.9664
Epoch 40/50, Loss: 0.0202, Test Accuracy: 1.0000, Test F1: 1.0000
Epoch 50/50, Loss: 0.0104, Test Accuracy: 1.0000, Test F1: 1.0000

Final Evaluation:
Accuracy: 1.0000
F1 Score: 1.0000


In [2]:
# Импорт библиотек
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, classification_report

# Загрузка данных (уже сделано ранее)
# X_train, X_test, y_train, y_test - из предыдущего кода

# 1. Обучение Random Forest
rf_model = RandomForestClassifier(
    n_estimators=100,  # Количество деревьев
    max_depth=3,       # Глубина деревьев (для избежания переобучения)
    random_state=42
)
rf_model.fit(X_train, y_train)

# 2. Предсказание и оценка
rf_pred = rf_model.predict(X_test)

# 3. Метрики
print("\nRandom Forest Evaluation:")
print(f"Accuracy: {accuracy_score(y_test, rf_pred):.4f}")
print(f"F1 Score (weighted): {f1_score(y_test, rf_pred, average='weighted'):.4f}")
print("\nClassification Report:")
print(classification_report(y_test, rf_pred, target_names=data.target_names))

# 4. Сравнение с Transformer (из предыдущего кода)
print("\nComparison:")
print(f"{'Model':<15} | {'Accuracy':<8} | {'F1 Score':<8}")
print("-" * 35)
print(f"{'Transformer':<15} | {final_acc:.4f}    | {final_f1:.4f}")
print(f"{'Random Forest':<15} | {accuracy_score(y_test, rf_pred):.4f}    | {f1_score(y_test, rf_pred, average='weighted'):.4f}")


Random Forest Evaluation:
Accuracy: 1.0000
F1 Score (weighted): 1.0000

Classification Report:
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        10
  versicolor       1.00      1.00      1.00         9
   virginica       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30


Comparison:
Model           | Accuracy | F1 Score
-----------------------------------
Transformer     | 1.0000    | 1.0000
Random Forest   | 1.0000    | 1.0000


In [1]:
import torch
import torch.nn as nn
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import Dataset, DataLoader

# 1. Загрузка реальных данных (ежедневная температура)
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/daily-min-temperatures.csv"
data = pd.read_csv(url, parse_dates=['Date'], index_col='Date')
temps = data['Temp'].values.astype(float)

# Нормализация данных (0-1)
scaler = MinMaxScaler()
temps = scaler.fit_transform(temps.reshape(-1, 1)).flatten()

# 2. Подготовка последовательностей
def create_sequences(data, seq_length):
    X, y = [], []
    for i in range(len(data)-seq_length):
        X.append(data[i:i+seq_length])
        y.append(data[i+seq_length])
    return torch.FloatTensor(X), torch.FloatTensor(y)

SEQ_LENGTH = 30  # Используем 30 дней для предсказания 31-го
X, y = create_sequences(temps, SEQ_LENGTH)

# 3. Класс Dataset
class TemperatureDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y
        
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        return self.X[idx], y[idx]

dataset = TemperatureDataset(X, y)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

# 4. Модель трансформера
class TempPredictor(nn.Module):
    def __init__(self, input_size=1, d_model=32, nhead=2, num_layers=2):
        super().__init__()
        self.embedding = nn.Linear(input_size, d_model)
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model, 
            nhead=nhead,
            batch_first=True
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.decoder = nn.Linear(d_model, 1)
        
    def forward(self, x):
        x = x.unsqueeze(-1)  # [batch, seq_len, 1]
        x = self.embedding(x)  # [batch, seq_len, d_model]
        x = self.transformer(x)
        x = x[:, -1, :]  # Берем последний элемент
        return self.decoder(x)

model = TempPredictor()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# 5. Обучение
for epoch in range(20):
    for batch_x, batch_y in dataloader:
        optimizer.zero_grad()
        outputs = model(batch_x)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()
    print(f'Epoch {epoch+1}, Loss: {loss.item():.4f}')

# 6. Предсказание на реальных данных
test_seq = temps[-SEQ_LENGTH:]  # Последние 30 дней
test_tensor = torch.FloatTensor(test_seq).unsqueeze(0)
pred = model(test_tensor)
pred_temp = scaler.inverse_transform(pred.detach().numpy())

print(f"\nПоследние известные температуры: {scaler.inverse_transform(test_seq.reshape(-1, 1)).flatten()[-5:]}")
print(f"Предсказанная температура на следующий день: {pred_temp[0][0]:.1f}°C")

  return torch.FloatTensor(X), torch.FloatTensor(y)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 1, Loss: 0.0125
Epoch 2, Loss: 0.0184
Epoch 3, Loss: 0.0050
Epoch 4, Loss: 0.0323
Epoch 5, Loss: 0.0322
Epoch 6, Loss: 0.0366
Epoch 7, Loss: 0.0258
Epoch 8, Loss: 0.0207
Epoch 9, Loss: 0.0253
Epoch 10, Loss: 0.0252
Epoch 11, Loss: 0.0045
Epoch 12, Loss: 0.0103
Epoch 13, Loss: 0.0224
Epoch 14, Loss: 0.0250
Epoch 15, Loss: 0.0334
Epoch 16, Loss: 0.0139
Epoch 17, Loss: 0.0079
Epoch 18, Loss: 0.0130
Epoch 19, Loss: 0.0108
Epoch 20, Loss: 0.0381

Последние известные температуры: [14.  13.6 13.5 15.7 13. ]
Предсказанная температура на следующий день: 11.7°C
