In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

# 1. Load the data
# Read both spreadsheets and stack them into one frame with a fresh index.
df1 = pd.read_excel("./data/1_600.xlsx")
df2 = pd.read_excel("./data/601_1187.xlsx")
df = pd.concat([df1, df2], ignore_index=True)

# Skip the first two rows of the combined frame
# (presumably header rows -- TODO confirm against the spreadsheets).
df_clean = df.iloc[2:].reset_index(drop=True)

# Keep the six columns "Unnamed: 13" .. "Unnamed: 18"
# (presumably the six drawn numbers of each round -- TODO confirm).
numbers = df_clean[[f"Unnamed: {col}" for col in range(13, 19)]].copy()
# Non-numeric cells become NaN, rows containing any NaN are discarded,
# and the remaining values are cast to int.
numbers = numbers.apply(pd.to_numeric, errors="coerce")
numbers = numbers.dropna()
numbers = numbers.astype(int)

# Flatten row by row into a single 1-D sequence.
seq = numbers.to_numpy().flatten()

# 2. Dataset definition
class LottoDataset(Dataset):
    """Sliding-window dataset over a flat sequence of integers.

    Sample ``i`` is the pair ``(X, y)`` where ``X`` is
    ``sequence[i : i + input_len]`` and ``y`` is the ``target_len`` values
    that immediately follow it. Both are returned as ``torch.long`` tensors.

    Args:
        sequence: Sliceable 1-D sequence of integers (e.g. a NumPy array or
            a list).
        input_len: Number of values in each input window.
        target_len: Number of values in each target window.
    """

    def __init__(self, sequence, input_len=60, target_len=6):
        self.input_len = input_len
        self.target_len = target_len
        self.sequence = sequence

    def __len__(self):
        """Return the number of complete (input, target) windows."""
        # A window starting at ``i`` needs ``input_len + target_len`` values,
        # so the last valid start is ``len - input_len - target_len``
        # (inclusive), hence the ``+ 1``. Clamp at zero so that a sequence
        # that is too short yields an empty dataset instead of making
        # ``len()`` raise on a negative value.
        return max(0, len(self.sequence) - self.input_len - self.target_len + 1)

    def __getitem__(self, idx):
        """Return the ``idx``-th ``(X, y)`` pair.

        Negative indices count from the end.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        size = len(self)
        if idx < 0:
            idx += size
        # Raising IndexError (rather than returning truncated slices) also
        # lets plain ``for X, y in dataset`` iteration terminate.
        if not 0 <= idx < size:
            raise IndexError(f"index {idx} out of range for dataset of size {size}")
        split = idx + self.input_len
        X = self.sequence[idx:split]
        y = self.sequence[split:split + self.target_len]
        return torch.tensor(X, dtype=torch.long), torch.tensor(y, dtype=torch.long)

# Sliding windows over `seq`: 60 input values followed by 6 target values,
# served in shuffled mini-batches of 32.
dataset = LottoDataset(sequence=seq, input_len=60, target_len=6)
loader = DataLoader(dataset=dataset, batch_size=32, shuffle=True)

# 3. Transformer model definition
class LottoTransformer(nn.Module):
    """Transformer encoder that classifies the value following a sequence.

    Token ids are embedded, a learned positional embedding is added, the
    result is passed through a stack of encoder layers, and the
    representation at the last position is projected to ``vocab_size``
    logits.

    Args:
        vocab_size: Number of distinct token ids (size of the embedding
            table and of the output logits).
        d_model: Embedding / hidden width. Must be divisible by ``nhead``.
        nhead: Number of attention heads per encoder layer.
        num_layers: Number of stacked encoder layers.
        max_len: Longest input sequence supported by the learned
            positional embedding.
    """

    def __init__(self, vocab_size=46, d_model=64, nhead=4, num_layers=2, max_len=60):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, d_model)
        self.pos_embed = nn.Parameter(torch.randn(1, max_len, d_model))
        # batch_first=True is required: forward() works on
        # (batch, seq, d_model) tensors. With the default (False) the
        # encoder would treat the batch axis as the sequence axis and attend
        # across unrelated samples.
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model, nhead=nhead, batch_first=True
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.fc = nn.Linear(d_model, vocab_size)

    def forward(self, x):
        """Compute logits for the value that follows each input sequence.

        Args:
            x: Long tensor of token ids with shape ``(batch, seq_len)``,
                where ``seq_len <= max_len``.

        Returns:
            Float tensor of shape ``(batch, vocab_size)`` with unnormalised
            class scores.

        Raises:
            ValueError: If ``seq_len`` exceeds the positional-embedding
                length.
        """
        seq_len = x.size(1)
        max_len = self.pos_embed.size(1)
        if seq_len > max_len:
            raise ValueError(
                f"sequence length {seq_len} exceeds maximum supported length {max_len}"
            )
        # Slice so that inputs shorter than max_len are supported as well.
        x = self.embed(x) + self.pos_embed[:, :seq_len]
        x = self.transformer(x)
        x = x[:, -1, :]  # representation at the last position
        x = self.fc(x)
        return x

# Network with its default hyper-parameters, a cross-entropy objective over
# the output logits, and an Adam optimiser over all model parameters.
model = LottoTransformer()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

# 4. Training
model.train()  # make sure dropout is active while fitting
for epoch in range(10):  # number of epochs is adjustable
    epoch_loss = 0.0
    num_batches = 0
    for X_batch, y_batch in loader:
        pred = model(X_batch)
        # Only the first value of each target window is predicted.
        loss = criterion(pred, y_batch[:, 0])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        num_batches += 1
    # Report the mean over all batches rather than the last shuffled batch's
    # loss, and do not fail with a NameError when the loader is empty.
    mean_loss = epoch_loss / num_batches if num_batches else float("nan")
    print(f"Epoch {epoch+1}, Loss: {mean_loss:.4f}")

# 5. Predict the next draw from the most recent 10 draws (60 values)
# Switch to evaluation mode first: the encoder layers contain dropout, which
# would otherwise stay active and make the prediction non-deterministic.
model.eval()
recent_seq = torch.tensor(seq[-60:], dtype=torch.long).unsqueeze(0)  # add batch axis
with torch.no_grad():
    output = model(recent_seq)
    # Index of the highest-scoring class is the predicted value.
    pred_num = output.argmax(dim=1).item()

print("1188회 예측 첫 번호:", pred_num)
