In [3]:
import torch

import torch.nn as nn

In [39]:
class MyGRUCell(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(MyGRUCell, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        
        self.x2h = nn.Linear(input_size, 3 * hidden_size, bias=True)
        self.h2h = nn.Linear(hidden_size, 3 * hidden_size, bias=True)

    def forward(self, x, h):
        x_gates = self.x2h(x)
        h_gates = self.h2h(h)
        
        x_r, x_z, x_n = x_gates.chunk(3, 1)
        h_r, h_z, h_n = h_gates.chunk(3, 1)

        r = torch.sigmoid(x_r + h_r)
        z = torch.sigmoid(x_z + h_z)
        n = torch.tanh(x_n + r * h_n)
        h_new = (1 - z) * n + z * h
        return h_new

In [40]:
import torch
import torch.nn as nn

class MyModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(MyModel, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        
        self.fc = nn.Linear(hidden_size, output_size)
        self.sigmoid = nn.Sigmoid()

        self.gru_cell = MyGRUCell(input_size=input_size, hidden_size=hidden_size)

    def forward(self, x, h=None):
        if h is None:
            h = torch.zeros(x.size(0), self.hidden_size, device=x.device, dtype=torch.float)
        outputs = []
        for i in range(x.size(1)):
            h = self.gru_cell(x[:, i], h)
            outputs.append(h)
        
        output = outputs[-1]  # 마지막 hidden state 사용
        output = self.fc(output)
        output = self.sigmoid(output)
        return output, h


In [41]:
# 모델 인스턴스 생성
input_size = 10  # 입력 특성의 수
hidden_size = 20  # hidden state의 크기
output_size = 1  # 출력의 크기
model = MyModel(input_size, hidden_size, output_size)

# 입력 데이터 예시 (batch_size, sequence_length, input_size)
x = torch.randn(32, 5, input_size)

# 모델 실행
output, final_hidden = model(x)

In [None]:
# from datasets import load_dataset

# dataset = load_dataset("imdb")
# train_dataset = dataset['train']
# test_dataset = dataset['test']

# from transformers import BertTokenizer

# tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# def tokenize_function(examples):
#     tokenized = tokenizer(examples['text'], padding="max_length", truncation=True)
    
#     # 특정 필드만 float로 변환 (예: 'input_ids')
#     if 'input_ids' in tokenized:
#         tokenized['input_ids'] = [torch.tensor(ids, dtype=torch.float) for ids in tokenized['input_ids']]
    
#     return tokenized

# tokenized_datasets = dataset.map(tokenize_function, batched=True)


KeyError: "Invalid key: slice(None, 3000, None). Please first select a split. For example: `my_dataset_dictionary['train'][slice(None, 3000, None)]`. Available splits: ['test', 'train', 'unsupervised']"

In [43]:
from torch.optim import Adam
from torch.utils.data import DataLoader


# 데이터 준비
train_dataset = tokenized_datasets["train"].remove_columns(["text"])
train_dataset = train_dataset.rename_column("label", "labels")
train_dataset.set_format("torch")

test_dataset = tokenized_datasets["test"].remove_columns(["text"])
test_dataset = test_dataset.rename_column("label", "labels")
test_dataset.set_format("torch")

train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=8)
test_dataloader = DataLoader(test_dataset, batch_size=8)
# 옵티마이저 설정
optimizer = Adam(model.parameters(), lr=0.001)

# 손실 함수 정의
criterion = nn.BCELoss()

# 학습 루프
device = torch.device("mps" if torch.mps.is_available() else "cpu")
model.to(device)

MyModel(
  (fc): Linear(in_features=20, out_features=1, bias=True)
  (sigmoid): Sigmoid()
  (gru_cell): MyGRUCell(
    (x2h): Linear(in_features=10, out_features=60, bias=True)
    (h2h): Linear(in_features=20, out_features=60, bias=True)
  )
)

In [44]:

num_epochs = 3
for epoch in range(num_epochs):
    model.train()
    for batch in train_dataloader:
        input_ids = batch['input_ids'].to(device)
        labels = batch['labels'].float().to(device)
        optimizer.zero_grad()
        outputs = model(input_ids)
        loss = criterion(outputs.squeeze(), labels)
        loss.backward()
        optimizer.step()
    
    # 평가
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for batch in test_dataloader:
            input_ids = batch['input_ids'].to(device)
            labels = batch['labels'].to(device)
            
            outputs = model(input_ids)
            predicted = (outputs.squeeze() > 0.5).long()
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    accuracy = correct / total
    print(f"Epoch {epoch+1}/{num_epochs}, Test Accuracy: {accuracy:.4f}")

RuntimeError: MPS device does not support linear for non-float inputs