#### 다음 실습 코드는 학습 목적으로만 사용 바랍니다. 문의 : audit@korea.ac.kr 임성열 Ph.D.

In [1]:
%pip install torch numpy

Collecting torch
  Using cached torch-2.8.0-cp311-cp311-win_amd64.whl (241.4 MB)
Collecting numpy
  Using cached numpy-2.3.2-cp311-cp311-win_amd64.whl (13.1 MB)
Collecting filelock
  Using cached filelock-3.18.0-py3-none-any.whl (16 kB)
Collecting sympy>=1.13.3
  Using cached sympy-1.14.0-py3-none-any.whl (6.3 MB)
Collecting networkx
  Using cached networkx-3.5-py3-none-any.whl (2.0 MB)
Collecting jinja2
  Using cached jinja2-3.1.6-py3-none-any.whl (134 kB)
Collecting fsspec
  Using cached fsspec-2025.7.0-py3-none-any.whl (199 kB)
Collecting mpmath<1.4,>=1.1.0
  Using cached mpmath-1.3.0-py3-none-any.whl (536 kB)
Collecting MarkupSafe>=2.0
  Using cached MarkupSafe-3.0.2-cp311-cp311-win_amd64.whl (15 kB)
Installing collected packages: mpmath, sympy, numpy, networkx, MarkupSafe, fsspec, filelock, jinja2, torch
Successfully installed MarkupSafe-3.0.2 filelock-3.18.0 fsspec-2025.7.0 jinja2-3.1.6 mpmath-1.3.0 networkx-3.5 numpy-2.3.2 sympy-1.14.0 torch-2.8.0
Note: you may need to restart the kernel to use updated packages.


[notice] A new release of pip available: 22.3 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

# Data preparation
text = "hello world machine learning is fun "
chars = sorted(set(text))  # distinct characters; sorting fixes the index order
char_to_idx = {ch: pos for pos, ch in enumerate(chars)}
idx_to_char = dict(enumerate(chars))

# Hyperparameters
input_size = len(chars)
hidden_size = 50
num_layers = 1
seq_length = 10
learning_rate = 0.01

# Build the training pairs: each input is a window of `seq_length` characters
# and its target is the same window shifted one character to the right.
window_starts = range(len(text) - seq_length)

input_data = torch.tensor(
    [[char_to_idx[ch] for ch in text[s:s + seq_length]] for s in window_starts],
    dtype=torch.long,
)
target_data = torch.tensor(
    [[char_to_idx[ch] for ch in text[s + 1:s + seq_length + 1]] for s in window_starts],
    dtype=torch.long,
)

# Model definition
class CharLSTM(nn.Module):
    """Character-level language model: embedding -> LSTM -> linear projection.

    Args:
        input_size: Number of distinct token ids accepted by the embedding;
            also the size of the logit vector produced for each position.
        hidden_size: Width of both the embedding vectors and the LSTM hidden
            state.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_size, hidden_size, num_layers):
        super().__init__()
        self.embedding = nn.Embedding(input_size, hidden_size)
        # batch_first=True: the LSTM consumes and returns (batch, seq, feature).
        self.lstm = nn.LSTM(hidden_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, input_size)

    def forward(self, x, hidden=None):
        """Compute next-token logits for every position of ``x``.

        Args:
            x: Integer tensor of token ids laid out as (batch, seq_len).
            hidden: Optional ``(h_0, c_0)`` state tuple to resume from. When
                omitted or ``None`` it is passed through to ``nn.LSTM``
                unchanged, which then starts from its default initial state.

        Returns:
            A ``(logits, hidden)`` tuple: ``logits`` has shape
            (batch, seq_len, input_size) and ``hidden`` is the ``(h_n, c_n)``
            state returned by the LSTM, suitable for feeding back in.
        """
        embedded = self.embedding(x)
        out, hidden = self.lstm(embedded, hidden)
        return self.fc(out), hidden

model = CharLSTM(input_size, hidden_size, num_layers)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training: every epoch is one full-batch pass over all windows.
# The targets never change, so flatten them once up front.
flat_targets = target_data.view(-1)

for epoch in range(200):
    optimizer.zero_grad()

    # Passing None as the state makes each epoch start from a fresh LSTM state.
    logits, _ = model(input_data, None)

    # Collapse (batch, seq) into one axis so each position is scored separately.
    loss = criterion(logits.view(-1, input_size), flat_targets)
    loss.backward()
    optimizer.step()

    if epoch % 20 == 0:
        print(f"Epoch [{epoch}/200] Loss: {loss.item():.4f}")

# Text generation
def generate_text(start_str, length):
    """Greedily extend ``start_str`` by ``length`` characters.

    The whole prompt is fed through the module-level ``model`` on the first
    step to prime the LSTM state. After that, each predicted character is fed
    back as a single-step input together with the carried state.

    Args:
        start_str: Prompt text. Every character must be a key of
            ``char_to_idx``. Must be non-empty when ``length`` is positive.
        length: Number of characters to generate. A value <= 0 returns the
            prompt unchanged.

    Returns:
        ``start_str`` followed by the generated characters.

    Raises:
        ValueError: If ``start_str`` is empty while ``length`` is positive
            (there would be no last position to predict from).
        KeyError: If ``start_str`` contains a character that is not in
            ``char_to_idx``.

    Note:
        Switches ``model`` to evaluation mode and leaves it there.
    """
    if length > 0 and not start_str:
        raise ValueError("start_str must contain at least one character")

    model.eval()
    chars_input = torch.LongTensor([[char_to_idx[c] for c in start_str]])
    hidden = None
    generated = []

    # Inference only: without no_grad every step would record an autograd
    # graph, and carrying `hidden` forward would chain those graphs together.
    with torch.no_grad():
        for _ in range(length):
            output, hidden = model(chars_input, hidden)
            # Only the prediction at the last position is the "next" character.
            last_char_logits = output[:, -1, :]
            last_char_idx = torch.argmax(last_char_logits, dim=1).item()

            generated.append(idx_to_char[last_char_idx])

            # The carried state already encodes the history, so the next
            # input is just the character that was emitted.
            chars_input = torch.LongTensor([[last_char_idx]])

    return start_str + "".join(generated)

# Show the result: generate 50 characters after the prompt "hello".
sample = generate_text("hello", 50)
print("\nGenerated Text:")
print(sample)


Epoch [0/200] Loss: 2.8702
Epoch [20/200] Loss: 0.2600
Epoch [40/200] Loss: 0.0943
Epoch [60/200] Loss: 0.0853
Epoch [80/200] Loss: 0.0832
Epoch [100/200] Loss: 0.0822
Epoch [120/200] Loss: 0.0815
Epoch [140/200] Loss: 0.0810
Epoch [160/200] Loss: 0.0807
Epoch [180/200] Loss: 0.0804

Generated Text:
hello world machine learning is fun is fun is fun is fu
