In [2]:
import torch
import torch.nn as nn
import torch.optim as optim

# 간단한 다층 퍼셉트론 모델 정의
class SimpleMLP(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(SimpleMLP, self).__init__()
        self.layer1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.layer2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = self.layer1(x)
        x = self.relu(x)
        x = self.layer2(x)
        return x

# 모델 인스턴스 생성
input_size = 10
hidden_size = 20
output_size = 2
model = SimpleMLP(input_size, hidden_size, output_size)

# 손실 함수와 옵티마이저 설정
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

# 가상의 입력 데이터 생성
x = torch.randn(5, input_size)
y = torch.randint(0, output_size, (5,))

for epoch in range(1000):
  # 순전파 및 역전파
  optimizer.zero_grad()
  outputs = model(x)
  loss = criterion(outputs, y)
  loss.backward()
  optimizer.step()

  if (epoch+1) % 200 == 0:
        print(f'Epoch {epoch+1}, Loss: {loss.item():.4f}')

print(f"모델 구조: {model}")
print(f"손실값: {loss.item()}")


Epoch 200, Loss: 0.2663
Epoch 400, Loss: 0.0957
Epoch 600, Loss: 0.0485
Epoch 800, Loss: 0.0302
Epoch 1000, Loss: 0.0211
모델 구조: SimpleMLP(
  (layer1): Linear(in_features=10, out_features=20, bias=True)
  (relu): ReLU()
  (layer2): Linear(in_features=20, out_features=2, bias=True)
)
손실값: 0.02113472670316696


In [3]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.losses import SparseCategoricalCrossentropy

# 1) 모델 정의: Sequential API 사용
model = Sequential([
    Dense(20, activation='relu', input_shape=(10,)),  # 은닉층
    Dense(2)                                         # 출력층 (logits 출력)
])

# 2) 컴파일: 옵티마이저, 손실함수, 평가 지표 설정
model.compile(
    optimizer=SGD(learning_rate=0.01),
    loss=SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy']
)

# 3) 가상의 입력 및 레이블 데이터 생성
x = tf.random.normal((5, 10))                               # (배치크기=5, 특성=10)
y = tf.random.uniform((5,), minval=0, maxval=2, dtype=tf.int32)  # 정수 레이블 0 또는 1

# 4) 학습: epochs=1, batch_size=5
history = model.fit(x, y, epochs=1000, batch_size=5)

# 5) 결과 출력
print("\n모델 구조:")
model.summary()
print(f"\n최종 손실값: {history.history['loss'][0]:.4f}")
print(f"최종 정확도: {history.history['accuracy'][0]:.4f}")


Epoch 1/1000


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 588ms/step - accuracy: 0.6000 - loss: 0.7149
Epoch 2/1000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step - accuracy: 0.6000 - loss: 0.6984
Epoch 3/1000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step - accuracy: 0.6000 - loss: 0.6823
Epoch 4/1000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step - accuracy: 0.6000 - loss: 0.6667
Epoch 5/1000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step - accuracy: 0.6000 - loss: 0.6516
Epoch 6/1000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step - accuracy: 0.6000 - loss: 0.6370
Epoch 7/1000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step - accuracy: 0.6000 - loss: 0.6228
Epoch 8/1000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step - accuracy: 0.6000 - loss: 0.6090
Epoch 9/1000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m


최종 손실값: 0.7149
최종 정확도: 0.6000


PyTorch를 이용한 LSTM 구현

In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split

# 커스텀 데이터셋 클래스
class SequenceDataset(Dataset):
    def __init__(self, sequences, labels):
        self.sequences = sequences
        self.labels = labels

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        return self.sequences[idx], self.labels[idx]

# LSTM을 이용한 시퀀스 분류 모델
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # LSTM 레이어
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)

        # 출력 레이어
        self.fc = nn.Linear(hidden_size, output_size)

        # 드롭아웃 레이어
        self.dropout = nn.Dropout(0.2)

    def forward(self, x):
        # 초기 은닉 상태와 셀 상태 설정
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)

        # LSTM 순전파
        out, _ = self.lstm(x, (h0, c0))

        # 드롭아웃 적용
        out = self.dropout(out[:, -1, :])

        # 출력 레이어 통과
        out = self.fc(out)
        return out

def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs, device):
    best_val_loss = float('inf')

    for epoch in range(num_epochs):
        # 학습 모드
        model.train()
        train_loss = 0.0
        train_correct = 0
        train_total = 0

        for sequences, labels in train_loader:
            sequences, labels = sequences.to(device), labels.to(device)

            # 기울기 초기화
            optimizer.zero_grad()

            # 순전파
            outputs = model(sequences)
            loss = criterion(outputs, labels)

            # 역전파 및 최적화
            loss.backward()
            optimizer.step()

            # 통계 계산
            train_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            train_total += labels.size(0)
            train_correct += (predicted == labels).sum().item()

        # 검증
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0
        all_preds = []
        all_labels = []

        with torch.no_grad():
            for sequences, labels in val_loader:
                sequences, labels = sequences.to(device), labels.to(device)
                outputs = model(sequences)
                loss = criterion(outputs, labels)

                val_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                val_total += labels.size(0)
                val_correct += (predicted == labels).sum().item()

                all_preds.extend(predicted.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

        # 에포크별 결과 출력
        train_loss = train_loss / len(train_loader)
        train_acc = 100 * train_correct / train_total
        val_loss = val_loss / len(val_loader)
        val_acc = 100 * val_correct / val_total

        print(f'Epoch [{epoch+1}/{num_epochs}], '
              f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%, '
              f'Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%')

        # 최고 성능 모델 저장
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), 'best_model.pth')

    # 최종 평가 지표 계산
    accuracy = accuracy_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds, average='weighted')
    recall = recall_score(all_labels, all_preds, average='weighted')
    f1 = f1_score(all_labels, all_preds, average='weighted')

    print(f'\nFinal Evaluation Metrics:')
    print(f'Accuracy: {accuracy:.4f}')
    print(f'Precision: {precision:.4f}')
    print(f'Recall: {recall:.4f}')
    print(f'F1 Score: {f1:.4f}')

def main():
    # 하이퍼파라미터 설정
    input_size = 10  # 입력 특성 차원
    hidden_size = 64  # 은닉 상태 차원
    output_size = 2  # 출력 클래스 수
    num_layers = 2
    batch_size = 32
    seq_length = 15
    num_epochs = 1000
    learning_rate = 0.001

    # 디바이스 설정
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # 가상의 데이터 생성
    num_samples = 1000
    sequences = torch.randn(num_samples, seq_length, input_size)
    labels = torch.randint(0, output_size, (num_samples,))

    # 데이터 분할
    X_train, X_val, y_train, y_val = train_test_split(
        sequences, labels, test_size=0.2, random_state=42
    )

    # 데이터로더 생성
    train_dataset = SequenceDataset(X_train, y_train)
    val_dataset = SequenceDataset(X_val, y_val)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size)

    # 모델 생성
    model = LSTMModel(input_size, hidden_size, output_size, num_layers).to(device)

    # 손실 함수와 최적화기 설정
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # 모델 학습
    train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs, device)


In [6]:
main()

Epoch [1/1000], Train Loss: 0.6945, Train Acc: 49.88%, Val Loss: 0.6935, Val Acc: 51.50%
Epoch [2/1000], Train Loss: 0.6939, Train Acc: 50.12%, Val Loss: 0.6935, Val Acc: 53.50%
Epoch [3/1000], Train Loss: 0.6920, Train Acc: 53.12%, Val Loss: 0.6936, Val Acc: 53.00%
Epoch [4/1000], Train Loss: 0.6901, Train Acc: 52.88%, Val Loss: 0.6951, Val Acc: 48.00%
Epoch [5/1000], Train Loss: 0.6896, Train Acc: 53.12%, Val Loss: 0.6953, Val Acc: 52.00%
Epoch [6/1000], Train Loss: 0.6847, Train Acc: 54.00%, Val Loss: 0.6993, Val Acc: 47.00%
Epoch [7/1000], Train Loss: 0.6810, Train Acc: 55.62%, Val Loss: 0.7043, Val Acc: 50.00%
Epoch [8/1000], Train Loss: 0.6728, Train Acc: 58.12%, Val Loss: 0.7193, Val Acc: 46.00%
Epoch [9/1000], Train Loss: 0.6650, Train Acc: 58.38%, Val Loss: 0.7282, Val Acc: 46.50%
Epoch [10/1000], Train Loss: 0.6571, Train Acc: 60.00%, Val Loss: 0.7205, Val Acc: 51.00%
Epoch [11/1000], Train Loss: 0.6423, Train Acc: 61.88%, Val Loss: 0.7596, Val Acc: 47.50%
Epoch [12/1000], Tr

tensorflow 이용 LSTM

In [7]:
import tensorflow as tf
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split

# 1) 하이퍼파라미터 설정
input_size    = 10
hidden_size   = 64
output_size   = 2
num_layers    = 2
seq_length    = 15
batch_size    = 32
num_epochs    = 1000
learning_rate = 0.001

# 2) 가상 데이터 생성
num_samples = 1000
sequences = np.random.randn(num_samples, seq_length, input_size).astype(np.float32)
labels    = np.random.randint(0, output_size, size=(num_samples,)).astype(np.int32)

# 3) 학습/검증 데이터 분할
X_train, X_val, y_train, y_val = train_test_split(
    sequences, labels, test_size=0.2, random_state=42
)

# 4) tf.data.Dataset 생성 및 배치 처리
train_dataset = (
    tf.data.Dataset.from_tensor_slices((X_train, y_train))
    .shuffle(buffer_size=1000)
    .batch(batch_size)
)
val_dataset = (
    tf.data.Dataset.from_tensor_slices((X_val, y_val))
    .batch(batch_size)
)

# 5) 모델 정의 (Sequential API + LSTM × num_layers + Dropout + Dense)
model = tf.keras.Sequential()
for i in range(num_layers):
    # 중간 LSTM 층은 return_sequences=True, 마지막 층은 False
    model.add(tf.keras.layers.LSTM(
        hidden_size,
        return_sequences=(i < num_layers - 1)
    ))
# PyTorch 코드의 self.dropout 과 유사하게, 마지막 타임스텝 출력에 Dropout 적용
model.add(tf.keras.layers.Dropout(0.2))
# 출력층 (logits 생성)
model.add(tf.keras.layers.Dense(output_size))

# 6) 모델 컴파일: 옵티마이저, 손실함수, 평가 지표 설정
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy']
)

# 7) 체크포인트 콜백: 검증 손실이 가장 낮을 때만 best_model.h5로 저장
checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
    'best_model.h5',
    save_best_only=True,
    monitor='val_loss',
    mode='min'
)

# 8) 모델 학습
history = model.fit(
    train_dataset,
    epochs=num_epochs,
    validation_data=val_dataset,
    callbacks=[checkpoint_cb]
)

# 9) 최적의 가중치 로드
model.load_weights('best_model.h5')

# 10) 최종 평가 지표 계산
#    검증 세트 전체에 대해 예측을 수행하고, sklearn으로 평가
y_pred_prob = model.predict(val_dataset)           # (num_val_samples, output_size)
y_pred      = np.argmax(y_pred_prob, axis=1)       # 클래스 레이블로 변환
y_true      = y_val                                # train_test_split으로부터 분할된 실제 레이블

accuracy  = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred, average='weighted')
recall    = recall_score(y_true, y_pred, average='weighted')
f1        = f1_score(y_true, y_pred, average='weighted')

print('\n최종 평가 지표:')
print(f'Accuracy:  {accuracy:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall:    {recall:.4f}')
print(f'F1 Score:  {f1:.4f}')


Epoch 1/1000
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 0.4903 - loss: 0.6951



[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 46ms/step - accuracy: 0.4901 - loss: 0.6951 - val_accuracy: 0.4950 - val_loss: 0.6970
Epoch 2/1000
[1m24/25[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 17ms/step - accuracy: 0.5658 - loss: 0.6853



[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step - accuracy: 0.5650 - loss: 0.6855 - val_accuracy: 0.5350 - val_loss: 0.6940
Epoch 3/1000
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - accuracy: 0.5485 - loss: 0.6811 - val_accuracy: 0.4950 - val_loss: 0.6949
Epoch 4/1000
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step - accuracy: 0.5745 - loss: 0.6794 - val_accuracy: 0.5000 - val_loss: 0.6993
Epoch 5/1000
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - accuracy: 0.5808 - loss: 0.6694 - val_accuracy: 0.4900 - val_loss: 0.7034
Epoch 6/1000
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step - accuracy: 0.6033 - loss: 0.6677 - val_accuracy: 0.4950 - val_loss: 0.6987
Epoch 7/1000
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step - accuracy: 0.6063 - loss: 0.6713 - v

KeyboardInterrupt: 

### 트랜스포머의 전체 아키텍처

- **인코더-디코더 구조**
    - 인코더: 입력 시퀀스를 표현(representation)으로 변환
    - 디코더: 표현을 출력 시퀀스로 변환
- **인코더 블록 구성 요소**
    - 멀티-헤드 셀프-어텐션(Multi-Head Self-Attention)
    - 포지션-와이즈 피드-포워드 네트워크(Position-wise Feed-Forward Network)
    - 레이어 정규화(Layer Normalization)와 잔차 연결(Residual Connection)
- **디코더 블록 구성 요소**
    - 마스크드 멀티-헤드 셀프-어텐션(Masked Multi-Head Self-Attention)
    - 인코더-디코더 어텐션(Encoder-Decoder Attention)
    - 포지션-와이즈 피드-포워드 네트워크
- **위치 인코딩(Positional Encoding)**
    - 순서 정보를 모델에 제공하는 방법
    - 사인/코사인 함수를 이용한 위치 표현

### 3.5 PyTorch를 이용한 간단한 트랜스포머 구현

In [3]:
import torch
import torch.nn as nn
import math

# 위치 인코딩 구현
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_seq_length=5000):
        super(PositionalEncoding, self).__init__()
        pe = torch.zeros(max_seq_length, d_model)
        position = torch.arange(0, max_seq_length, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)

    def forward(self, x):
        return x + self.pe[:, :x.size(1), :]

# 간단한 트랜스포머 인코더 레이어
class SimpleTransformerEncoder(nn.Module):
    def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1):
        super(SimpleTransformerEncoder, self).__init__()
        self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)

        # 피드포워드 신경망
        self.linear1 = nn.Linear(d_model, dim_feedforward)
        self.dropout = nn.Dropout(dropout)
        self.linear2 = nn.Linear(dim_feedforward, d_model)

        # 레이어 정규화
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)

        self.activation = nn.ReLU()

    def forward(self, src, src_mask=None):
        # 멀티헤드 어텐션
        src2, _ = self.self_attn(src, src, src, attn_mask=src_mask)
        src = src + self.dropout1(src2)  # 잔차 연결
        src = self.norm1(src)  # 레이어 정규화

        # 피드포워드 신경망
        src2 = self.linear2(self.dropout(self.activation(self.linear1(src))))
        src = src + self.dropout2(src2)  # 잔차 연결
        src = self.norm2(src)  # 레이어 정규화

        return src

# 전체 트랜스포머 인코더 모델
class TransformerEncoderModel(nn.Module):
    def __init__(self, vocab_size, d_model, nhead, dim_feedforward, num_layers, max_seq_length, dropout=0.1):
        super(TransformerEncoderModel, self).__init__()
        self.d_model = d_model

        # 임베딩 레이어
        self.embedding = nn.Embedding(vocab_size, d_model)
        self.pos_encoder = PositionalEncoding(d_model, max_seq_length)

        # 트랜스포머 인코더 레이어 스택
        encoder_layers = []
        for _ in range(num_layers):
            encoder_layers.append(SimpleTransformerEncoder(d_model, nhead, dim_feedforward, dropout))
        self.encoder_layers = nn.ModuleList(encoder_layers)

        self.dropout = nn.Dropout(dropout)

    def forward(self, src):
        # 임베딩 및 위치 인코딩
        src = self.embedding(src) * math.sqrt(self.d_model)
        src = self.pos_encoder(src)
        src = self.dropout(src)

        # 인코더 레이어 통과
        for encoder_layer in self.encoder_layers:
            src = encoder_layer(src)

        return src

# 모델 파라미터 설정
vocab_size = 10000
d_model = 512
nhead = 8
dim_feedforward = 2048
num_layers = 2
max_seq_length = 100
batch_size = 16
seq_length = 30

# 모델 인스턴스 생성
model = TransformerEncoderModel(vocab_size, d_model, nhead, dim_feedforward, num_layers, max_seq_length)

# 가상의 입력 데이터 (배치 크기, 시퀀스 길이)
src = torch.randint(0, vocab_size, (batch_size, seq_length))
# print(src[0].shape)
output = model(src)

print(f"입력 크기: {src.shape}")
print(f"출력 크기: {output.shape}")


입력 크기: torch.Size([16, 30])
출력 크기: torch.Size([16, 30, 512])


In [4]:
from transformers import pipeline

classifier = pipeline("sentiment-analysis")
classifier("I've been waiting for a HuggingFace course my whole life.")

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Device set to use cpu


[{'label': 'POSITIVE', 'score': 0.9598049521446228}]

In [5]:
from transformers import pipeline

answer = pipeline("question-answering")
answer(
    question="Where do I work?",
    context="My name is Sylvain and I work at Hugging Face in Brooklyn"
)


No model was supplied, defaulted to distilbert/distilbert-base-cased-distilled-squad and revision 564e9b5 (https://huggingface.co/distilbert/distilbert-base-cased-distilled-squad).
Using a pipeline without specifying a model name and revision in production is not recommended.


config.json:   0%|          | 0.00/473 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/261M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/436k [00:00<?, ?B/s]

Device set to use cpu


{'score': 0.6949766278266907, 'start': 33, 'end': 45, 'answer': 'Hugging Face'}

In [6]:
from transformers import pipeline

translator = pipeline("translation", model="Helsinki-NLP/opus-mt-fr-en")
translator("Ce cours est produit par Hugging Face.")

config.json:   0%|          | 0.00/1.42k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


pytorch_model.bin:   0%|          | 0.00/301M [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/301M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

source.spm:   0%|          | 0.00/802k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/778k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.34M [00:00<?, ?B/s]

Device set to use cpu


[{'translation_text': 'This course is produced by Hugging Face.'}]