In [None]:
# Install the transformers package (uncomment and run once if it is missing):
# %pip install transformers
# Model: google/vit-base-patch16-224

In [3]:
from io import BytesIO

import requests
import torch
from PIL import Image
from transformers import ViTImageProcessor, ViTForImageClassification

# Image classification with a pretrained Vision Transformer (ViT):
# download one image, preprocess it, run a forward pass, print the top-1 label.

# url = 'http://images.cocodataset.org/val2017/000000039769.jpg'
url = 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQ27RdQACLBoJNY3NyFxpw_Rx03dQn4zrSY9Q&s'

# Use a timeout so a stalled connection cannot hang the kernel, and fail loudly
# on HTTP errors instead of handing an error page to PIL.
response = requests.get(url, timeout=30)
response.raise_for_status()
# Decode from the fully downloaded (and content-decoded) bytes; force RGB so
# grayscale / palette / RGBA images still yield the three channels fed to the model.
image = Image.open(BytesIO(response.content)).convert("RGB")

processor = ViTImageProcessor.from_pretrained('google/vit-base-patch16-224')
model = ViTForImageClassification.from_pretrained('google/vit-base-patch16-224')

inputs = processor(images=image, return_tensors="pt")
# Inference only: skip autograd bookkeeping to save memory and time.
with torch.no_grad():
    outputs = model(**inputs)
logits = outputs.logits
# model predicts one of the 1000 ImageNet classes
predicted_class_idx = logits.argmax(-1).item()
print("Predicted class:", model.config.id2label[predicted_class_idx])


Predicted class: king penguin, Aptenodytes patagonica


In [None]:
# facebook/detr-resnet-50

In [5]:
from io import BytesIO

import requests
import torch
from PIL import Image
from transformers import DetrImageProcessor, DetrForObjectDetection

# Object detection with DETR (ResNet-50 backbone):
# download one image, run the detector, print every detection scoring above 0.9.

# url = "http://images.cocodataset.org/val2017/000000039769.jpg"
url = "https://i.ytimg.com/vi/g01Ug3kWEDo/maxresdefault.jpg"

# Use a timeout so a stalled connection cannot hang the kernel, and fail loudly
# on HTTP errors instead of handing an error page to PIL.
response = requests.get(url, timeout=30)
response.raise_for_status()
# Decode from the fully downloaded bytes and force 3-channel RGB input.
image = Image.open(BytesIO(response.content)).convert("RGB")

# you can specify the revision tag if you don't want the timm dependency
processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50", revision="no_timm")
model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50", revision="no_timm")

inputs = processor(images=image, return_tensors="pt")
# Inference only: skip autograd bookkeeping to save memory and time.
with torch.no_grad():
    outputs = model(**inputs)

# convert outputs (bounding boxes and class logits) to COCO API
# let's only keep detections with score > 0.9
# PIL reports size as (width, height); reversing it gives (height, width).
target_sizes = torch.tensor([image.size[::-1]])
results = processor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=0.9)[0]

for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
    # Round the box coordinates to two decimals for readable output.
    box = [round(i, 2) for i in box.tolist()]
    print(
            f"Detected {model.config.id2label[label.item()]} with confidence "
            f"{round(score.item(), 3)} at location {box}"
    )


Detected person with confidence 0.916 at location [987.22, 355.01, 1140.11, 576.47]
Detected person with confidence 0.952 at location [1137.59, 349.63, 1275.43, 518.91]
Detected person with confidence 0.911 at location [983.41, 356.3, 1058.85, 545.63]
Detected handbag with confidence 0.933 at location [490.96, 414.93, 515.36, 447.28]
Detected person with confidence 0.992 at location [496.83, 342.57, 562.7, 550.11]
Detected handbag with confidence 0.984 at location [786.81, 419.52, 815.84, 451.63]
Detected person with confidence 0.903 at location [7.27, 286.23, 150.21, 571.06]
Detected handbag with confidence 0.934 at location [288.52, 465.7, 331.7, 510.49]
Detected person with confidence 0.953 at location [654.56, 338.02, 703.32, 506.51]
Detected person with confidence 0.993 at location [340.93, 292.38, 471.28, 663.97]
Detected person with confidence 0.955 at location [125.33, 361.77, 297.55, 612.39]
Detected person with confidence 0.997 at location [591.45, 328.82, 674.56, 530.94]
Det

In [None]:
# nvidia/segformer-b0-finetuned-ade-512-512

In [6]:
from io import BytesIO

import requests
import torch
from PIL import Image
from transformers import SegformerImageProcessor, SegformerForSemanticSegmentation

# Semantic segmentation with SegFormer-B0 fine-tuned on ADE20K:
# download one image and compute the per-class logits map.

processor = SegformerImageProcessor.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")
model = SegformerForSemanticSegmentation.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
# Use a timeout so a stalled connection cannot hang the kernel, and fail loudly
# on HTTP errors instead of handing an error page to PIL.
response = requests.get(url, timeout=30)
response.raise_for_status()
# Decode from the fully downloaded bytes and force 3-channel RGB input.
image = Image.open(BytesIO(response.content)).convert("RGB")

inputs = processor(images=image, return_tensors="pt")
# Inference only: skip autograd bookkeeping to save memory and time.
with torch.no_grad():
    outputs = model(**inputs)
logits = outputs.logits  # shape (batch_size, num_labels, height/4, width/4)

  return func(*args, **kwargs)


In [7]:
# Inspect the raw segmentation logits; the tensor repr elides most values with "...".
logits

tensor([[[[ -4.6310,  -5.5232,  -6.2356,  ...,  -4.9868,  -4.7341,  -4.6612],
          [ -5.1921,  -6.1444,  -6.5996,  ...,  -5.1771,  -5.0288,  -5.1761],
          [ -5.4424,  -6.2790,  -6.7574,  ...,  -5.2748,  -5.1669,  -5.0999],
          ...,
          [ -8.5836,  -9.0887,  -9.5409,  ...,  -8.7190,  -8.5183,  -8.3098],
          [ -8.4320,  -8.8555,  -9.1848,  ...,  -7.7831,  -7.4822,  -7.3598],
          [ -8.3224,  -8.8764,  -9.1849,  ...,  -7.1564,  -6.8759,  -6.6428]],

         [[-12.1391, -13.3122, -13.9554,  ..., -11.8693, -11.5761, -11.3418],
          [-12.8732, -13.9352, -14.3563,  ..., -12.3348, -12.1524, -12.3176],
          [-12.9438, -13.8226, -14.2513,  ..., -12.3360, -12.3081, -12.2396],
          ...,
          [-13.9108, -14.2715, -14.6169,  ..., -13.2829, -13.3424, -13.3222],
          [-13.8718, -14.2715, -14.3808,  ..., -12.5270, -12.4334, -12.3057],
          [-13.6848, -14.2857, -14.5154,  ..., -11.8523, -11.8534, -11.6054]],

         [[-12.5134, -13.4687,

In [8]:
# Show the dimensions of the segmentation logits tensor.
logits.shape

torch.Size([1, 150, 128, 128])

In [None]:
# openai-community/gpt2

In [9]:
from transformers import GPT2Tokenizer, TFGPT2Model

# Load the TensorFlow GPT-2 base model and its tokenizer from the same checkpoint.
gpt2_checkpoint = 'gpt2'
model = TFGPT2Model.from_pretrained(gpt2_checkpoint)
tokenizer = GPT2Tokenizer.from_pretrained(gpt2_checkpoint)

# Tokenize the prompt into TensorFlow tensors and run a single forward pass.
text = "My name Sean. I am "
encoded_input = tokenizer(text, return_tensors='tf')
output = model(encoded_input)

All PyTorch model weights were used when initializing TFGPT2Model.

All the weights of TFGPT2Model were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFGPT2Model for predictions without further training.


In [10]:
# Inspect the model output object: it holds hidden states and cached key/values, not text.
output

TFBaseModelOutputWithPastAndCrossAttentions(last_hidden_state=<tf.Tensor: shape=(1, 7, 768), dtype=float32, numpy=
array([[[-0.03363922, -0.04301776, -0.28267986, ..., -0.1523465 ,
          0.0160212 , -0.11786222],
        [-0.55863297,  0.0885553 , -0.76869094, ...,  0.6422035 ,
         -0.15737464,  0.23898187],
        [ 0.60879236,  0.32740775, -0.1714894 , ...,  0.02277801,
          0.0630625 ,  0.23419666],
        ...,
        [-0.20968367, -0.3857963 ,  0.6751666 , ...,  0.08323035,
         -0.29230502,  0.19849446],
        [-0.68325955,  0.11009037,  0.07789887, ...,  0.36662835,
          0.22815844,  0.87713903],
        [ 0.20597857, -0.35686597,  1.6376729 , ...,  0.9829089 ,
         -0.37986955,  0.27725375]]], dtype=float32)>, past_key_values=(<tf.Tensor: shape=(2, 1, 12, 7, 64), dtype=float32, numpy=
array([[[[[-1.09587240e+00,  1.84698057e+00,  8.98728192e-01, ...,
           -1.29997241e+00, -7.13938832e-01,  1.15244293e+00],
          [-2.82440710e+00,  2.6346

In [None]:
# Note: to turn the model output into text, TFGPT2LMHeadModel reportedly has to be used instead of TFGPT2Model.