In [None]:
# Install the transformers package (run once in the notebook's environment)
# %pip install transformers

In [None]:
from transformers import ViTImageProcessor, ViTForImageClassification
from PIL import Image
import requests
import torch

# Image classification with google/vit-base-patch16-224 on a sample COCO image.
url = 'http://images.cocodataset.org/val2017/000000039769.jpg'

# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() surfaces HTTP errors here instead of letting PIL fail
# later while trying to decode an error page as an image.
response = requests.get(url, stream=True, timeout=30)
response.raise_for_status()
image = Image.open(response.raw)

processor = ViTImageProcessor.from_pretrained('google/vit-base-patch16-224')
model = ViTForImageClassification.from_pretrained('google/vit-base-patch16-224')

# Preprocess the PIL image into PyTorch tensors for the model.
inputs = processor(images=image, return_tensors="pt")

# Inference only: disable autograd so no computation graph is recorded.
with torch.no_grad():
    outputs = model(**inputs)
logits = outputs.logits

# model predicts one of the 1000 ImageNet classes
predicted_class_idx = logits.argmax(-1).item()
print("Predicted class:", model.config.id2label[predicted_class_idx])

In [None]:
# facebook/detr-resnet-50: object detection

In [None]:
from transformers import DetrImageProcessor, DetrForObjectDetection
import torch
from PIL import Image
import requests

# Object detection with facebook/detr-resnet-50 on a sample COCO image.
url = "http://images.cocodataset.org/val2017/000000039769.jpg"

# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() surfaces HTTP errors here instead of letting PIL fail
# later while trying to decode an error page as an image.
response = requests.get(url, stream=True, timeout=30)
response.raise_for_status()
image = Image.open(response.raw)

# you can specify the revision tag if you don't want the timm dependency
processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50", revision="no_timm")
model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50", revision="no_timm")

inputs = processor(images=image, return_tensors="pt")

# Inference only: disable autograd so no computation graph is recorded.
with torch.no_grad():
    outputs = model(**inputs)

# convert outputs (bounding boxes and class logits) to COCO API
# let's only keep detections with score > 0.9
# PIL reports size as (width, height); reversing it gives (height, width).
target_sizes = torch.tensor([image.size[::-1]])
results = processor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=0.9)[0]

# Report every retained detection with its label, confidence and box.
for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
    box = [round(i, 2) for i in box.tolist()]
    print(
        f"Detected {model.config.id2label[label.item()]} with confidence "
        f"{round(score.item(), 3)} at location {box}"
    )


In [None]:
# nvidia/segformer-b0-finetuned-ade-512-512: semantic segmentation

In [None]:
from transformers import SegformerImageProcessor, SegformerForSemanticSegmentation
from PIL import Image
import requests
import torch

# Semantic segmentation with nvidia/segformer-b0-finetuned-ade-512-512
# on a sample COCO image.
processor = SegformerImageProcessor.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")
model = SegformerForSemanticSegmentation.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")

url = "http://images.cocodataset.org/val2017/000000039769.jpg"

# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() surfaces HTTP errors here instead of letting PIL fail
# later while trying to decode an error page as an image.
response = requests.get(url, stream=True, timeout=30)
response.raise_for_status()
image = Image.open(response.raw)

inputs = processor(images=image, return_tensors="pt")

# Inference only: disable autograd so no computation graph is recorded.
with torch.no_grad():
    outputs = model(**inputs)
logits = outputs.logits  # shape (batch_size, num_labels, height/4, width/4)


In [None]:
# Inspect the dimensions of the logits tensor.
logits.shape

In [None]:
# openai-community/gpt2: hidden-state extraction with the TensorFlow GPT-2 model

In [None]:
from transformers import GPT2Tokenizer
from transformers import TFGPT2Model

# Prompt that will be encoded and passed through the TensorFlow GPT-2 model.
text = "My name is Sean. I am"

# Load the pretrained tokenizer and the matching TensorFlow model.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = TFGPT2Model.from_pretrained("gpt2")

# Tokenize the prompt into TensorFlow tensors and run a forward pass.
encoded_input = tokenizer(text, return_tensors="tf")
output = model(encoded_input)


In [2]:
# Display the model output object returned by the GPT-2 forward pass.
output

TFBaseModelOutputWithPastAndCrossAttentions(last_hidden_state=<tf.Tensor: shape=(1, 7, 768), dtype=float32, numpy=
array([[[-0.03363922, -0.04301776, -0.28267986, ..., -0.1523465 ,
          0.0160212 , -0.11786222],
        [-0.55863297,  0.0885553 , -0.76869094, ...,  0.6422035 ,
         -0.15737464,  0.23898187],
        [ 0.03950048, -0.02845427,  0.12475415, ...,  0.25877094,
         -0.10827079,  0.6278081 ],
        ...,
        [-0.0139822 ,  0.01275538,  0.45013756, ...,  0.16861087,
         -0.1758398 ,  0.42693022],
        [-0.09566795, -0.34643757,  0.6811561 , ...,  0.25499812,
         -0.17679656,  0.12768461],
        [-0.73701507,  0.13632008, -0.0694763 , ...,  0.5517724 ,
          0.14897977,  0.7437069 ]]], dtype=float32)>, past_key_values=(<tf.Tensor: shape=(2, 1, 12, 7, 64), dtype=float32, numpy=
array([[[[[-1.09587240e+00,  1.84698057e+00,  8.98728192e-01, ...,
           -1.29997241e+00, -7.13938832e-01,  1.15244293e+00],
          [-2.82440710e+00,  2.6346