<a href="https://colab.research.google.com/github/JSJeong-me/AI-Innovation-2024/blob/main/2-1-2class-prompt-example.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense

In [2]:


def VGG16(input_shape=(224, 224, 3), num_classes=1000):
    img_input = Input(shape=input_shape)

    # Block 1
    x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1')(img_input)
    x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2')(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x)

    # Block 2
    x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1')(x)
    x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2')(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x)

    # Block 3
    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1')(x)
    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2')(x)
    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv3')(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x)

    # Block 4
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv3')(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x)

    # Block 5
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv1')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv2')(x)
    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv3')(x)
    x = MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x)

    # Classification block
    x = Flatten(name='flatten')(x)
    x = Dense(4096, activation='relu', name='fc1')(x)
    x = Dense(4096, activation='relu', name='fc2')(x)
    x = Dense(num_classes, activation='softmax', name='predictions')(x)

    # Create model
    model = Model(inputs=img_input, outputs=x, name='vgg16')

    return model

In [3]:


# 모델 생성
model = VGG16(input_shape=(224, 224, 3), num_classes=1000)
model.summary()


In [4]:
import torch
import torch.nn as nn

In [5]:


class VGG16(nn.Module):
    def __init__(self, num_classes=1000):
        super(VGG16, self).__init__()

        self.features = nn.Sequential(
            # Block 1
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # Block 2
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # Block 3
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # Block 4
            nn.Conv2d(256, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # Block 5
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, num_classes)
        )

    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

In [6]:
# 모델 생성
model = VGG16(num_classes=1000)

In [7]:


# 모델 요약
print(model)


VGG16(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation

In [9]:
def count_parameters(model):
    total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f"Total number of parameters: {total_params}")

    for name, parameter in model.named_parameters():
        if parameter.requires_grad:
            print(f"{name}: {parameter.numel()} parameters")

In [10]:
count_parameters(model)

Total number of parameters: 138357544
features.0.weight: 1728 parameters
features.0.bias: 64 parameters
features.2.weight: 36864 parameters
features.2.bias: 64 parameters
features.5.weight: 73728 parameters
features.5.bias: 128 parameters
features.7.weight: 147456 parameters
features.7.bias: 128 parameters
features.10.weight: 294912 parameters
features.10.bias: 256 parameters
features.12.weight: 589824 parameters
features.12.bias: 256 parameters
features.14.weight: 589824 parameters
features.14.bias: 256 parameters
features.17.weight: 1179648 parameters
features.17.bias: 512 parameters
features.19.weight: 2359296 parameters
features.19.bias: 512 parameters
features.21.weight: 2359296 parameters
features.21.bias: 512 parameters
features.24.weight: 2359296 parameters
features.24.bias: 512 parameters
features.26.weight: 2359296 parameters
features.26.bias: 512 parameters
features.28.weight: 2359296 parameters
features.28.bias: 512 parameters
classifier.0.weight: 102760448 parameters
class

In [None]:
!wget https://github.com/JSJeong-me/Generate_AI_for_Image/raw/main/images/cat1.png
!wget https://github.com/JSJeong-me/Generate_AI_for_Image/raw/main/images/cat2.jpg
!wget https://github.com/JSJeong-me/Generate_AI_for_Image/raw/main/images/rocket.jpg


In [12]:
import torch
import torch.nn as nn
from torchvision import models

In [13]:
# 사전 학습된 VGG16 모델 불러오기
model = models.vgg16(pretrained=True)

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth
100%|██████████| 528M/528M [00:07<00:00, 75.6MB/s]


In [14]:
# VGG16 모델의 마지막 레이어(분류기) 수정
model.classifier[6] = nn.Linear(in_features=4096, out_features=2)

In [15]:
# 수정된 모델 확인
print(model)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [16]:
# 모델 테스트: 임의의 입력 이미지(224x224)를 모델에 전달하여 출력 확인
# 랜덤한 입력 데이터 생성
input_data = torch.randn(1, 3, 224, 224)

# 출력 확인
output = model(input_data)
print(output)


tensor([[-0.3129,  0.2644]], grad_fn=<AddmmBackward0>)


In [None]:
import torch
from torchvision import models, transforms
from PIL import Image

# 이미지 경로
image_path = './cat1.png'

# 사전 학습된 VGG16 모델 불러오기
model = models.vgg16(pretrained=True)

# 모델을 평가 모드로 설정
model.eval()

# 이미지 전처리
preprocess = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# 이미지 불러오기 및 전처리 적용
img = Image.open(image_path).convert("RGB")
img_tensor = preprocess(img)

# 배치 차원 추가 (모델에 입력하기 위해)
img_tensor = img_tensor.unsqueeze(0)

# 이미지 모델에 입력
with torch.no_grad():
    output = model(img_tensor)

# 결과 확인
print(output)


In [20]:
import torch
from torchvision import models, transforms
from PIL import Image
import requests
import json

# 이미지 경로
image_path = './cat1.png'

# 사전 학습된 VGG16 모델 불러오기
model = models.vgg16(pretrained=True)

# 모델을 평가 모드로 설정
model.eval()

# 이미지 전처리
preprocess = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# 이미지 불러오기 및 전처리 적용
img = Image.open(image_path).convert("RGB")
img_tensor = preprocess(img)

# 배치 차원 추가 (모델에 입력하기 위해)
img_tensor = img_tensor.unsqueeze(0)

# 이미지 모델에 입력하여 예측
with torch.no_grad():
    output = model(img_tensor)

# 소프트맥스 확률 계산
probabilities = torch.nn.functional.softmax(output[0], dim=0)

# ImageNet 클래스 레이블 불러오기
LABELS_URL = "https://raw.githubusercontent.com/anishathalye/imagenet-simple-labels/master/imagenet-simple-labels.json"
response = requests.get(LABELS_URL)
labels = json.loads(response.text)

# 최종 예측 결과 출력 (상위 5개 클래스)
top5_prob, top5_catid = torch.topk(probabilities, 5)
for i in range(top5_prob.size(0)):
    print(f"{labels[top5_catid[i]]}: {top5_prob[i].item() * 100:.2f}%")


tabby cat: 38.66%
Egyptian Mau: 27.52%
tiger cat: 27.45%
lynx: 4.06%
bow tie: 0.48%
