In [1]:
import tensorflow.compat.v1 as tf1
# import tensorflow as tf2
import os

os.environ["CUDA_VISIBLE_DEVICES"] = "0"
config = tf1.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 1
session = tf1.Session(config=config)

2024-12-05 00:53:25.592339: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-12-05 00:53:25.737708: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-12-05 00:53:25.763945: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-12-05 00:53:26.280631: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; 

In [10]:
import torch
import torch.nn as nn
import torch.optim as optim
import math

class SPP_NET(nn.Module):
    def __init__(self, opt, input_nc, ndf=64, gpu_ids=[]):
        super(SPP_NET, self).__init__()
        self.gpu_ids = gpu_ids
        self.output_num = [4, 2, 1]

        self.conv1 = nn.Conv2d(input_nc, ndf, 4, 2, 1, bias=False)
        self.LReLU1 = nn.LeakyReLU(negative_slope=0.2)

        self.conv2 = nn.Conv2d(ndf, ndf * 2, 4, 1, 1, bias=False)
        self.BN1 = nn.BatchNorm2d(ndf * 2)

        self.conv3 = nn.Conv2d(ndf * 2, ndf * 4, 4, 1, 1, bias=False)
        self.BN2 = nn.BatchNorm2d(ndf * 4)

        self.conv4 = nn.Conv2d(ndf * 4, ndf * 8, 4, 1, 1, bias=False)
        self.BN3 = nn.BatchNorm2d(ndf * 8)

        self.conv5 = nn.Conv2d(ndf * 8, 64, 4, 1, 0, bias=False)

        # Fully connected layers
        self.fc1 = nn.Linear(86016, 4096)  # Adjust this size to match spp output size
        self.fc2 = nn.Linear(4096, 1000)

    def forward(self, x):
        device = x.device  # 텐서가 있는 장치 정보 가져오기

        # Conv Layers
        x = self.conv1(x)
        x = self.LReLU1(x)

        x = self.conv2(x)
        x = self.LReLU1(self.BN1(x))

        x = self.conv3(x)
        x = self.LReLU1(self.BN2(x))

        x = self.conv4(x)

        # SPP Layer
        spp = self.spatial_pyramid_pool(x, 1, [int(x.size(2)), int(x.size(3))], self.output_num)
        
        # Check the shape of spp
        print("SPP shape:", spp.shape)  # 확인

        # Fully connected layers
        fc1 = self.fc1(spp.view(spp.size(0), -1))  # Flatten the output before passing to fc1
        fc2 = self.fc2(fc1)

        s = nn.Sigmoid()
        output = s(fc2)

        return output

    def spatial_pyramid_pool(self, previous_conv, num_sample, previous_conv_size, out_pool_size):
        '''Define spatial pyramid pooling here'''
        
        # GPU로 텐서를 이동
        device = previous_conv.device
    
        spp = None
        for i in range(len(out_pool_size)):
            h_wid = int(math.ceil(previous_conv_size[0] / out_pool_size[i]))
            w_wid = int(math.ceil(previous_conv_size[1] / out_pool_size[i]))
            h_pad = int((h_wid * out_pool_size[i] - previous_conv_size[0] + 1) / 2)
            w_pad = int((w_wid * out_pool_size[i] - previous_conv_size[1] + 1) / 2)
            
            maxpool = nn.MaxPool2d((h_wid, w_wid), stride=(h_wid, w_wid), padding=(h_pad, w_pad))
            x = maxpool(previous_conv)
            
            if i == 0:
                spp = x.view(num_sample, -1)
            else:
                spp = torch.cat((spp, x.view(num_sample, -1)), 1)
        
        return spp.to(device)  # 최종 출력도 동일한 장치로 반환

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import numpy as np

# 입력 이미지 크기 설정
batch_size = 8
input_channels = 3  # 예를 들어, RGB 이미지
image_height = 1000
image_width = 2500

# 입력 텐서 생성 (배치 크기 8, 3 채널, 64x64 크기의 이미지)
input_tensor = torch.randn(batch_size, input_channels, image_height, image_width)

# 모델 초기화
opt = None  # 옵션은 현재 코드에서 사용되지 않으므로 None으로 설정
model = SPP_NET(opt, input_nc=input_channels)

# 모델과 입력 텐서를 동일한 장치로 이동시키기
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 모델을 device로 이동
model = model.to(device)

# 입력 텐서를 device로 이동
input_tensor = input_tensor.to(device)

# 모델 실행
output = model(input_tensor)

# 결과 출력
print("Output shape:", output.shape)  # 예측된 출력 텐서의 크기
print("Output:", output)  # 예측된 출력값

SPP shape: torch.Size([1, 86016])
Output shape: torch.Size([1, 1000])
Output: tensor([[0.5759, 0.5669, 0.4536, 0.3816, 0.4889, 0.6272, 0.5306, 0.5080, 0.5401,
         0.3916, 0.4387, 0.6181, 0.4787, 0.5763, 0.4875, 0.5882, 0.4196, 0.4126,
         0.4589, 0.3978, 0.4634, 0.5243, 0.5375, 0.5669, 0.4812, 0.5593, 0.5858,
         0.6218, 0.2366, 0.4702, 0.5190, 0.5762, 0.5054, 0.5091, 0.4384, 0.5772,
         0.4983, 0.4799, 0.5230, 0.5686, 0.3098, 0.5304, 0.7188, 0.4653, 0.6383,
         0.4807, 0.5588, 0.5048, 0.5314, 0.6031, 0.3872, 0.4631, 0.4147, 0.3740,
         0.3253, 0.5423, 0.4415, 0.3990, 0.3955, 0.3746, 0.4197, 0.4216, 0.4979,
         0.5411, 0.5646, 0.4923, 0.6237, 0.4366, 0.3955, 0.5133, 0.4900, 0.5011,
         0.4818, 0.5194, 0.3149, 0.6630, 0.4230, 0.6218, 0.4500, 0.5959, 0.3553,
         0.5351, 0.3687, 0.5671, 0.6232, 0.4131, 0.4010, 0.4782, 0.4079, 0.4814,
         0.4878, 0.4547, 0.5461, 0.3632, 0.4229, 0.5846, 0.4360, 0.5362, 0.4121,
         0.4777, 0.5558, 0.5931