In [5]:
import numpy as np
import pandas as pd
import torch
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import os
from torch.utils.data import Dataset, DataLoader
import time
import torch.nn.functional as F
import torch.nn as nn

# res18

In [2]:
# ResNet-18 with randomly initialised weights (pretrained=False).
res18 = models.resnet18(pretrained=False)

# Replace the stem convolution so the network accepts 1-channel input.
res18.conv1 = nn.Conv2d(
    in_channels=1,
    out_channels=64,
    kernel_size=7,
    stride=2,
    padding=3,
    bias=False,
)

# Replace the classifier head: 512 -> 100 -> 10, with ReLU and dropout between
# the two linear layers.
res18.fc = nn.Sequential(
    nn.Linear(512, 100),
    nn.ReLU(inplace=True),
    nn.Dropout(p=0.5),
    nn.Linear(100, 10),
)

### Load model parameters

In [7]:
# Restore the saved weights. map_location pins every tensor to the CPU so a
# checkpoint written on a GPU machine still loads on a CPU-only one (the model3
# checkpoint further down is loaded the same way).
# NOTE(review): torch.load unpickles the file -- only load checkpoints you trust.
res18.load_state_dict(torch.load('model/res18_epoch5.pt', map_location=torch.device('cpu')))
# Switch to inference mode; eval() returns the module, so the cell output is
# the model summary.
res18.eval()

ResNet(
  (conv1): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [29]:
# Smoke test: push a single all-zero 1x256x256 image through res18.
res18(torch.zeros(1, 1, 256, 256))

tensor([[ 5.1367,  5.4201, -1.5383, -1.6602, -1.7947, -2.0198, -1.7400, -1.2598,
         -1.9788, -2.0492]], grad_fn=<AddmmBackward>)

In [192]:
#res18 = models.resnet18(pretrained = True)

### load data

In [193]:
# Preprocessing for every image: resize to 256x256, convert to grayscale,
# then turn the image into a tensor.
data_transforms = transforms.Compose([
    transforms.Resize([256, 256]),
    transforms.Grayscale(),
    transforms.ToTensor(),
])

# One ImageFolder per split, rooted at CT/train and CT/test.
image_datasets = {
    split: datasets.ImageFolder('CT/' + split, transform=data_transforms)
    for split in ['train', 'test']
}

In [194]:
# Shorthand for the training split.
train = image_datasets['train']

In [195]:
# Shape of the image tensor of the second training sample.
train[1][0].shape

torch.Size([3, 256, 256])

In [80]:
# ## transfer data(feature and y) to df
# li = list(map(lambda x: res18(x[0].view(1,1,256,256))[0].data.numpy(), train))
# df = pd.DataFrame(li)
# df['y'] = list(map(lambda x: x[1],train))
# df.head()

646

In [196]:
# Build one DataFrame per split: the res18 outputs for each image plus the
# class label in column 'y'.
data = {}
with torch.no_grad():  # inference only: do not build an autograd graph per image
    for i in ['train', 'test']:
        lst, labels = [], []
        # Single pass over the dataset. Every item access loads and transforms
        # the image, so collecting features and labels in two separate passes
        # would read each file twice.
        for image, label in image_datasets[i]:
            lst.append(res18(image.view(1, 1, 256, 256))[0].numpy())
            labels.append(label)
        df = pd.DataFrame(lst)
        df['y'] = labels
        data[i] = df

In [199]:
# Save the training-split features and labels; the row index is not written.
data['train'].to_csv('train.csv', index=False)

In [200]:
# Save the test-split features and labels; the row index is not written.
data['test'].to_csv('test.csv', index=False)

# Main approach (the one actually used)

# Toy model 3

In [35]:
##########################
### SETTINGS
##########################
# Hyperparameters
# Seed handed to torch.manual_seed before the model is created.
RANDOM_SEED = 1
# Learning rate given to the Adam optimizer.
LEARNING_RATE = 0.001

# Architecture
# Number of output logits of ConvNet3.
NUM_CLASSES = 2

# Device the model is moved to.
DEVICE = "cpu"
# Alternative: derive the class count from the data with
# len(image_datasets['train'].classes)

##########################
### MODEL
##########################

class ConvNet3(nn.Module):
    """CNN classifier for single-channel 256x256 images.

    Five convolution stages, each followed by a 2x2 max-pool, reduce a
    ``(N, 1, 256, 256)`` batch to ``(N, 256, 8, 8)``. Three fully connected
    layers compress that to 10 values (``linear3``), and ``linear4`` maps those
    10 values to ``num_classes`` logits.

    The attribute names are the ``state_dict`` keys of saved checkpoints, so
    they must not be renamed.

    Args:
        num_classes: Number of logits produced by the last layer.
    """

    def __init__(self, num_classes=10):
        super(ConvNet3, self).__init__()

        # Stage 1: conv + pool only (no batch-norm / dropout).
        self.conv1 = nn.Conv2d(1, 64, kernel_size=5, stride=1, padding=2)
        self.pool1 = nn.MaxPool2d(2)

        # Stages 2-5: conv, 2x2 max-pool, spatial dropout, batch-norm.
        self.conv2 = nn.Conv2d(64, 192, kernel_size=5, stride=1, padding=2)
        self.pool2 = nn.MaxPool2d(2)
        self.dropout2 = nn.Dropout2d(0.2)
        self.batch2 = nn.BatchNorm2d(192)

        self.conv3 = nn.Conv2d(192, 384, kernel_size=3, stride=1, padding=1)
        self.pool3 = nn.MaxPool2d(2)
        self.dropout3 = nn.Dropout2d(0.2)
        self.batch3 = nn.BatchNorm2d(384)

        self.conv4 = nn.Conv2d(384, 256, kernel_size=5, stride=1, padding=2)
        self.pool4 = nn.MaxPool2d(2)
        self.dropout4 = nn.Dropout2d(0.2)
        self.batch4 = nn.BatchNorm2d(256)

        self.conv5 = nn.Conv2d(256, 256, kernel_size=5, stride=1, padding=2)
        self.pool5 = nn.MaxPool2d(2)
        self.dropout5 = nn.Dropout2d(0.2)
        self.batch5 = nn.BatchNorm2d(256)

        # 256 * 8 * 8 is the flattened size of the last pooled feature map for a
        # 256x256 input (five 2x2 pools: 256 / 2**5 = 8).
        self.linear1 = nn.Linear(256 * 8 * 8, 4096)
        self.lin_drop_1 = nn.Dropout(0.5)
        self.lin_batch_1 = nn.BatchNorm1d(4096)

        self.linear2 = nn.Linear(4096, 512)
        self.lin_drop_2 = nn.Dropout(0.5)
        self.lin_batch_2 = nn.BatchNorm1d(512)

        # 10-dimensional bottleneck; its raw output is what extract() returns.
        self.linear3 = nn.Linear(512, 10)

        self.linear4 = nn.Linear(10, num_classes)

    def extract(self, x):
        """Return the raw ``linear3`` output (10 values per sample) for ``x``.

        This is everything ``forward`` computes before its final ReLU and
        ``linear4``. Dropout and batch-norm follow the module's train/eval
        mode, exactly as in ``forward``.

        Args:
            x: Tensor of shape ``(N, 1, 256, 256)``.

        Returns:
            Tensor of shape ``(N, 10)``.

        Raises:
            RuntimeError: If the pooled feature map of a sample does not have
                256 * 8 * 8 values (i.e. the input is not 256x256).
        """
        out = self.pool1(torch.relu(self.conv1(x)))

        # Stages 2-4 share one pattern: conv -> batch-norm -> dropout -> ReLU -> pool.
        stages = (
            (self.conv2, self.batch2, self.dropout2, self.pool2),
            (self.conv3, self.batch3, self.dropout3, self.pool3),
            (self.conv4, self.batch4, self.dropout4, self.pool4),
        )
        for conv, norm, drop, pool in stages:
            out = pool(torch.relu(drop(norm(conv(out)))))

        # Stage 5 pools before the ReLU; the order is kept as it was.
        out = torch.relu(self.pool5(self.dropout5(self.batch5(self.conv5(out)))))

        # Flatten per sample. A fixed view(-1, 256 * 8 * 8) would silently fold
        # several wrongly sized samples into one row; keeping the batch
        # dimension makes linear1 raise on a size mismatch instead.
        out = torch.flatten(out, start_dim=1)

        # Dense blocks: linear -> batch-norm -> ReLU -> dropout.
        out = self.lin_drop_1(torch.relu(self.lin_batch_1(self.linear1(out))))
        out = self.lin_drop_2(torch.relu(self.lin_batch_2(self.linear2(out))))

        return self.linear3(out)

    def forward(self, x):
        """Classify a batch of images.

        Args:
            x: Tensor of shape ``(N, 1, 256, 256)``.

        Returns:
            Tuple ``(logits, probas)``, both of shape ``(N, num_classes)``;
            ``probas`` is the softmax of ``logits`` over dimension 1.
        """
        # Reuse extract() so the feature path exists in one place only.
        logits = self.linear4(torch.relu(self.extract(x)))
        probas = F.softmax(logits, dim=1)
        return logits, probas


    
# Seed first so the freshly created network starts from reproducible weights.
torch.manual_seed(RANDOM_SEED)

# Module.to() returns the module itself, so construction and placement chain.
model3 = ConvNet3(NUM_CLASSES).to(DEVICE)

optimizer = torch.optim.Adam(params=model3.parameters(), lr=LEARNING_RATE)

In [10]:
# Sanity check: forward pass on a batch of two all-zero images.
model3(torch.zeros(2, 1, 256, 256))

(tensor([[-0.0624, -0.3482],
         [ 0.0862,  0.0526]], grad_fn=<AddmmBackward>),
 tensor([[0.5710, 0.4290],
         [0.5084, 0.4916]], grad_fn=<SoftmaxBackward>))

In [36]:
# Sanity check: 10-value features for a batch of two all-zero images. In a
# top-to-bottom run the checkpoint is not loaded yet and eval() has not been
# called at this point.
model3.extract(torch.zeros(2, 1, 256, 256))

tensor([[ 0.0537, -0.3711, -0.1043, -0.0624,  0.5342,  0.3770,  0.1038, -0.6191,
          0.9731, -0.0918],
        [-0.8433,  0.3586,  1.1264, -0.4315, -0.1242, -0.4194, -1.1390,  0.3371,
         -0.2284,  0.2729]], grad_fn=<AddmmBackward>)

### Load model parameters

In [37]:
# Read the epoch-30 checkpoint onto the CPU, copy it into the network, then
# switch to inference mode (eval() returns the module, which the cell displays).
checkpoint = torch.load('model/model3_para_epoch30.pt', map_location=torch.device('cpu'))
model3.load_state_dict(checkpoint)
model3.eval()

ConvNet3(
  (conv1): Conv2d(1, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (dropout2): Dropout2d(p=0.2, inplace=False)
  (batch2): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (dropout3): Dropout2d(p=0.2, inplace=False)
  (batch3): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv4): Conv2d(384, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (pool4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (dropout4): Dropout2d(p=0.2, inplace=False)
  (batch4)

In [38]:
# Same zero batch after loading the weights and calling eval().
model3(torch.zeros(2, 1, 256, 256))

(tensor([[-2.3009,  4.6892],
         [-2.3009,  4.6892]], grad_fn=<AddmmBackward>),
 tensor([[9.2015e-04, 9.9908e-01],
         [9.2015e-04, 9.9908e-01]], grad_fn=<SoftmaxBackward>))

In [39]:
# Extracted features for the same zero batch after loading the weights.
model3.extract(torch.zeros(2, 1, 256, 256))

tensor([[ 8.0935, -4.0854, -4.1818, -3.8889,  8.4424, -5.2277, -2.0680,  0.6597,
         -4.2362, -3.4136],
        [ 8.0935, -4.0854, -4.1818, -3.8889,  8.4424, -5.2277, -2.0680,  0.6597,
         -4.2362, -3.4136]], grad_fn=<AddmmBackward>)

### Load data

In [40]:
# Image pipeline: 256x256 resize -> grayscale -> tensor.
data_transforms = transforms.Compose([
    transforms.Resize([256, 256]),
    transforms.Grayscale(),
    transforms.ToTensor(),
])

# Datasets for both splits, read from CT/train and CT/test.
image_datasets = {
    name: datasets.ImageFolder('CT/' + name, transform=data_transforms)
    for name in ['train', 'test']
}

In [41]:
train = image_datasets['train']
# Shape of the image tensor of the second training sample.
train[1][0].shape

torch.Size([1, 256, 256])

In [47]:
## Extract the 10 model3 features for every training image.
# no_grad: this is pure inference, so no autograd graph is kept per image and
# the result can be converted to numpy directly.
with torch.no_grad():
    li = [
        model3.extract(image.view(1, 1, 256, 256))[0].numpy().reshape(-1)
        for image, _label in train
    ]

In [48]:
# Shape of the first extracted feature vector (flattened by reshape(-1) above).
li[0].shape

(10,)

In [49]:
# Build one DataFrame per split: the 10 features from model3.extract for each
# image plus the class label in column 'y'.
data = {}
with torch.no_grad():  # inference only: do not build an autograd graph per image
    for i in ['train', 'test']:
        lst, labels = [], []
        # Single pass over the dataset. Every item access loads and transforms
        # the image, so collecting features and labels in two separate passes
        # would read each file twice.
        for image, label in image_datasets[i]:
            lst.append(model3.extract(image.view(1, 1, 256, 256))[0].numpy().reshape(-1))
            labels.append(label)
        df = pd.DataFrame(lst)
        df['y'] = labels
        data[i] = df

In [50]:
# Display the training-split DataFrame (feature columns 0-9 plus label 'y').
data['train']

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,y
0,-5.493340,1.979221,5.452313,2.376600,-2.218290,-0.297500,-2.463372,-2.882947,3.689015,-3.339031,0
1,-1.176325,-0.024176,1.475685,0.602422,2.101266,-0.889643,-0.977984,-1.051256,1.076921,-0.077088,0
2,-2.055301,0.505205,2.598602,1.231147,0.399164,-0.236204,-1.028547,-1.181248,1.844412,0.637139,0
3,-3.687569,2.378079,7.798402,4.224250,-2.453051,1.493909,-1.960688,-2.135546,5.440852,2.422818,0
4,-4.934954,3.237987,10.771603,6.296090,-3.865475,2.072758,-2.735177,-2.709528,7.688848,1.484118,0
...,...,...,...,...,...,...,...,...,...,...,...
641,4.804821,-2.067938,-2.026330,-1.669337,5.694481,-3.133512,-1.433630,0.369400,-2.073154,-2.740248,1
642,3.176648,-1.120510,-0.873350,-0.638185,3.352316,-2.213702,-1.094906,0.036718,-0.942316,-1.928452,1
643,8.058622,-4.215203,-2.469227,-3.749281,6.763555,-5.871277,-2.440229,-0.194832,-4.219296,-2.010690,1
644,7.688506,-3.652898,-3.489372,-3.320179,7.352696,-5.093736,-1.863724,0.569460,-3.843305,-3.414030,1


In [51]:
# Save the training-split features and labels; the row index is not written.
data['train'].to_csv('toymodel3_extracted_10_feature_train.csv', index=False)

In [56]:
# Save the test-split features and labels; the row index is not written.
data['test'].to_csv('toymodel3_extracted_10_feature_test.csv', index=False)

# Raw pixel data was not used in the end; everything below can be ignored

# load raw data

In [184]:
# Raw-pixel pipeline: shrink every image to 10x10, convert to grayscale, then
# turn it into a tensor.
data_transforms = transforms.Compose([
    transforms.Resize([10, 10]),
    transforms.Grayscale(),
    transforms.ToTensor(),
])

# Datasets for both splits, read from CT/train and CT/test.
image_datasets = {
    part: datasets.ImageFolder('CT/' + part, transform=data_transforms)
    for part in ['train', 'test']
}

In [187]:
# First training sample: take channel 0 of the image tensor and flatten it to 1-D.
image_datasets['train'][0][0][0].numpy().reshape(-1)

(100,)

In [162]:
# Class label of the first training sample.
image_datasets['train'][0][1]

0

In [188]:
# Build one DataFrame per split: the flattened pixels of channel 0 of each
# image plus the class label in column 'y'.
data = {}
for i in ['train', 'test']:
    lst, labels = [], []
    # Single pass over the dataset. Every item access loads and transforms the
    # image, so collecting pixels and labels in two separate passes would read
    # each file twice.
    for image, label in image_datasets[i]:
        lst.append(image[0].numpy().reshape(-1))
        labels.append(label)
    df = pd.DataFrame(lst)
    df['y'] = labels
    data[i] = df

In [189]:
# Save the raw-pixel tables for both splits; the row index is not written.
data['train'].to_csv('train_raw.csv', index=False)
data['test'].to_csv('test_raw.csv',index = False)

In [190]:
# Display the dict holding the 'train' and 'test' DataFrames.
data

{'train':             0         1         2         3         4         5         6  \
 0    0.447059  0.796078  0.854902  0.780392  0.886275  0.933333  0.941176   
 1    0.721569  0.819608  0.784314  0.690196  0.819608  0.713726  0.596078   
 2    0.666667  0.819608  0.784314  0.709804  0.866667  0.768627  0.576471   
 3    0.933333  0.945098  0.682353  0.670588  0.929412  0.584314  0.349020   
 4    0.737255  0.909804  0.917647  0.901961  0.945098  0.921569  0.827451   
 ..        ...       ...       ...       ...       ...       ...       ...   
 641  0.011765  0.129412  0.529412  0.509804  0.576471  0.756863  0.729412   
 642  0.011765  0.145098  0.537255  0.584314  0.631373  0.733333  0.682353   
 643  0.133333  0.231373  0.250980  0.333333  0.313726  0.317647  0.454902   
 644  0.000000  0.003922  0.047059  0.121569  0.141176  0.141176  0.188235   
 645  0.000000  0.000000  0.027451  0.078431  0.098039  0.094118  0.121569   
 
             7         8         9  ...        91    

In [180]:
# Channel 0 of every training image as a 2-D numpy array.
li = [sample[0][0].numpy() for sample in image_datasets['train']]

In [182]:
# Inspect the first image array.
li[0]

(30, 30)