# Deep learning tests

## Load libraries and data

In [1]:
import scipy.io as scio
from scipy.interpolate import CubicSpline
import numpy as np 
import matplotlib.pyplot as plt
import sklearn.datasets
import sklearn.linear_model
import torch
import torch.nn
import torch.nn.functional
import torch.utils.data
import tqdm

In [2]:
data3_1 = scio.loadmat('tissue3_1.mat')
data3_2 = scio.loadmat('tissue3_2.mat')
data4 = scio.loadmat('tissue4.mat')
data5 = scio.loadmat('tissue5.mat')
data6 = scio.loadmat('tissue6.mat')
data14 = scio.loadmat('tissue14.mat')
data24 = scio.loadmat('tissue24.mat')

In [3]:
for key, item in data3_1.items():
    print(key)

__header__
__version__
__globals__
x_c
IDX
spC
dermis
k
m
dye
infl
bcc
map_t3
map_t3_c
map_t3_svd
tissue_t3_1
ID


In [11]:
int_tissue_5 = data3_1['tissue_t3_1'].copy()
for i in range(200):
    for j in range(200):
        int_tissue_5[i,j] = int(data5['tissue_t5_1'][i,j]) 

tissue_targets = [
    (data3_1['map_t3'], data3_1['tissue_t3_1']),
    (data3_2['map_t3'], data3_2['tissue_t3_2']),
    (data4['map_t4'],   data4['tissue_t4']    ),
    (data5['map_t5_1'], int_tissue_5  ),
    (data6['map_t6'],   data6['tissue_t6']    ),
    (data14['map_t14'], data14['tissue_t14']  ),
    (data24['map_t24'], data24['tissue_t24']  )
]

In [6]:
### REDUCE NUMBERS TO SET NUMB
tissue_targets = []

for (data, target, _) in tissue_targets_pre:
    this_set = set(target.flatten())
    this_tuples = []
    this_target = target.copy()
    count = 0
    count_numb = 0

    for num in this_set:
        this_tuples.append((num,count_numb))
        count_numb += 1

    for i in range(target.shape[0]):
        for j in range(target.shape[1]):
            for (num, new_num) in this_tuples:
                if int(target[i,j]) == int(num):
                    this_target[i,j] = new_num
    
    tissue_targets.append((data, this_target, len(this_set)))
    count += 1

NameError: name 'tissue_targets_pre' is not defined

## Fake data

In [None]:
fake_data = tissue_data.copy()
import random 
p = 0.9

for i in range(200):
    for j in range(200):
        this_spectra = np.zeros(1024)

        for k in range(128):
            if random.random() < p:
                this_spectra[k] = target_simple[i,j]
            else:
                this_spectra[k] = random.randint(0,5)
        
        fake_data[i,j,:] = this_spectra

NameError: name 'tissue_data' is not defined

## Define Network

In [7]:
class ConvolutionalNeuralNetwork(torch.nn.Module):
    def __init__(self,output_size):
        super(ConvolutionalNeuralNetwork, self).__init__()
        self.conv1 = torch.nn.Conv1d(1,6,kernel_size=3,padding=0)      
        self.relu1 = torch.nn.ReLU()                                        
        self.pool1 = torch.nn.AvgPool1d(2, stride=2, padding=0)            
        self.conv2 = torch.nn.Conv1d(6,12,kernel_size=5,padding=0)     
        self.relu2 = torch.nn.ReLU()                                      
        self.pool2 = torch.nn.AvgPool1d(3, stride=2, padding=0)             
        self.conv3 = torch.nn.Conv1d(12,50,kernel_size=7,padding=0)

        self.flat = torch.nn.Flatten()                                     
        self.drop = torch.nn.Dropout(p=0.2)

        self.fc1 = torch.nn.Linear(12350, 150) 
        self.relu4 = torch.nn.ReLU()
        self.fc2 = torch.nn.Linear(150, output_size)
        self.softmax = torch.nn.Softmax(dim=1)
    
    def forward(self, x):
        output = self.conv1(x)
        output = self.relu1(output)
        output = self.pool1(output)
        output = self.conv2(output)
        output = self.relu2(output)
        output = self.pool2(output)
        output = self.conv3(output)
    
        output = self.flat(output)
        output = self.drop(output) 

        output = self.fc1(output)
        output = self.relu4(output)
        output = self.fc2(output)
        output = self.softmax(output)
        return output

## Model Training

In [8]:
def run_model(model, data, target, w, h, section=0, segment_size=1024, learning_rate=0.1, num_epochs=50):
        if w >= h:
                size = h
        if h > w:
                size = w
        
        X = []
        for i in range(size):
                for j in range(size):
                        X.append(data[i,j,:])
                
        y = []
        for i in range(size):
                for j in range(size):
                        y.append(target[i,j])
        
        # Reshape data for test & train
        X_train, X_test, y_train, _ = sklearn.model_selection.train_test_split(X, y, test_size=.2, shuffle=False)
        scaler = sklearn.preprocessing.StandardScaler()

        X_train = scaler.fit_transform(X_train)
        X_train = X_train.reshape(int(size*size*0.8),1,-1)
        X_test = scaler.transform(X_test)
        X_test = X_test.reshape(int(size*size*0.2),1,-1)

        tensor_X_train = torch.Tensor(X_train)
        tensor_X_test = torch.Tensor(X_test)
        tensor_y_train = torch.Tensor(y_train).long()

        dataset = torch.utils.data.TensorDataset(tensor_X_train, tensor_y_train)

        data_loader = torch.utils.data.DataLoader(dataset, batch_size=None,
                sampler=torch.utils.data.BatchSampler(torch.utils.data.RandomSampler(dataset), batch_size=30000, drop_last=False))

        # Create and train model
        criterion = torch.nn.CrossEntropyLoss() 
        optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
        model.train();

        loss_history = []
        for _ in tqdm.trange(num_epochs):
                for _, (inputs, targets) in enumerate(data_loader):
                        optimizer.zero_grad()
                        y_pred = model(inputs)
                        loss = criterion(y_pred, targets)
                        loss.backward()
                        optimizer.step()
                        loss_history.append(loss.item())

        # Evaluate model
        model.eval();
        y_pred_train = model(tensor_X_train) # 32000
        y_pred_test = model(tensor_X_test)   #  8000

        # Create image from results
        results = []

        for item in y_pred_train:
                array = item.detach().numpy()
                index = np.argmax(array)
                results.append(index)

        for item in y_pred_test:
                array = item.detach().numpy()
                index = np.argmax(array)
                results.append(index)
        
        approximation = np.zeros((size,size))
        for i in range(size):
                for j in range(size):
                        index = i*size + j
                        approximation[i,j] = results[index]
        
        # Plot images
        plt.figure(figsize=(10,5))
        plt.imshow(approximation)
        plt.title(str(section*segment_size)+'-'+str((section+1)*segment_size))
        plt.show()


## Run on different sections

Relevant segments seem to be: (60,75), (370,385), (450,465), (665,680) and (800,850)

I can't train the same model all the time since I am using a different number of outputs each time...
Should I use a constant number to train the model more extensively? 

In [10]:
model = ConvolutionalNeuralNetwork(11)

for (data, target) in tissue_targets:
    run_model(model, data,target,target.shape[0],target.shape[1])

In [12]:
for (data, target) in tissue_targets:
    run_model(model, data,target,target.shape[0],target.shape[1])

  0%|          | 0/50 [00:00<?, ?it/s]

torch.Size([30000, 1, 1024])


  0%|          | 0/50 [00:05<?, ?it/s]


KeyboardInterrupt: 