### Imports

In [None]:
import h5py
import pandas as pd 
import matplotlib.pyplot as plt 
import numpy as np
import seaborn as sns
import torch
import numpy as np
from torchvision import datasets
import torchvision.transforms as transforms
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler, minmax_scale
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from scipy import stats
# Seed NumPy's global RNG so the np.random-based train/test split and index
# shuffles further down are repeatable.  PyTorch's RNG is not seeded here.
np.random.seed(10)

### Data Reading and Normalization

In [None]:
# Read the inputs ('X') and targets ('y') fully into memory.  The file is
# opened in a `with` block so the HDF5 handle is released as soon as the
# arrays have been copied out, instead of staying open for the whole session.
with h5py.File(r'C:\Users\lbg\Desktop\HE0435_NMC_1e5_curvesum.h5', 'r') as f:
    X = np.asarray(f['X'])
    Y = np.asarray(f['y'])
scale = True

x = np.asarray(X)
# Targets are mapped to [-1, 1]; the same `scaler` is used later to convert
# network outputs back to the original units.
# NOTE(review): the scaler is fitted on all targets, test rows included.
scaler = MinMaxScaler(feature_range=(-1, 1))
y = scaler.fit_transform(np.expand_dims(Y, 1))
# temp = y>5
# x = x[temp]
# y = y[temp]
# fct = np.max(y)/2
# y = y/fct

# Draw 95000 distinct row indices for training (this raises if the file holds
# fewer rows); every remaining row becomes the held-out test set.
perc_tr = np.random.choice(x.shape[0], 95000, replace=False)
perc_ts = np.setdiff1d(np.arange(0, x.shape[0]), perc_tr)

if scale:
    # Min-max scale each row independently (axis=1).
    X_n = minmax_scale(x, axis=1)
else:
    X_n = x

X_tr, Y_tr = X_n[perc_tr], y[perc_tr, :]
X_ts, Y_ts = X_n[perc_ts], y[perc_ts, :]

# Insert a singleton axis at position 1, i.e. the channel axis of the
# single-input-channel Conv1d used by the network.
X_tr = np.expand_dims(X_tr, 1)
X_ts = np.expand_dims(X_ts, 1)

### Class Generating Pytorch Dataset

In [None]:
class dataset_CNN(Dataset):
    """Pair an array of input samples with an array of regression targets.

    Indexing returns ``(data, label)`` as ``float32`` tensors; ``data`` is
    passed through ``transform`` first when one is supplied.
    """

    def __init__(self, dataset,  labels, transform=None):
        """
        Args:
            dataset: Array supporting ``dataset[idx, :]`` and ``.astype``
                (e.g. a NumPy array), one sample per entry along axis 0.
            labels: Array indexed the same way, one target row per sample.
            transform (callable, optional): Optional transform applied to
                the data tensor only (labels are left untouched).
        """
        self.label = labels
        self.transform = transform
        self.dataset = dataset

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        # Tensor indices are converted to plain Python ints/lists before
        # they are used to index the underlying arrays.
        index = idx.tolist() if torch.is_tensor(idx) else idx

        sample = torch.from_numpy(self.dataset[index, :].astype('float32'))
        target = torch.from_numpy(self.label[index, :].astype('float32'))

        if self.transform:
            sample = self.transform(sample)
        return sample, target

### Function Generating Batch Iterators

In [None]:
def dataset_loader(X_tr, Y_tr, X_ts, Y_ts, batch_size = 500, valid_size = 0.20):
    """Build batch iterators for training, validation and held-out testing.

    The training arrays are wrapped in one ``dataset_CNN``; a shuffled
    ``valid_size`` fraction of its indices feeds the validation loader and
    the rest feeds the training loader.  The test arrays get their own
    dataset and loader.

    Args:
        X_tr, Y_tr: Training inputs and targets.
        X_ts, Y_ts: Held-out test inputs and targets.
        batch_size: Batch size used by all three loaders.
        valid_size: Fraction of the training set reserved for validation.

    Returns:
        ``(train_loader, valid_loader, test_loader_h, valid_idx, train_idx)``
        where the two index lists refer to positions in the training set.
    """
    train_set = dataset_CNN(X_tr, Y_tr)
    test_set = dataset_CNN(X_ts, Y_ts)

    train_order = list(range(len(train_set)))
    test_idx_h = list(range(len(test_set)))
    # Both shuffles draw from NumPy's global RNG: test indices first, then
    # the training indices.
    np.random.shuffle(test_idx_h)
    np.random.shuffle(train_order)

    # The first `n_valid` shuffled indices form the validation subset.
    n_valid = int(np.floor(valid_size * len(train_set)))
    valid_idx = train_order[:n_valid]
    train_idx = train_order[n_valid:]

    def make_loader(source, index_pool):
        # Each loader samples (without replacement, per epoch) from its pool.
        return torch.utils.data.DataLoader(
            source,
            batch_size=batch_size,
            sampler=SubsetRandomSampler(index_pool),
            num_workers=0,
        )

    train_loader = make_loader(train_set, train_idx)
    valid_loader = make_loader(train_set, valid_idx)
    test_loader_h = make_loader(test_set, test_idx_h)

    return train_loader, valid_loader, test_loader_h, valid_idx, train_idx

### Convolutional Neural Network module


In [None]:
class CNN(nn.Module):
    """1-D convolutional regressor producing one tanh-bounded value per sample.

    Layout: an input convolution (1 -> ``n_filters`` channels), ``num_conv``
    further convolutions (``n_filters`` -> ``n_filters``), a linear layer from
    the flattened feature map to ``H`` units, ``num_linear`` hidden linear
    layers (``H`` -> ``H``) and a final ``H`` -> 1 layer.  Every convolution
    is followed by batch norm, ReLU and dropout; every hidden linear layer by
    ReLU and dropout.  The output of ``linear_flat`` is passed on without an
    activation.
    """

    def __init__(self, input_size, H, num_conv, num_linear, n_filters, k_size, drop = 0.3):
        """
        Args:
            input_size: Length of each input sequence.
            H: Width of the fully connected layers.
            num_conv: Number of convolutions after the input convolution.
            num_linear: Number of hidden ``H`` -> ``H`` linear layers.
            n_filters: Channel count of every convolution.
            k_size: Kernel size of every convolution.
            drop: Dropout probability used throughout.
        """
        super().__init__()
        self.input_size = input_size
        self.H = H
        self.num_conv = num_conv
        self.num_linear = num_linear
        self.n_filters = n_filters
        self.k_size = k_size
        self.drop = drop

        # Sub-modules are registered in a fixed order; that order determines
        # the parameter / state_dict ordering.
        self.conv1 = nn.Conv1d(1, n_filters, k_size, padding=1)
        self.bn1 = nn.BatchNorm1d(num_features=n_filters)
        self.dropout1 = nn.Dropout(drop)
        self.bn = nn.ModuleList(
            [nn.BatchNorm1d(num_features=n_filters) for _ in range(num_conv)])
        self.dropout = nn.ModuleList(
            [nn.Dropout(drop) for _ in range(num_conv)])
        self.dropout_lin = nn.ModuleList(
            [nn.Dropout(drop) for _ in range(num_linear)])
        self.linears = nn.ModuleList(
            [nn.Linear(H, H) for _ in range(num_linear)])
        self.convs = nn.ModuleList(
            [nn.Conv1d(n_filters, n_filters, k_size, padding=1)
             for _ in range(num_conv)])

        # Sequence length after the input convolution plus every entry of
        # `convs`, then multiplied by the channel count to get the flattened
        # feature size.
        length = input_size
        for _ in range(len(self.convs) + 1):
            length = self.size_calc(length)
        self.size_out = length * n_filters

        self.linear_flat = nn.Linear(self.size_out, H)
        self.linear_final = nn.Linear(H, 1)

    def forward(self, x):
        """Map a ``(batch, 1, length)`` input to a ``(batch, 1)`` output in [-1, 1]."""
        x = self.dropout1(F.relu(self.bn1(self.conv1(x))))
        for conv, norm, drop in zip(self.convs, self.bn, self.dropout):
            x = drop(F.relu(norm(conv(x))))
        # Flatten channels x length into one feature vector per sample.
        x = x.view(-1, x.shape[2]*self.n_filters)
        x = self.linear_flat(x)
        for dense, drop in zip(self.linears, self.dropout_lin):
            x = drop(F.relu(dense(x)))
        return F.tanh(self.linear_final(x))

    def size_calc(self, size_in):
        """Return the output length of one convolution with padding 1.

        With stride 1: ``size_in + 2*padding - (k_size - 1)``, which is
        ``size_in + 3 - k_size`` for padding 1.
        """
        return size_in + 3 - self.k_size

### Training Function

In [None]:
def train_non_CNN(train_loader, valid_loader, input_size,  H=500, ler =0.001, wd = 0, nl = 3, nc = 3, nf = 9, ks = 3, dr = 0.3, n_epochs = 300,  direc= r'C:\Users\lbg\OneDrive - CSEM S.A\Bureau\H0_new\QSO-timedelay'):
    """Train a ``CNN`` with Adam on an MSE loss, checkpointing the best epoch.

    Args:
        train_loader: Iterable of ``(data, target)`` training batches.
        valid_loader: Iterable of ``(data, target)`` validation batches.
        input_size: Sequence length passed to ``CNN``.
        H: Width of the fully connected layers.
        ler: Adam learning rate.
        wd: Adam weight decay.
        nl: Number of hidden linear layers.
        nc: Number of convolutions after the input convolution.
        nf: Number of convolution filters.
        ks: Convolution kernel size.
        dr: Dropout probability.
        n_epochs: Number of passes over ``train_loader``.
        direc: Destination handed to ``torch.save`` for the checkpoint, so
            it should name a file.

    Returns:
        The model as it stands after the final epoch.  The weights with the
        lowest validation loss are the ones written to ``direc``; load them
        with ``load_state_dict`` to evaluate the best epoch.
    """
    # Use the GPU when one is present (as before); otherwise fall back to the
    # CPU instead of failing on the first `.cuda()` call.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    model = CNN(input_size, H, nc, nl, nf, ks, drop = dr)
    model.float().to(device)
    print(model)

    criterion = nn.MSELoss(reduction='mean')
    optimizer = optim.Adam(model.parameters(), lr=ler, weight_decay = wd)
    # `np.inf` is the spelling that survives NumPy 2.0 (`np.Inf` was removed).
    valid_loss_min = np.inf
    for epoch in range(1, n_epochs+1):
        train_loss = 0.0
        valid_loss = 0.0

        model.train()
        for data, target in train_loader:
            data = data.to(device)
            target = target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            # Weight the batch-mean loss by batch size so the epoch average
            # below is a true per-sample mean.
            train_loss += loss.item()*data.size(0)

        model.eval()
        # No gradients are needed for validation; skipping the autograd graph
        # saves memory and time.
        with torch.no_grad():
            for data, target in valid_loader:
                data = data.to(device)
                target = target.to(device)
                output = model(data)
                loss = criterion(output, target)
                valid_loss += loss.item()*data.size(0)

        train_loss = train_loss/len(train_loader.sampler)
        valid_loss = valid_loss/len(valid_loader.sampler)

        print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
            epoch, train_loss, valid_loss))

        # save model if validation loss has decreased
        if valid_loss <= valid_loss_min:
            print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(
            valid_loss_min,
            valid_loss))
            torch.save(model.state_dict(), direc)
            valid_loss_min = valid_loss
    return model


def init_weights(m):
    """Overwrite every parameter of ``m`` in place with draws from U(-0.08, 0.08).

    All parameters reachable from ``m`` are affected, including those of
    nested sub-modules.
    """
    low, high = -0.08, 0.08
    for param in m.parameters():
        nn.init.uniform_(param.data, low, high)
        

### Hyperparameter selection

In [None]:
train_loader, valid_loader, test_loader_h, valid_idx, train_idx = dataset_loader(X_tr, Y_tr, X_ts, Y_ts, batch_size = 1000, valid_size = 0.20)

# Candidate values per hyperparameter; every combination is trained once.
H_ = [150]          # width of the fully connected layers
lr_ = [0.001]       # Adam learning rate
nl_ = [1]           # number of hidden linear layers
convs_ = [5]        # number of convolutions after the input convolution
filters_ = [6]      # filters per convolution
sizes_ = [9]        # convolution kernel size

counter = 1
for convs in convs_:
    for filters in filters_:
        for sizes in sizes_:
            for h in H_:
                for l in lr_:
                    for lay in nl_:
                        print('###################### Starting iteration number: ', counter, '######################')
                        print()
                        # Checkpoint file for this combination (best validation epoch).
                        name = r'C:\Users\lbg\Desktop\Pol_{0}_{1}_{2}_{3}_{4}_{5}_100_epochs_bn.pt'.format(str(convs),str(filters),str(sizes),str(h), str(l), str(lay))
                        model = train_non_CNN(train_loader, valid_loader, X_tr.shape[2],  H=h, ler =l, wd = 0, nl = lay, nc = convs, nf = filters, ks = sizes, dr = 0.3, n_epochs = 1500, direc= name)
                        # The returned model is deliberately left untouched:
                        # re-initialising it here (e.g. with init_weights)
                        # would throw away the weights just learned, and
                        # train_non_CNN builds a fresh network on every call
                        # anyway.
                        counter += 1
                        print(convs)

### Test model

In [None]:
# Restore the saved checkpoint and accumulate the squared error, in the
# original (un-scaled) target units, one held-out sample at a time.
model.load_state_dict(torch.load('Pol_5_6_9_150_0.001_1_100_epochs_bn.pt'))
model.eval()
loss = 0
cnt = 0
for i, curve in enumerate(X_ts):
    # Add a batch axis, cast to float32 and move to the GPU for the forward pass.
    batch = torch.from_numpy(np.expand_dims(curve, 0).astype('float32')).cuda()
    scaled_pred = model(batch).detach().cpu().numpy()[0]
    # Undo the [-1, 1] target scaling for both prediction and ground truth.
    pred = scaler.inverse_transform(np.expand_dims(scaled_pred, 1))
    truth = scaler.inverse_transform(np.expand_dims(Y_ts[i], 1))
    loss += np.power(pred - truth, 2)
    cnt += 1

print('Test RMSE: ', np.sqrt(loss/cnt))

In [None]:
# Show the RMSE again from the `loss` and `cnt` currently held in the session.
print('Test RMSE: ', np.sqrt(loss/cnt))

In [None]:
# Collect the un-scaled prediction (`p`) and ground truth (`g`) for every
# held-out sample; later cells index them as p[i][0][0] / g[i][0][0].
model.eval()
# `loss` and `cnt` are reset here but not accumulated in this cell.
loss = 0
cnt = 0
p = []
g = []
# Inference only: disable autograd so no graph is built per sample.
with torch.no_grad():
    for i in range(len(X_ts)):
        batch = torch.from_numpy(np.expand_dims(X_ts[i], 0).astype('float32')).cuda()
        scaled_pred = model(batch).cpu().numpy()[0]
        # Undo the [-1, 1] target scaling.
        pred = scaler.inverse_transform(np.expand_dims(scaled_pred, 1))
        p.append(pred)
        g.append(scaler.inverse_transform(np.expand_dims(Y_ts[i], 1)))
        # No per-sample print: the former bare print() only emitted one blank
        # line per test sample.

In [None]:
# Print every (prediction, ground truth) pair whose absolute difference is
# below 5 and count those pairs in `cnt`.
cnt = 0
for est, ref in zip(p, g):
    guess, truth = est[0][0], ref[0][0]
    if not np.abs(guess - truth) < 5:
        continue
    print(guess, truth)
    cnt += 1

### Error Calculation

In [None]:
# Pull the single value out of each per-sample array to get flat 1-D vectors.
pred = np.array([item[0][0] for item in p])
gt = np.array([item[0][0] for item in g])

errors = np.abs(pred - gt)
mins, maxs = np.min(gt), np.max(gt)

# Mean and standard deviation of the absolute error, binned by ground truth
# (binned_statistic's default binning).
mean_errors, std_errors = (
    stats.binned_statistic(gt, errors, statistic=kind)
    for kind in ('mean', 'std')
)





In [None]:
# x-positions for the error-bar plot: the centre of every bin.  They are
# derived from the actual bin edges rather than a fixed "+5" offset, which is
# only a bin centre when the bins happen to be exactly 10 units wide.
temp = mean_errors[1]                       # bin edges, one more than bins
new_means = (temp[:-1] + temp[1:]) / 2
new_means

In [None]:
# Mean absolute error per bin, with the per-bin standard deviation as bars.
bin_mean, bin_std = mean_errors[0], std_errors[0]
plt.errorbar(new_means, bin_mean, bin_std)

# Largest per-bin mean and spread, leaving out the first bin, plus their
# quadrature sum.
max_sys_m = bin_mean[1:].max()
max_sys_s = bin_std[1:].max()
for value in (max_sys_m, max_sys_s, np.sqrt(max_sys_s**2+max_sys_m**2)):
    print(value)

In [None]:
# Predicted vs. true values, with the identity line as a visual reference.
plt.scatter(pred, gt, s=0.1)
# A dedicated name is used for the reference line so the notebook globals
# `x` and `y` (the loaded inputs and scaled targets) are not overwritten.
diag = np.arange(0, 100)
plt.plot(diag, diag)

### Test on real curve

In [None]:
# Load the observed curve and min-max scale it (MinMaxScaler defaults), then
# lay it out as a single row.
hope = np.loadtxt('HE0435_B_sum_D.txt')
scaler3 = MinMaxScaler()
column = hope[:, np.newaxis]        # one feature column for the scaler
new = np.transpose(scaler3.fit_transform(column))

In [None]:
# Feed the scaled real curve through the network (batch axis added, float32,
# on the GPU) and convert the output back to the original target units.
curve = torch.from_numpy(np.expand_dims(new, 0).astype('float32')).cuda()
scaled_out = model(curve).detach().cpu().numpy()[0]
scaler.inverse_transform(np.expand_dims(scaled_out, 1))