# HRS Spectrometer Neural Network Model

In [1]:
import  pandas as pd
import numpy as np
import sklearn
import math
import matplotlib
%matplotlib inline
import urllib
import os
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler

## Prepare the data
| dataset | Coord | Intro                                                                                    | baseDir                                                                       |
| ------- | ----- | ---------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------- |
| 1       | focal | \- focal plane coord<br>\- only x,y,theta,phi<br>\- used for Deep Neural Network | [http://216.197.71.44/Data/spectro_nn/focalPlane/EqEvt731/](http://216.197.71.44/Data/spectro_nn/focalPlane/EqEvt731/order0/) |
| 2       | det plane | \- detector plane coord<br>\- used for Deep Neural Network<br>\- **equal** event number | [http://216.197.71.44/Data/spectro_nn/DetPlane/EqEvt731/](http://localhost/Data/spectro_nn/DetPlane/EqEvt731/) |
| 3       | det plane | \- detector plane coord<br>\- used for Deep Neural Network<br>\- **Unequal** event number | [http://216.197.71.44/Data/spectro_nn/DetPlane/UnEqEvt731/](http://localhost/Data/spectro_nn/DetPlane/UnEqEvt731/) |

- **Two type of dataset**
Only x,y,theta,phi : PRex_DataSet_xxx.csv
All the combinations: PRex_DataSet_Ful_xxx.csv
### 1). Load the files

In [2]:
dataSouceBaseDir = "http://localhost/Data/spectro_nn/DetPlane/UnEqEvt731/order5/"

if urllib.request.urlopen(dataSouceBaseDir).getcode() !=200:
    dataSouceBaseDir = "http://216.197.71.44/Data/spectro_nn/DetPlane/EqEvt731/order5"


files = ["{}/PRex_DataSet_Full_2239.csv".format(dataSouceBaseDir),
         "{}/PRex_DataSet_Full_2240.csv".format(dataSouceBaseDir),
         "{}/PRex_DataSet_Full_2241.csv".format(dataSouceBaseDir),
         "{}/PRex_DataSet_Full_2244.csv".format(dataSouceBaseDir),
         "{}/PRex_DataSet_Full_2245.csv".format(dataSouceBaseDir),
         "{}/PRex_DataSet_Full_2256.csv".format(dataSouceBaseDir),
         "{}/PRex_DataSet_Full_2257.csv".format(dataSouceBaseDir)
         ]

li = []

for filename in files:
    print("Loading the file {}...".format(filename))
    df = pd.read_csv(filename)
    li.append(df)
data = pd.concat(li)
data.sample(frac=1) # sample the data with fraction 1. rearrange the data in random order
data.head()

Loading the file http://localhost/Data/spectro_nn/DetPlane/UnEqEvt731/order5//PRex_DataSet_Full_2239.csv...
Loading the file http://localhost/Data/spectro_nn/DetPlane/UnEqEvt731/order5//PRex_DataSet_Full_2240.csv...
Loading the file http://localhost/Data/spectro_nn/DetPlane/UnEqEvt731/order5//PRex_DataSet_Full_2241.csv...
Loading the file http://localhost/Data/spectro_nn/DetPlane/UnEqEvt731/order5//PRex_DataSet_Full_2244.csv...
Loading the file http://localhost/Data/spectro_nn/DetPlane/UnEqEvt731/order5//PRex_DataSet_Full_2245.csv...
Loading the file http://localhost/Data/spectro_nn/DetPlane/UnEqEvt731/order5//PRex_DataSet_Full_2256.csv...
Loading the file http://localhost/Data/spectro_nn/DetPlane/UnEqEvt731/order5//PRex_DataSet_Full_2257.csv...


Unnamed: 0,evtID,runID,CutID,SieveRowID,SieveColID,bpmX,bpmY,x0th0y0ph0,x0th0y0ph1,x0th0y0ph2,...,x3th1y0ph1,x3th1y1ph0,x3th2y0ph0,x4th0y0ph0,x4th0y0ph1,x4th0y1ph0,x4th1y0ph0,x5th0y0ph0,targCalTh,targCalPh
0,0,2239,136,3,5,0.003794,-0.000501,1,-0.013557,0.000184,...,1.16417e-07,7.36575e-09,-8.54832e-06,1.76914e-07,-2.39843e-09,-1.5175e-10,1.76113e-07,-3.62828e-09,-0.000817,-0.010117
1,1,2239,130,4,4,0.003794,-0.000501,1,-0.024061,0.000579,...,1.3239e-06,1.83925e-07,-5.37435e-05,2.15953e-06,-5.19596e-08,-7.2186e-09,2.10929e-06,-8.27845e-08,0.019571,-0.012569
2,2,2239,179,4,11,0.003794,-0.000501,1,0.009713,9.4e-05,...,-7.09827e-08,1.95905e-07,-7.17564e-06,1.45327e-07,1.41153e-09,-3.89567e-09,1.42691e-07,-2.83748e-09,0.012775,0.014835
3,3,2239,142,2,6,0.003794,-0.000501,1,-0.00925,8.6e-05,...,3.29741e-08,3.17149e-08,-3.5894e-06,5.39592e-08,-4.99117e-10,-4.80057e-10,5.43315e-08,-8.22397e-10,-0.014409,-0.003863
4,4,2239,127,1,4,0.003794,-0.000501,1,-0.016968,0.000288,...,5.1122e-10,-9.45882e-12,-3.04797e-08,9.23164e-11,-1.5664e-12,2.89822e-14,9.33909e-11,-2.86153e-13,-0.021206,-0.012569


### 2). Check the Theoretical Value of $\theta$ and $\phi$ on targe

In [3]:
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

fig = plt.figure(figsize=[15,15])
gs = gridspec.GridSpec(nrows=3, ncols=3, height_ratios=[1, 1, 1])

ax0 = fig.add_subplot(gs[0, 0])
ax0.scatter(data[data.runID.eq(2239)]["targCalPh"],data[data.runID.eq(2239)]["targCalTh"])
ax0.set_title('Theoretical Value 2239')

ax1 = fig.add_subplot(gs[0, 1])
ax1.scatter(data[data.runID.eq(2240)]["targCalPh"],data[data.runID.eq(2240)]["targCalTh"])
ax1.set_title('Theoretical Value 2240')

ax2 = fig.add_subplot(gs[0, 2])
ax2.scatter(data[data.runID.eq(2241)]["targCalPh"],data[data.runID.eq(2241)]["targCalTh"])
ax2.set_title('Theoretical Value 2241')

ax3 = fig.add_subplot(gs[1, 0])
ax3.scatter(data[data.runID.eq(2244)]["targCalPh"],data[data.runID.eq(2244)]["targCalTh"])
ax3.set_title('Theoretical Value 2244')

ax4 = fig.add_subplot(gs[1, 1])
ax4.scatter(data[data.runID.eq(2245)]["targCalPh"],data[data.runID.eq(2245)]["targCalTh"])
ax4.set_title('Theoretical Value 2245')

ax5 = fig.add_subplot(gs[1, 2])
ax5.scatter(data[data.runID.eq(2256)]["targCalPh"],data[data.runID.eq(2256)]["targCalTh"])
ax5.set_title('Theoretical Value 2256')

ax6 = fig.add_subplot(gs[2, 0])
ax6.scatter(data[data.runID.eq(2257)]["targCalPh"],data[data.runID.eq(2257)]["targCalTh"])
ax6.set_title('Theoretical Value 2257')

plt.tight_layout()
plt.show()

KeyboardInterrupt: 

### 3). Pre-process the data

In [None]:
from sklearn.model_selection import train_test_split
from torch.utils.data import  Dataset, TensorDataset, DataLoader
import torch


train_X, test_X, train_y, test_y = train_test_split(
    data.drop(labels=['evtID','runID','SieveRowID','SieveColID','CutID','bpmX','bpmY','targCalTh','targCalPh'], axis=1),
    data[['targCalTh','targCalPh']],
    test_size=0.2,
    random_state=0)


train_X_tensor = torch.tensor(train_X.to_numpy()).float()
train_yTh_tensor = torch.tensor(train_y['targCalTh'].to_numpy()).float()
train_yPh_tensor = torch.tensor(train_y['targCalPh'].to_numpy()).float()

test_X_tensor  = torch.tensor(test_X.to_numpy()).float()
test_yTh_tensor  = torch.tensor(test_y['targCalTh'].to_numpy()).float()
test_yPh_tensor  = torch.tensor(test_y['targCalPh'].to_numpy()).float()

n_feature = train_X.shape[1]


train_X.head()



# train_theta_loader = DataLoader(dataset=TensorDataset(train_X_tensor,train_yTh_tensor),batch_size=batchSize)
# train_phi_loader   = DataLoader(dataset=TensorDataset(train_X_tensor,train_yPh_tensor),batch_size=batchSize)
#
# test_theta_loader = DataLoader(dataset=TensorDataset(test_X_tensor,test_yTh_tensor),batch_size=batchSize)
# test_phi_loader   = DataLoader(dataset=TensorDataset(test_X_tensor,test_yPh_tensor),batch_size=batchSize)

## Build the model and Get the GPUs

In [None]:
import torch.nn as nn
import torch.nn.functional as F

class OptNet_Theta(nn.Module):
    def __init__(self,l1=120):
        super(OptNet_Theta,self).__init__()
        self.fc1 = nn.Linear(n_feature, l1)  # 6*6 from image dimension
        self.output = nn.Linear(l1, 1)

    def forward(self, x):
        x = F.sigmoid(self.fc1(x))
        x = self.output(x)
        return x

class OptNet_Phi(nn.Module):
    def __init__(self,l1=120):
        super(OptNet_Phi,self).__init__()
        self.fc1 = nn.Linear(n_feature, l1)  # 6*6 from image dimension
        # self.fc2 = nn.Linear(l1, l2)
        self.output = nn.Linear(l1, 1)

    def forward(self, x):
        x = F.sigmoid(self.fc1(x))
        # x = F.sigmoid(self.fc2(x))
        x = self.output(x)
        return x

## Train the model With All GPUs
### 1. Train the $\phi_{targ}$ dimension

In [None]:
def trainner_phi(config, checkpoint_dir=None, data_dir=None):
    net = OptNet_Phi(config["l1"])#,config["l2"])

    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda:0"
        if torch.cuda.device_count() > 1:
            net = nn.DataParallel(net)
    net.to(device)

    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(net.parameters(), lr=config["lr"])

    if checkpoint_dir:
        model_state, optimizer_state = torch.load(
            os.path.join(checkpoint_dir, "checkpoint"))
        net.load_state_dict(model_state)
        optimizer.load_state_dict(optimizer_state)

    trainloader = DataLoader(
        dataset=TensorDataset(train_X_tensor,train_yPh_tensor),
        shuffle=True,
        batch_size=int(config["batch_size"]))

    valloader = DataLoader(
        dataset=TensorDataset(test_X_tensor,test_yPh_tensor),
        shuffle=True,
        batch_size=int(config["batch_size"]))

    # TODO change the to setting file epoch ??
    for epoch in range(10):
        running_loss = 0.0
        epoch_steps = 0

        for  i, data in enumerate(trainloader):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            #forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs,labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            epoch_steps += 1
            if i % 2000 == 1999:  # print every 2000 mini-batches
                print("[%d, %5d] loss: %.9f" % (epoch + 1, i + 1,
                                                running_loss / epoch_steps))
                running_loss = 0.0

        # Validation loss
        val_loss = 0.0
        val_steps = 0
        total = 0
        correct = 0
        for i, data in enumerate(valloader, 0):
            with torch.no_grad():
                inputs, labels = data
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = net(inputs)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

                loss = criterion(outputs, labels)
                val_loss += loss.cpu().numpy()
                val_steps += 1

        with tune.checkpoint_dir(epoch) as checkpoint_dir:
            path = os.path.join(checkpoint_dir, "checkpoint")
            torch.save((net.state_dict(), optimizer.state_dict()), path)

        tune.report(loss=(val_loss / val_steps), accuracy=correct / total)
    print("Finish Training")

### 1. Train the $\theta_{targ}$ dimension

In [None]:
def trainner_theta(config, checkpoint_dir=None, data_dir=None):
    net = OptNet_Theta(config["l1"])#,config["l2"])

    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda:0"
        if torch.cuda.device_count() > 1:
            net = nn.DataParallel(net)
    net.to(device)

    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(net.parameters(), lr=config["lr"])

    if checkpoint_dir:
        model_state, optimizer_state = torch.load(
            os.path.join(checkpoint_dir, "checkpoint"))
        net.load_state_dict(model_state)
        optimizer.load_state_dict(optimizer_state)

    trainloader = DataLoader(
        dataset=TensorDataset(train_X_tensor,train_yTh_tensor),
        shuffle=True,
        batch_size=int(config["batch_size"]))

    valloader = DataLoader(
        dataset=TensorDataset(test_X_tensor,test_yTh_tensor),
        shuffle=True,
        batch_size=int(config["batch_size"]))

    # TODO change the to setting file epoch ??
    for epoch in range(10):
        running_loss = 0.0
        epoch_steps = 0

        for  i, data in enumerate(trainloader):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            #forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs,labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            epoch_steps += 1
            if i % 2000 == 1999:  # print every 2000 mini-batches
                print("[%d, %5d] loss: %.9f" % (epoch + 1, i + 1,
                                                running_loss / epoch_steps))
                running_loss = 0.0

        # Validation loss
        val_loss = 0.0
        val_steps = 0
        total = 0
        correct = 0
        for i, data in enumerate(valloader, 0):
            with torch.no_grad():
                inputs, labels = data
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = net(inputs)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

                loss = criterion(outputs, labels)
                val_loss += loss.cpu().numpy()
                val_steps += 1

        with tune.checkpoint_dir(epoch) as checkpoint_dir:
            path = os.path.join(checkpoint_dir, "checkpoint")
            torch.save((net.state_dict(), optimizer.state_dict()), path)

        tune.report(loss=(val_loss / val_steps), accuracy=correct / total)
    print("Finish Training")

### 3. trainner runner

In [None]:
from functools import partial

def trainPhi_run(num_samples=10, max_num_epochs=10, gpus_per_trial=1):
    data_dir = os.path.abspath("./data")
    config = {
        "l1": tune.sample_from(lambda _: 2 ** np.random.randint(2, 9)),
        #"l2": tune.sample_from(lambda _: 2 ** np.random.randint(2, 9)),
        "lr": tune.loguniform(1e-5, 1e-1),
        "batch_size": tune.choice([2, 4, 8, 16,32,64,128])
    }
    scheduler = ASHAScheduler(
        metric="loss",
        mode="min",
        max_t=max_num_epochs,
        grace_period=1,
        reduction_factor=2)

    reporter = CLIReporter(
        # parameter_columns=["l1", "l2", "lr", "batch_size"],
        metric_columns=["loss", "accuracy", "training_iteration"])

    result = tune.run(
        partial(trainner_phi, data_dir=data_dir),
        name="phi_scan",
        resources_per_trial={"cpu": 2, "gpu": gpus_per_trial},
        config=config,
        num_samples=num_samples,
        scheduler=scheduler,
        progress_reporter=reporter)

    best_trial = result.get_best_trial("loss", "min", "last")
    print("Best trial config: {}".format(best_trial.config))
    print("Best trial final validation loss: {}".format(
        best_trial.last_result["loss"]))
    print("Best trial final validation accuracy: {}".format(
        best_trial.last_result["accuracy"]))

    best_trained_model = OptNet_Phi(best_trial.config["l1"], best_trial.config["l2"])
    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda:0"
        if gpus_per_trial > 1:
            best_trained_model = nn.DataParallel(best_trained_model)
    best_trained_model.to(device)
    best_checkpoint_dir = best_trial.checkpoint.value
    model_state, optimizer_state = torch.load(os.path.join(
        best_checkpoint_dir, "checkpoint"))
    best_trained_model.load_state_dict(model_state)

## Train Main Runner

In [None]:
trainPhi_run(num_samples=10, max_num_epochs=10, gpus_per_trial=0)

end