<a href="https://colab.research.google.com/github/ryantuckman/Machine-Learning/blob/main/pytorch_gluon_dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install torchviz

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting torchviz
  Downloading torchviz-0.0.2.tar.gz (4.9 kB)
Building wheels for collected packages: torchviz
  Building wheel for torchviz (setup.py) ... [?25l[?25hdone
  Created wheel for torchviz: filename=torchviz-0.0.2-py3-none-any.whl size=4150 sha256=1c09d4c9d7ced5f82658ac421638d912ca0bafcd13b114f24accf89ac61145ab
  Stored in directory: /root/.cache/pip/wheels/04/38/f5/dc4f85c3909051823df49901e72015d2d750bd26b086480ec2
Successfully built torchviz
Installing collected packages: torchviz
Successfully installed torchviz-0.0.2


In [2]:
# Import relevant python modules
import sys, os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings('ignore')

# The datasets needed are computed by the `ComputeGluon.py` script in PseudoData
filename1='https://raw.githubusercontent.com/rabah-khalek/TF_tutorials/master/PseudoData/gluon_NNPDF31_nlo_pch_as_0118_xmin1e-3.dat' 
filename2='https://raw.githubusercontent.com/rabah-khalek/TF_tutorials/master/PseudoData/gluon_NNPDF31_nlo_pch_as_0118_xmin1e-4.dat' 
filename3='https://raw.githubusercontent.com/rabah-khalek/TF_tutorials/master/PseudoData/gluon_NNPDF31_nlo_pch_as_0118_xmin1e-5.dat' 
filename4='https://raw.githubusercontent.com/rabah-khalek/TF_tutorials/master/PseudoData/gluon_NNPDF31_nlo_pch_as_0118_xmin1e-6.dat' 

# Headers to skip
lines_to_skip = 5

# Defining the columns (cv = central value, sd = standard deviation)
columns=["x", "gluon_cv", "gluon_sd"]

# Loading data from txt file
# Change filename1 to another filename for data that extends to lower x 
# (see exercises at the bottom of this notebook)
df = pd.read_csv(filename1, 
                 sep="\s+", 
                 skiprows=lines_to_skip, 
                 usecols=[0,1,2], 
                 names=columns)

# Splitting data randomly to train and test using the sklearn library
df_train, df_test = train_test_split(df,test_size=0.2,random_state=42)

# Sort the split data according to their x values
df_train = df_train.sort_values("x")
df_test = df_test.sort_values("x")

print("Data parsing is done!")

Data parsing is done!


In [3]:
import numpy as np
import torch
import torch.optim as optim
import torch.nn as nn
from torchviz import make_dot
from torch.utils.data import Dataset, TensorDataset, DataLoader
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data.dataset import random_split

In [4]:
import torch.nn as nn
import torch.nn.functional as F


##################################################################
# Building NN from the PyTorch API (nn.Linear)
##################################################################

class Torch_Model(nn.Module):
    
    def __init__(self, n_features, n_neurons, n_outputs):

        super(Torch_Model, self).__init__()
        
        self.fc1 = nn.Linear(n_features, n_neurons) # Hidden Layer 1
        self.sig1 = nn.Sigmoid()
        self.fc2 = nn.Linear(n_neurons, n_outputs) # Output Layer

    # Evaluates and returns output
    def forward(self, x):
        out = self.fc1(x)
        out = self.sig1(out)
        out = self.fc2(out)
        return out

In [5]:
# Set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [6]:
# Hyperparameters
n_features = 1
n_neurons = 20
n_outputs = 1
learning_rate = 0.001
batch_size = 20
n_epochs = 1000

In [7]:
# Load data

#print(df_train.to_numpy())
#train_dataset, val_dataset = random_split(df_train.to_numpy(), [80, 20])

train_loader = DataLoader(df_train.to_numpy(), batch_size=batch_size)
val_loader = DataLoader(df_test.to_numpy(), batch_size=batch_size)



In [8]:
# Initialize network
model = Torch_Model(n_features, n_neurons, n_outputs).to(device)

In [9]:
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [11]:
def make_train_step(model, loss_fn, optimizer):
    def train_step(x, y, dy):
        model.train()
        yhat = model(x)
        loss = loss_fn(y/dy, yhat/dy)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        return loss.item()
    return train_step

In [12]:
input_dim = 1
hidden_dim = 20
output_dim = 1
model = Torch_Model(input_dim, hidden_dim, output_dim)

loss_fn = nn.MSELoss(reduction='mean')
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
train_step = make_train_step(model, loss_fn, optimizer)

n_epochs = 1000
training_losses = []
validation_losses = []
#print(model.state_dict())

for epoch in range(n_epochs):
    batch_losses = []
    for data_batch in train_loader:
        data_batch = data_batch.to(device)
        x_batch = torch.t(data_batch[:,0][None]).float()
        y_batch = torch.t(data_batch[:,1][None]).float()
        dy_batch = torch.t(data_batch[:,2][None]).float()

        loss = train_step(x_batch, y_batch, dy_batch)
        batch_losses.append(loss)
    training_loss = np.mean(batch_losses)
    training_losses.append(training_loss)

    with torch.no_grad():
        val_losses = []
        for data_batch_val in val_loader:
            x_batch_val = torch.t(data_batch_val[:,0][None]).float()
            y_batch_val = torch.t(data_batch_val[:,1][None]).float()
            dy_batch_val = torch.t(data_batch_val[:,2][None]).float()
            model.eval()
            yhat = model(x_batch_val)
            val_loss = loss_fn(y_batch_val/dy_batch_val, yhat/dy_batch_val).item()
            val_losses.append(val_loss)
        validation_loss = np.mean(val_losses)
        validation_losses.append(validation_loss)

    print(f"[{epoch+1}] Training loss: {training_loss:.3f}\t Validation loss: {validation_loss:.3f}")

#print(model.state_dict())


[1] Training loss: 529.891	 Validation loss: 491.048
[2] Training loss: 418.818	 Validation loss: 449.511
[3] Training loss: 405.371	 Validation loss: 436.516
[4] Training loss: 402.509	 Validation loss: 432.867
[5] Training loss: 401.681	 Validation loss: 431.148
[6] Training loss: 400.815	 Validation loss: 429.584
[7] Training loss: 399.689	 Validation loss: 427.933
[8] Training loss: 398.377	 Validation loss: 426.199
[9] Training loss: 396.939	 Validation loss: 424.399
[10] Training loss: 395.409	 Validation loss: 422.543
[11] Training loss: 393.802	 Validation loss: 420.639
[12] Training loss: 392.130	 Validation loss: 418.694
[13] Training loss: 390.403	 Validation loss: 416.714
[14] Training loss: 388.629	 Validation loss: 414.704
[15] Training loss: 386.813	 Validation loss: 412.666
[16] Training loss: 384.961	 Validation loss: 410.605
[17] Training loss: 383.077	 Validation loss: 408.523
[18] Training loss: 381.166	 Validation loss: 406.422
[19] Training loss: 379.230	 Validati