In [1]:
import os
import random
from dotenv import load_dotenv
from huggingface_hub import login
from datasets import load_dataset
import numpy as np
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Read variables from a local .env file; override=True lets the file win over
# values that are already present in the process environment.
load_dotenv(override=True)

# Indexing os.environ raises KeyError right away if HF_TOKEN is missing.
hf_token = os.environ['HF_TOKEN']
login(token=hf_token, add_to_git_credential=True)

# Hugging Face Hub repository that holds the dataset used by this notebook.
DATASET_REPO = "aslam-naseer/js-function-complexity-processed"
dataset = load_dataset(DATASET_REPO)

Token has not been saved to git credential helper.
Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


[1m[31mCannot authenticate through git-credential as no helper is defined on your machine.
You might have to re-authenticate when pushing to the Hugging Face Hub.
Run the following command in your terminal in case you want to set the 'store' credential helper as default.

git config --global credential.helper store

Read https://git-scm.com/book/en/v2/Git-Tools-Credential-Storage for more details.[0m


In [3]:
# Pull the three named splits out of the loaded dataset in one step.
train_ds, val_ds, test_ds = (
    dataset[split_name] for split_name in ('train', 'validation', 'test')
)

In [4]:
def set_seed(seed=42):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (the default generator, the current CUDA device and all CUDA devices),
    then switches cuDNN to deterministic mode with benchmarking disabled.

    Args:
        seed: Value handed to each seeding function. Defaults to 42.

    Returns:
        None.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,  # if using multi-GPU
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # Trade cuDNN autotuning for repeatable kernel selection.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

### Neural Network model

In [5]:
%load_ext autoreload
%autoreload 2

from normalise_features import normalise_features
from complexity_nn import create_scaler, train_model

In [6]:
# Build the scaler from the training split only.
scaler = create_scaler(train_ds)

# Numeric input columns. Not referenced again in this cell; presumably mirrors
# the columns normalise_features reads -- TODO confirm against that module.
feature_cols = [
    'param_count',
    'local_statement_count',
    'total_statement_count',
    'local_variable_count',
    'total_variable_count',
    'local_nesting_depth',
    'total_nesting_depth'
]


def _apply_scaler(example):
    """Normalise one example with the scaler created above."""
    return normalise_features(example, scaler)


# Same transformation for both splits, train first and validation second.
train_ds, val_ds = [
    split_ds.map(_apply_scaler, num_proc=4) for split_ds in (train_ds, val_ds)
]

# set_format works in place: rows now come back as torch tensors restricted
# to the model input and the target.
for split_ds in (train_ds, val_ds):
    split_ds.set_format(type='torch', columns=['features', 'complexity'])

In [7]:
# Tunable run settings, named so they are easy to find and change.
SEED = 42
NUM_EPOCHS = 50

# Re-seed immediately before training so the run starts from a known RNG state.
set_seed(SEED)
model = train_model(NUM_EPOCHS, train_ds, val_ds)

Epoch [10/50], Train Loss: 1.7190, Val Loss: 1.3172
Epoch [20/50], Train Loss: 2.4708, Val Loss: 1.2932
Epoch [30/50], Train Loss: 1.5058, Val Loss: 1.2111
Epoch [40/50], Train Loss: 1.4355, Val Loss: 1.2254
Epoch [50/50], Train Loss: 1.3653, Val Loss: 1.1810


In [8]:
import joblib

print("Training complete. Saving artifacts...")

# Neither joblib.dump nor torch.save creates missing parent directories, so
# make sure the output folder exists; exist_ok=True keeps re-runs harmless.
os.makedirs('artifacts', exist_ok=True)

# Persist the fitted scaler and the trained weights side by side so that
# inference code can restore the exact same preprocessing and model.
joblib.dump(scaler, 'artifacts/scaler.pkl')
torch.save(model.state_dict(), 'artifacts/neural_network.pth')

print("Saved 'scaler.pkl' and 'neural_network.pth'")

Training complete. Saving artifacts...
Saved 'scaler.pkl' and 'neural_network.pth'


In [9]:
def nn_predict(data):
    """Predict the complexity of a single example with the trained network.

    Relies on the notebook-level ``model`` and ``scaler`` objects and on
    ``normalise_features`` to build the model input.

    Args:
        data: One raw example, in the form ``normalise_features`` accepts
            (a row of ``test_ds`` in this notebook).

    Returns:
        float: The first element of the model output as a Python number.
    """
    # Put the model in evaluation mode before running inference.
    model.eval()
    with torch.no_grad():  # no gradients are needed for prediction
        features = normalise_features(data, scaler)['features']
        # as_tensor accepts lists, arrays and existing tensors without the
        # extra copy (and warning) torch.tensor() produces for tensor input.
        prediction = model(torch.as_tensor(features, dtype=torch.float32))
        return prediction[0].item()
  

In [10]:
# Spot-check the first ten test rows: target, prediction and absolute error.
for row_idx in range(10):
    sample = test_ds[row_idx]
    actual = sample['complexity']
    predicted = nn_predict(sample)

    print("True complexity:", actual)
    print(f"Predicted complexity: {predicted:.1f}")
    print(f"Difference: {abs(actual - predicted):.1f}")
    print()

True complexity: 9.1
Predicted complexity: 8.3
Difference: 0.8

True complexity: 6.3
Predicted complexity: 8.0
Difference: 1.7

True complexity: 4.2
Predicted complexity: 3.8
Difference: 0.4

True complexity: 10.0
Predicted complexity: 8.5
Difference: 1.5

True complexity: 3.1
Predicted complexity: 3.5
Difference: 0.4

True complexity: 1.2
Predicted complexity: 2.1
Difference: 0.9

True complexity: 3.2
Predicted complexity: 2.9
Difference: 0.3

True complexity: 7.0
Predicted complexity: 6.1
Difference: 0.9

True complexity: 6.2
Predicted complexity: 6.6
Difference: 0.4

True complexity: 4.8
Predicted complexity: 3.9
Difference: 0.9

