# Ray - Grid Search

In [1]:
from functools import partial
import os
import tempfile
from pathlib import Path
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import random_split
import torchvision
import torchvision.transforms as transforms
from ray import tune
from ray import train
from ray.train import Checkpoint, get_checkpoint
from ray.tune.schedulers import ASHAScheduler
import ray.cloudpickle as pickle

# Let's Start Ray

In [3]:
import ray
from ray import tune

# Shut Ray down first, then start a local instance with 5 CPUs, no GPUs,
# and the dashboard enabled.
ray.shutdown()
ray_init_options = {"num_cpus": 5, "num_gpus": 0, "include_dashboard": True}
ray.init(**ray_init_options)

2024-10-19 10:19:42,909	INFO worker.py:1777 -- Started a local Ray instance. View the dashboard at [1m[32m127.0.0.1:8265 [39m[22m


0,1
Python version:,3.10.12
Ray version:,2.37.0
Dashboard:,http://127.0.0.1:8265


# Set up some Ray Tune-compatible training code

In [4]:
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

import sklearn
from sklearn.model_selection import StratifiedKFold
import sklearn.datasets
import sklearn.metrics
from sklearn.model_selection import train_test_split

from sklearn.preprocessing import RobustScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import make_pipeline

from sklearn.metrics import f1_score

In [5]:
pip show ray

Name: ray
Version: 2.37.0
Summary: Ray provides a simple, universal API for building distributed applications.
Home-page: https://github.com/ray-project/ray
Author: Ray Team
Author-email: ray-dev@googlegroups.com
License: Apache 2.0
Location: /home/sur06423/miniconda3/envs/vi_trans/lib/python3.10/site-packages
Requires: aiosignal, click, filelock, frozenlist, jsonschema, msgpack, packaging, protobuf, pyyaml, requests
Required-by: 
Note: you may need to restart the kernel to use updated packages.


In [9]:
# End to end function for model training
# In order to make this function end to end we are moving the dataset inside the function
# Usually, we don't keep dataset inside the training functions

def e2e_simple_training(config):
    """Cross-validate a RobustScaler + RandomForest pipeline and report F1 to Ray Tune.

    Args:
        config: Mapping of pipeline parameter names in ``<step>__<param>`` form
            (e.g. ``randomforestclassifier__n_estimators``) to the values chosen
            for this trial. It is forwarded unchanged to ``pipeline.set_params``.

    Reports (via ``train.report``):
        mean_f1_score: Mean of the per-fold F1 scores over the 5 folds.
        std_f1_score: Standard deviation of the per-fold F1 scores.
    """
    # The dataset is loaded inside the function so the trial is self-contained.
    X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)

    # Choose your CV strategy
    splitter = StratifiedKFold(n_splits=5)

    # Run k-fold training and testing, keeping every individual fold score.
    f1_scores = []
    for train_ind, test_ind in splitter.split(X, y):
        # A fresh pipeline per fold so no fitted state leaks between folds.
        pipeline = make_pipeline(RobustScaler(), RandomForestClassifier(random_state=42))
        pipeline.set_params(**config)
        pipeline.fit(X[train_ind], y[train_ind])
        y_pred = pipeline.predict(X[test_ind])
        # scikit-learn metrics expect (y_true, y_pred), in that order.
        f1_scores.append(f1_score(y[test_ind], y_pred))

    # Convert once and derive both summary statistics from the same array.
    f1_scores = np.asarray(f1_scores)

    # Use Tune reporter
    train.report({"mean_f1_score": f1_scores.mean(),
                  "std_f1_score": f1_scores.std()})


# In sklearn we have parameter grids in the following form:
```
param_grid = {
    'randomforestclassifier__n_estimators': [1,5,15,50,100],
    'randomforestclassifier__criterion': ['gini', 'entropy'],
    'randomforestclassifier__bootstrap': [True, False]
}
```


# Ray search spaces
- The Ray config object is free-form; we impose our own structure on it.
- However, tunable parameters need to be represented by a `tune` search-space object such as `tune.grid_search(...)`.

In [10]:
# The Ray configuration differs slightly from the scikit-learn parameter grid:
# each list of candidate values is wrapped in tune.grid_search() to form the tuning config.
n_estimators_grid = [1, 5, 15, 50, 100]
ray_tuning_config = {
    "randomforestclassifier__n_estimators": tune.grid_search(n_estimators_grid),
}

In [11]:
# To run the trials we use tune.run().
# We supply the end-to-end training function, the config, the per-trial
# resources and the save directory.
# The save directory can be overridden with the GRID_SEARCH_RESULTS_DIR
# environment variable; the default is the path this notebook was originally
# run with, so existing behaviour is unchanged.
results_dir = os.environ.get(
    "GRID_SEARCH_RESULTS_DIR",
    "/home/sur06423/hiwi/vit_exp/vision_tranformer_baseline/statefarm/ray/ray_results/grid_search",
)
analysis = tune.run(
    e2e_simple_training,
    config=ray_tuning_config,
    resources_per_trial={"cpu": 1, "gpu": 0},
    storage_path=results_dir,
)

2024-10-19 10:27:21,721	INFO tune.py:616 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949


0,1
Current time:,2024-10-19 10:27:27
Running for:,00:00:05.51
Memory:,13.4/503.4 GiB

Trial name,status,loc,randomforestclassifi er__n_estimators,iter,total time (s),mean_f1_score,std_f1_score
e2e_simple_training_f61e1_00000,TERMINATED,10.56.7.46:184530,1,1,0.0766888,0.926787,0.0224719
e2e_simple_training_f61e1_00001,TERMINATED,10.56.7.46:184533,5,1,0.0917346,0.960576,0.028015
e2e_simple_training_f61e1_00002,TERMINATED,10.56.7.46:184540,15,1,0.260756,0.963828,0.021657
e2e_simple_training_f61e1_00003,TERMINATED,10.56.7.46:184541,50,1,0.49525,0.963645,0.0195223
e2e_simple_training_f61e1_00004,TERMINATED,10.56.7.46:184584,100,1,0.859108,0.965198,0.0183413


Trial name,mean_f1_score,std_f1_score
e2e_simple_training_f61e1_00000,0.926787,0.0224719
e2e_simple_training_f61e1_00001,0.960576,0.028015
e2e_simple_training_f61e1_00002,0.963828,0.021657
e2e_simple_training_f61e1_00003,0.963645,0.0195223
e2e_simple_training_f61e1_00004,0.965198,0.0183413


2024-10-19 10:27:27,271	INFO tune.py:1009 -- Wrote the latest version of all result files and experiment state to '/home/sur06423/hiwi/vit_exp/vision_tranformer_baseline/statefarm/ray/ray_results/grid_search/e2e_simple_training_2024-10-19_10-27-21' in 0.1041s.
2024-10-19 10:27:27,285	INFO tune.py:1041 -- Total run time: 5.56 seconds (5.40 seconds for the tuning loop).


In [29]:
# Stop the local Ray instance started by ray.init() earlier in this notebook.
ray.shutdown()