In [8]:
# Show the GPU attached to this runtime (model, driver/CUDA version, memory use) before training.
! nvidia-smi

Wed May 18 03:07:27 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
| N/A   37C    P8    27W / 149W |      0MiB / 11441MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [10]:
!pip install pytorch-tabnet optuna

Collecting pytorch-tabnet
  Downloading pytorch_tabnet-3.1.1-py3-none-any.whl (39 kB)
Collecting optuna
  Downloading optuna-2.10.0-py3-none-any.whl (308 kB)
[K     |████████████████████████████████| 308 kB 11.5 MB/s 
Collecting colorlog
  Downloading colorlog-6.6.0-py2.py3-none-any.whl (11 kB)
Collecting alembic
  Downloading alembic-1.7.7-py3-none-any.whl (210 kB)
[K     |████████████████████████████████| 210 kB 31.0 MB/s 
[?25hCollecting cliff
  Downloading cliff-3.10.1-py3-none-any.whl (81 kB)
[K     |████████████████████████████████| 81 kB 7.7 MB/s 
Collecting cmaes>=0.8.2
  Downloading cmaes-0.8.2-py3-none-any.whl (15 kB)
Collecting Mako
  Downloading Mako-1.2.0-py3-none-any.whl (78 kB)
[K     |████████████████████████████████| 78 kB 5.4 MB/s 
Collecting stevedore>=2.0.1
  Downloading stevedore-3.5.0-py3-none-any.whl (49 kB)
[K     |████████████████████████████████| 49 kB 5.2 MB/s 
[?25hCollecting pbr!=2.1.0,>=2.0.0
  Downloading pbr-5.9.0-py2.py3-none-any.whl (112 kB)
[K

The installation above is working fine.

In [11]:
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold, train_test_split
from pytorch_tabnet.tab_model import TabNetClassifier
from sklearn.metrics import accuracy_score,classification_report
import optuna as opt
import torch
import os
import joblib

In [12]:
def make_save_cv_model(i,model_name,model,best_params,optim,output_path="./drive/MyDrive/SOLAR_CELL/ML_PROCESSED_DATA/outputs/cross_validated_models"):
    '''Save one cross-validation fold's model and its hyper-parameters.

    The artefacts are written to ``<output_path>/<i>_<model_name>_<optim>/``:

    * ``<i>_model.z``       -- the model, serialised with ``joblib.dump``
    * ``model_params.txt``  -- ``str(best_params)``

    The directory (including any missing parent directories) is created when
    it does not exist; existing files in it are overwritten.

    Parameters
    ----------
    i : fold identifier used in the directory and model file names.
    model_name : model label used in the directory name.
    model : object to serialise.
    best_params : hyper-parameters to record; stored via ``str()``.
    optim : optimizer label used in the directory name.
    output_path : root directory under which the fold directory is created.
    '''
    fold_dir = os.path.join(output_path, f"{i}_{model_name}_{optim}")

    # makedirs + exist_ok covers both the "already there" and the "not yet
    # created" case, and also builds missing parents (os.mkdir would raise
    # FileNotFoundError if output_path itself did not exist).
    os.makedirs(fold_dir, exist_ok=True)

    joblib.dump(model, os.path.join(fold_dir, f"{i}_model.z"))
    with open(os.path.join(fold_dir, "model_params.txt"), "w") as file:
        file.write(str(best_params))


In [16]:
def train(model_name,sc_df,tar_col,optim,optim_trial,k_folds=10,tar_cols="",verbose=1):
    '''Tune and cross-validate a TabNet classifier with Optuna and stratified k-folds.

    For every stratified fold an Optuna study (direction ``maximize``) runs
    ``optim_trial`` trials. Each trial fits a ``TabNetClassifier`` on the
    fold's training rows and is scored by its accuracy on the fold's held-out
    rows. The best parameters of the study are saved, a fresh classifier is
    fitted with them, its held-out accuracy is recorded, and the model is
    saved through ``make_save_cv_model``. The mean accuracy over all folds is
    printed at the end.

    Parameters
    ----------
    model_name : label used in output file and directory names.
    sc_df : pandas.DataFrame with the feature columns and the target column.
        Features and target are converted with ``to_numpy(dtype=np.float64)``.
    tar_col : name of the target column in ``sc_df``.
    optim : label forwarded to ``make_save_cv_model`` for the directory name.
        It does not select the optimizer; ``torch.optim.Adam`` is used in the
        tuning trials.
    optim_trial : number of Optuna trials per fold.
    k_folds : number of stratified folds.
    tar_cols : unused; kept so existing callers keep working.
    verbose : verbosity passed to the ``TabNetClassifier`` used in the trials.

    Returns
    -------
    None. Results are printed and written under the outputs directory.
    '''
    outputs_root = "./drive/MyDrive/SOLAR_CELL/ML_PROCESSED_DATA/outputs"
    report_dir = f"{outputs_root}/classification_report"
    model_dir = f"{outputs_root}/{model_name}"
    params_dir = f"{model_dir}/best_params"

    # Create every directory written to below so a fresh checkout does not
    # fail on the first dump after minutes of training.
    for directory in (f"{report_dir}/comp",
                      f"{params_dir}/comp",
                      f"{outputs_root}/cross_validated_models"):
        os.makedirs(directory, exist_ok=True)

    y = sc_df[tar_col]
    x = sc_df.drop([tar_col],axis=1)

    # Labels listed in the classification report.
    # NOTE(review): hard-coded to six classes (0..5) -- confirm against the data.
    class_labels = list(range(6))

    # k_fold constructing the cross-validation framework
    skf = StratifiedKFold(n_splits=k_folds,shuffle=True, random_state=123 )
    acc_scores = []

    for i, (train_index, test_index) in enumerate(skf.split(x,y)):
        # The split is identical for every trial of this fold, so slice and
        # convert once instead of once per trial plus once more afterwards.
        X_train = x.iloc[train_index,:].to_numpy(dtype=np.float64)
        X_test = x.iloc[test_index,:].to_numpy(dtype=np.float64)
        Y_train = y.iloc[train_index].to_numpy(dtype=np.float64)
        Y_test = y.iloc[test_index].to_numpy(dtype=np.float64)

        # These paths depend only on the fold, so every trial of a fold
        # overwrites the previous trial's report.
        report_dump_path = f"{report_dir}/comp/{i}_{model_name}_classification_report.z"
        report_txt_path = f"{report_dir}/{model_name}_{i}_classification_report.txt"

        # NOTE(review): the held-out fold is used as TabNet's eval_set, as the
        # Optuna objective, and again for the final accuracy below, so the
        # reported accuracy is likely optimistic. Consider carving a separate
        # validation split out of the training rows for tuning.
        def objective(trial):
            clf = TabNetClassifier(n_d=trial.suggest_int("n_d", 8, 64),
                                    n_a =trial.suggest_int("n_a", 8, 64),
                                    n_steps = trial.suggest_int("n_steps",3,10),
                                    gamma =trial.suggest_float("gamma", 1.0, 2.0),
                                    n_independent = trial.suggest_int("n_independent",1,5),
                                    n_shared = trial.suggest_int("n_shared",1,5),
                                    momentum = trial.suggest_float("momentum", 0.01, 0.4),
                                    optimizer_fn = torch.optim.Adam,
                                    verbose = verbose,
                                    device_name = "auto"
                                    )
            print(Y_train.shape, Y_test.shape)
            clf.fit(X_train, Y_train,
                    eval_set=[(X_test, Y_test)],
                    eval_metric=['accuracy'])
            Y_pred = clf.predict(X_test)

            # Build the report once and reuse it for printing and saving.
            clf_report = classification_report(Y_test, Y_pred, labels=class_labels)
            print(clf_report)
            joblib.dump(clf_report, report_dump_path)
            with open(report_txt_path, "w") as file:
                file.write(str(clf_report))
            # Print the path that was actually written.
            print(f"Saved classification_report at : {report_txt_path}")
            return accuracy_score(Y_test, Y_pred)

        print(f"Starting optimization for fold : [{i}/{k_folds}]")
        study = opt.create_study(direction='maximize')
        study.optimize(objective, n_trials=optim_trial)
        best_params = study.best_params
        print(f" Best params for fold : [{i}/{k_folds}]")
        print(best_params)

        best_params_txt_path = f"{params_dir}/fold_{i}_best_params.txt"
        joblib.dump(best_params, f"{params_dir}/comp/fold_{i}_best_params.z")
        with open(best_params_txt_path, "w") as file:
            file.write(str(best_params))
        print(f"Saved best_params at : {best_params_txt_path}")

        # Refit from scratch with the best parameters and score on the held-out rows.
        clf_model = TabNetClassifier(**best_params)
        clf_model.fit(X_train,Y_train)
        Y_pred = clf_model.predict(X_test)
        accuracy = accuracy_score(Y_test, Y_pred)
        acc_scores.append(accuracy)
        with open(f"{model_dir}/{model_name}_{i}_accuracy_score.txt", "w") as file:
            file.write(f" accuracy :: {str(accuracy)}")

        print("[++] Saving the model and parameters in corresponding directories")
        try:
            make_save_cv_model(i,model_name,clf_model,best_params,optim=optim)
        except Exception as exc:
            # Saving stays best-effort so one failed dump does not abort the
            # remaining folds, but the reason is reported and
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            print(f"[-] Failed to save the model: {exc!r}")

    print(f" Average accuracy achieved : {np.mean(acc_scores)}")

In [17]:
# Run configuration consumed by the training call in the next cell.
model_name = "pytorch_tabnet"   # label used in output file/directory names
optimizer = "Adam"              # label only; passed to train() as `optim`
tar_col = "PCE_categorical"     # target column of the data set
folds = 15                      # number of stratified folds

# Data set with the feature columns plus the target column.
use_df = pd.read_csv(
    "./drive/MyDrive/SOLAR_CELL/ML_PROCESSED_DATA/outputs/data/trainable_scaled_balanced.csv"
)

In [None]:
# Launch hyper-parameter tuning and cross-validation with the configuration defined above.
train(
    model_name=model_name,
    sc_df=use_df,
    tar_col=tar_col,
    optim=optimizer,
    optim_trial=15,  # Optuna trials per fold
    k_folds=folds,
)
print("[++] Ended the training process ...")

[32m[I 2022-05-18 07:23:22,028][0m A new study created in memory with name: no-name-eacfbd82-fd68-4641-bac7-fb6ad02a02bf[0m


Starting optimization for fold : [0/15]
Device used : cuda
(43698,) (3122,)
epoch 0  | loss: 2.44829 | val_0_accuracy: 0.32928 |  0:00:14s
epoch 1  | loss: 1.2638  | val_0_accuracy: 0.5016  |  0:00:24s
epoch 2  | loss: 1.21572 | val_0_accuracy: 0.59449 |  0:00:35s
epoch 3  | loss: 1.02084 | val_0_accuracy: 0.57912 |  0:00:45s
epoch 4  | loss: 1.01739 | val_0_accuracy: 0.60378 |  0:00:56s
epoch 5  | loss: 0.85561 | val_0_accuracy: 0.63389 |  0:01:07s
epoch 6  | loss: 0.82555 | val_0_accuracy: 0.63677 |  0:01:18s
epoch 7  | loss: 0.79706 | val_0_accuracy: 0.63004 |  0:01:28s
epoch 8  | loss: 0.78679 | val_0_accuracy: 0.63901 |  0:01:39s
epoch 9  | loss: 0.76119 | val_0_accuracy: 0.65022 |  0:01:49s
epoch 10 | loss: 0.74097 | val_0_accuracy: 0.6467  |  0:01:59s
epoch 11 | loss: 0.73173 | val_0_accuracy: 0.65279 |  0:02:09s
epoch 12 | loss: 0.72052 | val_0_accuracy: 0.65663 |  0:02:19s
epoch 13 | loss: 0.70893 | val_0_accuracy: 0.65567 |  0:02:29s
epoch 14 | loss: 0.72118 | val_0_accuracy:

[32m[I 2022-05-18 07:31:23,067][0m Trial 0 finished with value: 0.726457399103139 and parameters: {'n_d': 16, 'n_a': 13, 'n_steps': 10, 'gamma': 1.2173053387504842, 'n_independent': 4, 'n_shared': 2, 'momentum': 0.30052748594944584}. Best is trial 0 with value: 0.726457399103139.[0m


Saved classification_report at : ./drive/MyDrive/SOLAR_CELL/ML_PROCESSED_DATA/classification_report/pytorch_tabnet_0_classification_report.txt
Device used : cuda
(43698,) (3122,)
epoch 0  | loss: 1.31923 | val_0_accuracy: 0.6009  |  0:00:07s
epoch 1  | loss: 0.90013 | val_0_accuracy: 0.6246  |  0:00:14s
epoch 2  | loss: 0.83923 | val_0_accuracy: 0.62684 |  0:00:21s
epoch 3  | loss: 0.80873 | val_0_accuracy: 0.64894 |  0:00:29s
epoch 4  | loss: 0.78524 | val_0_accuracy: 0.65631 |  0:00:36s
epoch 5  | loss: 0.77551 | val_0_accuracy: 0.65119 |  0:00:44s
epoch 6  | loss: 0.7766  | val_0_accuracy: 0.65311 |  0:00:52s
epoch 7  | loss: 0.74136 | val_0_accuracy: 0.67104 |  0:00:59s
epoch 8  | loss: 0.72497 | val_0_accuracy: 0.67905 |  0:01:07s
epoch 9  | loss: 0.70843 | val_0_accuracy: 0.67008 |  0:01:14s
epoch 10 | loss: 0.69759 | val_0_accuracy: 0.68962 |  0:01:22s
epoch 11 | loss: 0.69542 | val_0_accuracy: 0.70147 |  0:01:29s
epoch 12 | loss: 0.68468 | val_0_accuracy: 0.68994 |  0:01:37s
ep

[32m[I 2022-05-18 07:43:36,646][0m Trial 1 finished with value: 0.7921204356181935 and parameters: {'n_d': 51, 'n_a': 62, 'n_steps': 8, 'gamma': 1.0122858838282471, 'n_independent': 2, 'n_shared': 3, 'momentum': 0.20007322455322335}. Best is trial 1 with value: 0.7921204356181935.[0m


              precision    recall  f1-score   support

           0       1.00      1.00      1.00       550
           1       0.61      0.60      0.61       551
           2       0.79      0.93      0.86       550
           3       1.00      1.00      1.00       551
           4       0.58      0.54      0.56       507
           5       0.71      0.62      0.66       413

    accuracy                           0.79      3122
   macro avg       0.78      0.78      0.78      3122
weighted avg       0.79      0.79      0.79      3122

Saved classification_report at : ./drive/MyDrive/SOLAR_CELL/ML_PROCESSED_DATA/classification_report/pytorch_tabnet_0_classification_report.txt
Device used : cuda
(43698,) (3122,)
epoch 0  | loss: 1.51579 | val_0_accuracy: 0.59097 |  0:00:05s
epoch 1  | loss: 0.90102 | val_0_accuracy: 0.59193 |  0:00:10s
epoch 2  | loss: 0.86262 | val_0_accuracy: 0.62076 |  0:00:16s
epoch 3  | loss: 0.84078 | val_0_accuracy: 0.63197 |  0:00:21s
epoch 4  | loss: 0.80039 |