In [3]:
import os
import pickle
import sys
import time

In [4]:
import lightgbm as lgb
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats

In [5]:
from snmachine import snclassifier
from snmachine.utils.plasticc_pipeline import get_directories, load_dataset

In [6]:
import warnings

# Always report DeprecationWarning instead of letting the default filters
# suppress or de-duplicate it.
warnings.simplefilter(action='always', category=DeprecationWarning)

In [7]:
# IPython setting: switch off Jedi for tab-completion in this kernel.
%config Completer.use_jedi = False  

In [8]:
# Name of this analysis; handed to get_directories() to locate its folders.
analysis_name = 'example_dataset_aug' 

In [9]:
# Root folder holding the example data. The default is the original,
# machine-specific location; set the SNMACHINE_EXAMPLE_DATA environment
# variable to point the notebook at another copy without editing this cell.
folder_path = os.environ.get(
    'SNMACHINE_EXAMPLE_DATA',
    'C:\\Users\\Alcatraz\\Desktop\\Supernova\\snmachine-main\\snmachine\\example_data',
)

# Look up the directories that belong to this analysis and keep the one
# from which the saved features are read.
directories = get_directories(folder_path, analysis_name)
path_saved_features = directories['features_directory']

In [10]:
# Load the saved feature table (X) and its labels (y) from the features
# directory, in that order.
# NOTE: these are pickle files -- unpickling can execute arbitrary code, so
# only load files that come from a trusted source.
X, y = (
    pd.read_pickle(os.path.join(path_saved_features, file_name))
    for file_name in ('features.pckl', 'data_labels.pckl')
)

In [11]:
# File name of the pickled dataset; joined onto folder_path when loading.
data_file_name = 'example_dataset_aug.pckl'

In [12]:
# Build the full path to the dataset file and load it.
# NOTE: the loader reports opening a binary pickle, so only point this at
# files from a trusted source.
data_path = os.path.join(folder_path, data_file_name)
dataset = load_dataset(data_path)

Opening from binary pickle
Dataset loaded from pickle file as: <snmachine.sndata.PlasticcData object at 0x000001A63043C370>


In [13]:
# Keep the dataset's metadata; it is passed to the optimiser via `metadata=`.
metadata = dataset.metadata

In [14]:
# Create the LightGBM-based classifier wrapper. The name is reused for the
# saved file; the fixed seed keeps runs repeatable.
classifier_instance = snclassifier.LightGBMClassifier(
    classifier_name='our_classifier',
    random_seed=42,
)

Created classifier of type: LGBMClassifier(random_state=42).



In [15]:
# Candidate learning rates to search over.
param_grid = {'learning_rate': [0.1, 0.25, 0.5]}

# Tune the classifier over the grid with 5 cross-validation folds, scored
# by log-loss. The dataset metadata is passed along as well (presumably used
# to build the folds for the augmented dataset -- confirm in snclassifier).
classifier_instance.optimise(
    X,
    y,
    param_grid=param_grid,
    scoring='logloss',
    number_cv_folds=5,
    metadata=metadata,
)

Cross-validation for an augmented dataset.
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000737 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 724
[LightGBM] [Info] Number of data points in the train set: 47, number of used features: 42
[LightGBM] [Info] Start training from score -1.211090
[LightGBM] [Info] Start training from score -1.452252
[LightGBM] [Info] Start training from score -0.759105
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000056 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 769
[LightGBM] [Info] Number of data points in the train set: 50, number of used features: 42
[LightGBM] [Info] Start training from score -1.139434
[LightGBM] [Info] Start training from score -1.514128
[LightGBM] [Info] Start training from score -0.776529

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000093 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 798
[LightGBM] [Info] Number of data points in the train set: 52, number of used features: 42
[LightGBM] [Info] Start training from score -1.243194
[LightGBM] [Info] Start training from score -1.553348
[LightGBM] [Info] Start training from score -0.693147
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000089 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 783
[LightGBM] [Info] Number of data points in the train set: 51, number of used features: 42
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.446919
[LightGBM] [Info] Start training from score -0.840783
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000059 seconds.
Yo

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000061 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 783
[LightGBM] [Info] Number of data points in the train set: 51, number of used features: 42
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.446919
[LightGBM] [Info] Start training from score -0.840783
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000068 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 724
[LightGBM] [Info] Number of data points in the train set: 47, number of used features: 42
[LightGBM] [Info] Start training from score -1.211090
[LightGBM] [Info] Start training from score -1.452252
[LightGBM] [Info] Start training from score -0.759105


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000111 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 769
[LightGBM] [Info] Number of data points in the train set: 50, number of used features: 42
[LightGBM] [Info] Start training from score -1.139434
[LightGBM] [Info] Start training from score -1.514128
[LightGBM] [Info] Start training from score -0.776529
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000067 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 742
[LightGBM] [Info] Number of data points in the train set: 48, number of used features: 42
[LightGBM] [Info] Start training from score -1.232144
[LightGBM] [Info] Start training from score -1.473306
[LightGBM] [Info] Start training from score -0.735707
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000094 seconds.
Yo

The optimisation takes 0.540s.


In [16]:
# Display the classifier object held by the instance after optimisation.
classifier_instance.classifier

In [17]:
# Display the parameter combination picked by the grid search
# (presumably populated by the optimise() call -- confirm in snclassifier).
classifier_instance.grid_search.best_params_

{'learning_rate': 0.1}

In [18]:
# Display the name given to the classifier when it was created.
classifier_instance.classifier_name

'our_classifier'

In [25]:
# Directory in which the trained classifier will be stored.
# NOTE(review): the execution count jumps from 18 to 25 here, so cells were
# removed or re-run out of order -- confirm the notebook still passes
# "Restart Kernel -> Run All".
path_saved_classifier = directories['classifications_directory']

In [26]:
# Write the optimised classifier to the classifications directory.
classifier_instance.save_classifier(path_saved_classifier)

Classifier saved in C:\Users\Alcatraz\Desktop\Supernova\snmachine-main\snmachine\example_data\example_dataset_aug\classifications\our_classifier.pck .
