In [1]:
# -*- coding: utf-8 -*-
# pylint: disable=W0108
%load_ext autoreload
%autoreload 2
import numpy
from astropy.io import fits
from sklearn.linear_model import RidgeClassifier, SGDClassifier,LogisticRegression
from sklearn.model_selection import train_test_split

from plots import matrix_confusion, report, roc
from utils import get_folders, get_user_data_general, parallel_style_w_one_arg


In [2]:
# ------------------------------------------------------------------------------------------------------------------------------------------------------------------


common_args = {"max_iter": 5000, "tol": 1e-4, "learning_rate": "optimal", "early_stopping": True, "n_iter_no_change": 50}

# MODELS = [
#     SGDClassifier(loss="hinge", **common_args),
#     SGDClassifier(loss="log_loss", **common_args),
#     SGDClassifier(loss="perceptron", **common_args),
#     RidgeClassifier(max_iter=5000, solver="svd"),
# ]

# NAMES = ["SVM", "LOGISTIC_REGRESSION", "PERCEPTRON", "RIDGE_REGRESSION"]

MODELS = [
    RidgeClassifier(max_iter=5000, solver="svd"),
]

NAMES = ["RIDGE_REGRESSION"]

# ------------------------------------------------------------------------------------------------------------------------------------------------------------------

In [3]:
# USER DATA
NAME_DB, PATH_FOLDERS, DIRECTORIES, PERCENTAGE_TRAIN, _ = get_user_data_general()
FOLDERS = get_folders(PATH_FOLDERS, NAME_DB, DIRECTORIES)
FOLDER_DATABASE, FOLDER_PLOTS = FOLDERS[0], FOLDERS[1]
path_image_files = sorted(FOLDER_DATABASE.glob("*.fits"))
path_labels_files = sorted(FOLDER_DATABASE.glob("*.npy"))

# COLLECT DATASET
raw_images_data = parallel_style_w_one_arg(func=lambda arg: fits.getdata(arg), data=path_image_files)
labels_data = parallel_style_w_one_arg(func=lambda arg: numpy.load(arg), data=path_labels_files)
del path_image_files, path_labels_files

# SPLIT DATASET
raw_train_images_data, raw_test_images_data, training_labels_data, test_labels_data = train_test_split(
    raw_images_data, labels_data, train_size=PERCENTAGE_TRAIN, shuffle=True
)
del raw_images_data

# NORMALISE
MIN_GLOBAL, MAX_GLOBAL = numpy.amin(raw_train_images_data), numpy.amax(raw_train_images_data)
train_images_data = (raw_train_images_data - MIN_GLOBAL) / (MAX_GLOBAL - MIN_GLOBAL)
test_images_data = (raw_test_images_data - MIN_GLOBAL) / (MAX_GLOBAL - MIN_GLOBAL)
del raw_train_images_data, raw_test_images_data

# RESHAPE FOR FIT
reshaped_train_images_data = numpy.reshape(train_images_data, (numpy.shape(train_images_data)[0], -1))
reshaped_test_images_data = numpy.reshape(test_images_data, (numpy.shape(test_images_data)[0], -1))
del train_images_data, test_images_data



loading images with 20 cpus
loading images with 20 cpus


In [4]:
# TRAINING
for num_model, model in enumerate(MODELS):
    case = f"Model{NAMES[num_model]}_Ratio{int(PERCENTAGE_TRAIN*100)}"
    print(case)
    model.fit(reshaped_train_images_data, training_labels_data)
    predictions = model.predict(reshaped_test_images_data)
    predictions_proba = model.decision_function(reshaped_test_images_data)
    matrix_confusion(test_labels_data, predictions, FOLDER_PLOTS, case, title=NAMES[num_model])
    roc(predictions_proba, test_labels_data, FOLDER_PLOTS, case)
    report(test_labels_data, predictions, FOLDER_PLOTS, case)


ModelRIDGE_REGRESSION_Ratio60


In [5]:
roc(predictions_proba, test_labels_data, FOLDER_PLOTS, case,roc_data_file='../Results/LWIRISEG/Results/classif.csv')


In [15]:
predictions_prob = model.predict_proba(reshaped_test_images_data)


In [16]:
roc(predictions_prob,test_labels_data)

array([[0.9783505 , 0.0216495 ],
       [0.00210299, 0.99789701],
       [0.98025704, 0.01974296],
       ...,
       [0.99264278, 0.00735722],
       [0.96098125, 0.03901875],
       [0.00102751, 0.99897249]])