# A Small Experiment of Deep PU Learning

This is to understanding how PU learning works in Multi-Class Outlier Detection cases


### Setup

In [1]:
# This helps when you're loading functions  defined in an external script (if the script is updated while the notebook is running)
%load_ext autoreload 
%autoreload 2

import numpy as np
from sklearn.metrics import precision_score, recall_score
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from matplotlib import rcParams
import seaborn as sns
from sklearn.decomposition import PCA
from math import sqrt
from sklearn.preprocessing import StandardScaler
import copy
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

import sys
sys.path.append("..")

from mypackage.data_models import generate_data_uniform_plus_normal
from mypackage.Deep_PUL import compute_deep_pu_scores, compute_deep_pu_scores_intersection_two_step, train_deep_model

## Data Generation

In [2]:
np.random.seed(42)
# Generate data
K = 3
dim = 50
means = [-1, 0, 1]
radius = 4
a_signal = 8

# Training data
n_in_train = [1000, 100, 100]

# Calibration data (Here callibrariton data has no effect if not using conformal inference)
n_in_cal = [1000, 1000, 1000]

# Test data (Unlabeled Data)
n_in_test = [90, 900, 90]
n_out_test = 300

# Data Generation
X_train, Y_train = generate_data_uniform_plus_normal(K, n_in_train, 0, dim, means, radius, a_signal)
X_cal, Y_cal = generate_data_uniform_plus_normal(K, n_in_cal, 0, dim, means, radius, a_signal)
X_test_part1, Y_test_part1 = generate_data_uniform_plus_normal(K, n_in_test, n_out_test, dim, means, radius, a_signal)
X_test_part2, Y_test_part2 = generate_data_uniform_plus_normal(K, n_in_test, n_out_test, dim, means, radius, a_signal)

### Three PU Learning Methods(One-Step, Two_Step, Intersection_Two_step)

In [3]:
# Define machine learning models
from sklearn.svm import SVC, OneClassSVM
from sklearn.ensemble import RandomForestClassifier

binary_classifier = SVC(C=1, probability=True)
oneclass_classifier = OneClassSVM(gamma='auto')

# Define Deep learning models
deep_models = train_deep_model(input_dim = dim, X_train = X_train, y_train = Y_train, epochs=10, batch_size=32)

# Compute conformity scores
scores_cal, scores_test = compute_deep_pu_scores(K, dim, X_train, Y_train, X_cal, Y_cal, X_test_part1, X_test_part2, 
                                                 train_model_fn = deep_models, two_step=False, oneclass_classifier=oneclass_classifier)

scores_cal_two_step, scores_test_two_step = compute_deep_pu_scores(K, dim, X_train, Y_train, X_cal, Y_cal, X_test_part1, X_test_part2, 
                                                                   train_model_fn = deep_models, two_step=True, oneclass_classifier=oneclass_classifier)

scores_cal_intersection, scores_test_intersection = compute_deep_pu_scores_intersection_two_step(K, dim, X_train, Y_train, X_cal, Y_cal, 
                                                                                                  X_test_part1, X_test_part2, 
                                                                                                  binary_classifier, oneclass_classifier, 
                                                                                                  deep_learning=True)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


ValueError: Only input tensors may be passed as positional arguments. The following argument value should be passed as a keyword argument: 50 (of type <class 'int'>)

In [4]:
deep_models


<Sequential name=sequential, built=True>

### Plot

In [None]:
# Check and flatten the arrays if necessary
scores_test = np.ravel(scores_test)
scores_test_two_step = np.ravel(scores_test_two_step)
scores_test_intersection = np.ravel(scores_test_intersection)
Y_test_part2 = np.ravel(Y_test_part2)

# Plot the conformity scores
rcParams['figure.figsize'] = 11.7,8.27

sns.histplot(x=scores_test, hue=Y_test_part2, bins=50)
plt.show()

sns.histplot(x=scores_test_two_step, hue=Y_test_part2, bins=50)
plt.show()

sns.histplot(x=scores_test_intersection, hue=Y_test_part2, bins=50)
plt.show()