In [1]:
import os
import pickle
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import SpectralClustering
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C
from sklearn.metrics import precision_score, recall_score, f1_score

def active_learning(material, cube_numbers, cube_names):

  # Function to load dictionary from file
    def load_dictionary(file_path):
        with open(file_path, 'rb') as file:
            return pickle.load(file)

    base_path = f'./Data/Neuchatel_{material}/'
    cube_numbers_str = '_'.join(map(str, cube_numbers))

    file_name = f"{material}_cubes{cube_numbers_str}_feat.pkl"    
    file_path = os.path.join(base_path, file_name)

    data_base = load_dictionary(file_path)

    # Function to perform initial clustering and training
    def initial_clustering_and_training(data_base, power):
        print(f"Performing initial clustering for power: {power}")
        example = data_base[power]
        #print(example)
        algorithm = SpectralClustering(n_clusters=2, random_state=1995)
        labels = algorithm.fit_predict(example)
        if labels[0] != 1:
            labels = 1 - labels
        speeds = example[:, -2] * 1800
        power_speed_pairs = np.column_stack((np.full(speeds.shape, power), speeds))
        print(f"Clustering completed for power: {power}")
        return power_speed_pairs, labels

    # Function to train surrogate model using Gaussian Process
    def train_surrogate_model(X, y):
        rbf_kernel = C(1.0) * RBF(length_scale=[10.61, 353.55])
        gp_model = GaussianProcessClassifier(kernel=rbf_kernel, random_state=1995)
        gp_model.fit(X, y)
        print("Gaussian Process model training completed.")
        return gp_model

    # Function to predict labels with uncertainty using GP
    def predict_with_uncertainty(gp_model, X):
        probs = gp_model.predict_proba(X)
        uncertainties = 1 - np.max(probs, axis=1)
        return probs, uncertainties

    # Function to compute evaluation metrics against ground truth
    def compute_metrics(pred_labels, ground_truth):
        gt_labels = ground_truth['Labels']
        precision = precision_score(gt_labels, pred_labels)
        recall = recall_score(gt_labels, pred_labels)
        f1 = f1_score(gt_labels, pred_labels)
        return precision, recall, f1

    # Function to find next power based on uncertainty
    def find_next_power(gp_model, all_powers, explored_powers, speeds):
        unexplored_powers = [p for p in all_powers if p not in explored_powers]
        uncertainties = []

        for power in unexplored_powers:
            test_data = np.array([[power, speed] for speed in speeds])
            _, uncertainty = predict_with_uncertainty(gp_model, test_data)
            uncertainties.append((power, np.mean(uncertainty)))

        next_power = max(uncertainties, key=lambda x: x[1])[0]
        print(f"Next power selected based on uncertainty: {next_power}")
        return next_power

    # Main experiment function
    def run_experiment(initial_power):
        print(f"Starting experiment with initial power: {initial_power}")
        power_speed_pairs, labels = initial_clustering_and_training(data_base, initial_power)
        #print(labels)

        # Train initial GP model
        gp_model = train_surrogate_model(power_speed_pairs, labels)

        # Predict on all (power, speed) pairs in ground truth
        gt_data = np.array(list(zip(data_base['GT']['Powers'], data_base['GT']['Speeds'])))
        pred_probs, _ = predict_with_uncertainty(gp_model, gt_data)
        pred_labels = np.argmax(pred_probs, axis=1)

        # Initialize metrics storage
        metrics = {
          'experiments': [],
          'precision': [],
          'recall': [],
          'f1_score': []
        }

        # Compute and store initial evaluation metrics
        precision, recall, f1 = compute_metrics(pred_labels, data_base['GT'])
        metrics['experiments'].append(len(labels))
        metrics['precision'].append(precision)
        metrics['recall'].append(recall)
        metrics['f1_score'].append(f1)

        # Iterative process to find next power and update model
        explored_powers = [initial_power]
        speeds = data_base['GT']['Speeds']

        while len(explored_powers) < len(powers):
            next_power = find_next_power(gp_model, powers, explored_powers, speeds)
            new_power_speed_pairs, new_labels = initial_clustering_and_training(data_base, next_power)
            #print(new_labels)

            # Update training data and retrain GP model
            power_speed_pairs = np.vstack((power_speed_pairs, new_power_speed_pairs))
            labels = np.hstack((labels, new_labels))
            gp_model = train_surrogate_model(power_speed_pairs, labels)

            # Predict on all (power, speed) pairs in ground truth
            pred_probs, _ = predict_with_uncertainty(gp_model, gt_data)
            pred_labels = np.argmax(pred_probs, axis=1)

            # Compute and store evaluation metrics
            precision, recall, f1 = compute_metrics(pred_labels, data_base['GT'])
            print(pred_labels, data_base['GT'])
            metrics['experiments'].append(len(labels))
            metrics['precision'].append(precision)
            metrics['recall'].append(recall)
            metrics['f1_score'].append(f1)

            explored_powers.append(next_power)
        
        return pd.DataFrame(metrics)

    # Run experiments for all initial powers
    powers = [k for k in data_base.keys() if k != 'GT']
    all_results = []

    for initial_power in powers:
        result = run_experiment(initial_power)
        all_results.append(result)

    # Calculate mean and standard deviation of metrics
    mean_results = pd.concat(all_results).groupby(level=0).mean()
    std_results = pd.concat(all_results).groupby(level=0).std()

    # Create final results DataFrame
    final_results = pd.DataFrame({
      'Number of Experiments': mean_results['experiments'],
      'F1-score (mean)': mean_results['f1_score'],
      'F1-score (std)': std_results['f1_score'],
      'Precision (mean)': mean_results['precision'],
      'Precision (std)': std_results['precision'],
      'Recall (mean)': mean_results['recall'],
      'Recall (std)': std_results['recall']
    })

    # Round the values to 4 decimal places
    final_results = final_results.round(4)

    return final_results


In [2]:
material = 'Ti64'
cube_numbers = [1, 2]
cube_names = 'A & B'

results = active_learning(material, cube_numbers, cube_names)
results

Starting experiment with initial power: 120
Performing initial clustering for power: 120
Clustering completed for power: 120
Gaussian Process model training completed.
Next power selected based on uncertainty: 90
Performing initial clustering for power: 90
Clustering completed for power: 90
Gaussian Process model training completed.
[1 1 0 0 1 1 0 1 0 0 1 1 0 1 1 1 0 0 1 1] {'Labels': [0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1], 'Speeds': [1050, 800, 1800, 1300, 550, 800, 1300, 1050, 1550, 1800, 800, 300, 1300, 1050, 300, 550, 1550, 1550, 300, 550], 'Powers': [90, 90, 120, 120, 120, 120, 90, 105, 90, 105, 105, 120, 105, 120, 105, 90, 105, 120, 90, 105]}
Next power selected based on uncertainty: 105
Performing initial clustering for power: 105
Clustering completed for power: 105
Gaussian Process model training completed.
[1 1 0 0 1 1 0 1 0 0 1 1 0 1 1 1 0 0 1 1] {'Labels': [0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1], 'Speeds': [1050, 800, 1800, 1300, 55

Unnamed: 0,Number of Experiments,F1-score (mean),F1-score (std),Precision (mean),Precision (std),Recall (mean),Recall (std)
0,6.6667,0.8717,0.1019,0.8426,0.2048,0.9394,0.105
1,13.0,0.9043,0.0904,0.9074,0.016,0.9091,0.1575
2,20.0,0.9565,0.0,0.9167,0.0,1.0,0.0


In [3]:
material = 'Ti64'
cube_numbers = [3, 4]
cube_names = 'C & D'

results = active_learning(material, cube_numbers, cube_names)
results

Starting experiment with initial power: 120
Performing initial clustering for power: 120
Clustering completed for power: 120
Gaussian Process model training completed.
Next power selected based on uncertainty: 90
Performing initial clustering for power: 90
Clustering completed for power: 90
Gaussian Process model training completed.
[0 0 0 1 1 1 0 1 0 0 1 1 0 0 1 1 0 0 0 1] {'Labels': [0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1], 'Speeds': [1550, 1300, 1050, 300, 550, 1050, 1800, 300, 800, 1300, 800, 800, 1050, 1550, 550, 300, 1800, 1300, 1550, 550], 'Powers': [90, 120, 90, 120, 120, 120, 105, 105, 90, 90, 105, 120, 105, 105, 105, 90, 120, 105, 120, 90]}
Next power selected based on uncertainty: 105
Performing initial clustering for power: 105
Clustering completed for power: 105
Gaussian Process model training completed.
[0 0 0 1 1 1 0 1 0 0 1 1 0 0 1 1 0 0 0 1] {'Labels': [0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1], 'Speeds': [1550, 1300, 1050, 300, 55

Unnamed: 0,Number of Experiments,F1-score (mean),F1-score (std),Precision (mean),Precision (std),Recall (mean),Recall (std)
0,6.6667,0.8033,0.1647,1.0,0.0,0.6923,0.2308
1,13.0,0.8182,0.0,1.0,0.0,0.6923,0.0
2,20.0,0.8182,0.0,1.0,0.0,0.6923,0.0


In [4]:
# Load the data
material = '316L'
cube_numbers = [3, 4]
cube_names = 'A & B'

results = active_learning(material, cube_numbers, cube_names)
results

Starting experiment with initial power: 120
Performing initial clustering for power: 120
Clustering completed for power: 120
Gaussian Process model training completed.
Next power selected based on uncertainty: 90
Performing initial clustering for power: 90
Clustering completed for power: 90
Gaussian Process model training completed.
[0 0 0 1 1 0 0 1 1 0 1 1 0 0 1 1 0 0 0 1] {'Labels': [0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1], 'Speeds': [1550, 1300, 1050, 300, 550, 1050, 1800, 300, 800, 1300, 800, 800, 1050, 1550, 550, 300, 1800, 1300, 1550, 550], 'Powers': [90, 120, 90, 120, 120, 120, 105, 105, 90, 90, 105, 120, 105, 105, 105, 90, 120, 105, 120, 90]}
Next power selected based on uncertainty: 105
Performing initial clustering for power: 105
Clustering completed for power: 105
Gaussian Process model training completed.
[0 0 0 1 1 0 0 1 1 0 1 1 0 0 1 1 0 0 0 1] {'Labels': [0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1], 'Speeds': [1550, 1300, 1050, 300, 55

Unnamed: 0,Number of Experiments,F1-score (mean),F1-score (std),Precision (mean),Precision (std),Recall (mean),Recall (std)
0,6.6667,0.9412,0.0,0.8889,0.0,1.0,0.0
1,13.0,0.9412,0.0,0.8889,0.0,1.0,0.0
2,20.0,0.9412,0.0,0.8889,0.0,1.0,0.0


In [5]:
# Load the data
material = '316L'
cube_numbers = [5, 6]
cube_names = 'C & D'

results = active_learning(material, cube_numbers, cube_names)
results

Starting experiment with initial power: 120
Performing initial clustering for power: 120
Clustering completed for power: 120
Gaussian Process model training completed.
Next power selected based on uncertainty: 90
Performing initial clustering for power: 90
Clustering completed for power: 90
Gaussian Process model training completed.
[0 0 0 1 0 0 0 1 0 0 0 1 1 1 0 1 0 1 1 1] {'Labels': [0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0], 'Speeds': [1300, 1550, 1800, 300, 1550, 1550, 1050, 800, 1300, 800, 1300, 550, 300, 800, 1050, 550, 1800, 300, 550, 1050], 'Powers': [90, 105, 120, 120, 90, 120, 105, 105, 105, 90, 120, 120, 105, 120, 90, 105, 105, 90, 90, 120]}
Next power selected based on uncertainty: 105
Performing initial clustering for power: 105
Clustering completed for power: 105
Gaussian Process model training completed.
[0 0 0 1 0 0 0 0 0 0 0 1 1 1 0 1 0 1 1 1] {'Labels': [0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0], 'Speeds': [1300, 1550, 1800, 300, 15

Unnamed: 0,Number of Experiments,F1-score (mean),F1-score (std),Precision (mean),Precision (std),Recall (mean),Recall (std)
0,6.6667,0.8381,0.033,0.8889,0.1925,0.8333,0.1443
1,13.3333,0.8941,0.0815,0.8783,0.0183,0.9167,0.1443
2,20.0,0.875,0.0,0.875,0.0,0.875,0.0
