# Analysis of Predicted and Original Values for Test Data

## Objective
In this notebook, we will compare the predicted and original values of the parameters. The original data is presented in text files containing initial measurements, while the predicted values were generated by a model.

## Structure
1. **Import Libraries and Define Functions**:
    - Import the necessary libraries and define functions to extract data.

2. **Read and Prepare Data**:
    - Read data from text files and organize lists for analysis.

3. **Plot Graphs**:
    - Plot graphs to compare predicted and original values.


In [None]:
import os
import re
import yaml
import logging
import argparse
from argparse import Namespace
from src.utils.logging_config import setup_logging, log_config, log_model_params
from scripts.setup_utils import check_paths, validate_config, validate_model_params, default_model_type
from src.constants import EIGEN_ENERGY, QUALITY_FACTOR, THRESHOLD_GAIN
from src.models.create_model import create_model
from src.data.data_processing import process_data
from src.data.data_loader import load_data
from sklearn.model_selection import train_test_split
from src.data.vcsel_dataset import VCSELDataset
from src.training.train_model import train_model
from src.training.train_model_k_fold import train_model_k_fold
from src.training.test_model import test_model
from src.predict.predict import load_prediction_samples, predict
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Switch to the parent directory so that the relative paths used by the later
# cells (config files, "predictions/") are resolved from there.
# The guard makes the cell idempotent: without it, every re-run of this cell
# would climb one directory higher and break those relative paths.
if 'original_dir' not in globals():
    os.chdir('..')
    original_dir = os.getcwd()
print(f"Original Working Directory: {original_dir}")

# SINGLE

## Prediction E

In [None]:
# Read the settings stored in config_single.yaml.
with open("config_single.yaml", 'r') as config_file:
    config = yaml.safe_load(config_file)

# Bind each configuration section once, then flatten the values the pipeline
# needs into an argparse-style Namespace.
runtime_cfg = config['runtime']
model_cfg = config['model']
predictions_cfg = config['predictions']
training_cfg = config['training']
scheduler_cfg = training_cfg['scheduler_params']

args = Namespace(
    train=runtime_cfg['train'],
    predict=runtime_cfg['predict'],
    load_weights=runtime_cfg['load_weights'],
    data_path=config['data']['path'],
    model_path=model_cfg['params_path'],
    weight_path=model_cfg['weight_path'],
    predictions_dir=predictions_cfg['directory'],
    samples_file=predictions_cfg['samples_file'],
    batch_size=training_cfg['batch_size'],
    num_epochs=training_cfg['num_epochs'],
    learning_rate=training_cfg['learning_rate'],
    use_scheduler=training_cfg['use_scheduler'],
    scheduler_factor=scheduler_cfg['factor'],
    scheduler_patience=scheduler_cfg['patience'],
    # The model type is fixed for this section rather than read from the YAML.
    model_type="eigen_energy"
)

In [None]:
# Load the samples for the selected model type, hold out 10% as the evaluation
# split and 10% of the remainder as the validation split.  The fixed
# random_state keeps the split reproducible between runs.
data = load_data(args.data_path, args.model_type)
train_data, evaluate_data = train_test_split(data, test_size=0.1, random_state=42)
train_data, validate_data = train_test_split(train_data, test_size=0.1, random_state=42)

# Process each split with its own mode flag and wrap it in a VCSELDataset.
# NOTE(review): the train split is processed first; presumably process_data
# derives or stores normalisation parameters in that mode - confirm before
# reordering these calls.
train_data = process_data(train_data, args.model_path, args.model_type, is_train=True)
train_dataset = VCSELDataset(train_data)

validate_data = process_data(validate_data, args.model_path, args.model_type, is_validate=True)
validate_dataset = VCSELDataset(validate_data)

evaluate_data = process_data(evaluate_data, args.model_path, args.model_type, is_test=True)
evaluate_dataset = VCSELDataset(evaluate_data)

In [None]:
# Create the model from the configured parameter/weight settings and run
# prediction on the evaluation dataset.
# NOTE(review): predict() presumably writes its output under
# args.predictions_dir - confirm in src/predict/predict.py.
model = create_model(args.model_path, args.load_weights, args.weight_path)
predict(model, evaluate_dataset, args)

In [None]:
# Prediction file analysed in the next cell (relative to the working directory).
# NOTE(review): the timestamped name is hard-coded, so it is not necessarily
# the file produced by the predict() call in the previous cell.  It also ends
# in "-all" while the other sections use "_all" - verify the file name.
predictions_dir = "predictions"
predicted_file = os.path.join(predictions_dir, "single//predictions_eigen_energy_20240515_014012-all.txt")

In [None]:
from src.data.utilities import denormalize_number
# A number as it may appear in the prediction file: optional sign, integer or
# decimal mantissa, optional exponent (e.g. "1.25", "-0.5", "3e-05").
# A plain \d+\.\d+ would skip lines holding negative or integer values and
# would read "1.2e-05" as 1.2; skipped lines silently shift the pairing
# between predictions and reference samples.
_NUMBER = r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?"
eigen_energy_pattern = re.compile(
    rf"EIGEN_ENERGY_1 ({_NUMBER}) EIGEN_ENERGY_2 ({_NUMBER})"
)


def read_predicted_values(filepath):
    """Parse (EIGEN_ENERGY_1, EIGEN_ENERGY_2) pairs from a predictions file.

    Args:
        filepath: Path to a text file; every line is searched for
            ``eigen_energy_pattern``.

    Returns:
        A list of ``(energy_1, energy_2)`` float tuples in file order.
        Lines that do not match the pattern are skipped.
    """
    with open(filepath, 'r') as file:
        matches = (eigen_energy_pattern.search(line) for line in file)
        return [(float(m.group(1)), float(m.group(2))) for m in matches if m]



# Parsed predictions and the reference targets of the evaluation split.
predicted_values = read_predicted_values(predicted_file)
original_values = evaluate_data['output_data']

# Only the first `count` rows of each source take part in the comparison.
count = len(original_values)
predicted_rows = predicted_values[:count]
reference_rows = original_values[:count]
predicted_eigen_energy_1 = [row[0] for row in predicted_rows]
predicted_eigen_energy_2 = [row[1] for row in predicted_rows]
# Reference columns go through denormalize_number('ENERGY', ...) before plotting.
original_eigen_energy_1 = [denormalize_number('ENERGY', row[0]) for row in reference_rows]
original_eigen_energy_2 = [denormalize_number('ENERGY', row[1]) for row in reference_rows]

plt.figure(figsize=(10, 5))

# One scatter panel per eigen energy: true values on x, predictions on y.
panels = (
    (1, original_eigen_energy_1, predicted_eigen_energy_1, 'blue',
     'True Eigen Energy 1', 'Predicted Eigen Energy 1',
     'Predicted vs. True Values of Eigen Energy 1'),
    (2, original_eigen_energy_2, predicted_eigen_energy_2, 'green',
     'True Eigen Energy 2', 'Predicted Eigen Energy 2',
     'Predicted vs. True Values of Eigen Energy 2'),
)
for position, truth, prediction, point_color, x_label, y_label, heading in panels:
    plt.subplot(1, 2, position)
    plt.scatter(truth, prediction, color=point_color, alpha=0.5)
    # Dashed y = x reference line spanning the range of the true values.
    lower, upper = min(truth), max(truth)
    plt.plot([lower, upper], [lower, upper], color='red', linestyle='--')
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.title(heading)
    plt.grid(True)

plt.tight_layout()
# plt.savefig('path_to_scatter_plot.png')  # uncomment to save the figure
plt.show()


## Prediction Q

In [None]:
# Read the settings stored in config_single.yaml.
with open("config_single.yaml", 'r') as config_file:
    config = yaml.safe_load(config_file)

# Bind each configuration section once, then flatten the values the pipeline
# needs into an argparse-style Namespace.
runtime_cfg = config['runtime']
model_cfg = config['model']
predictions_cfg = config['predictions']
training_cfg = config['training']
scheduler_cfg = training_cfg['scheduler_params']

args = Namespace(
    train=runtime_cfg['train'],
    predict=runtime_cfg['predict'],
    load_weights=runtime_cfg['load_weights'],
    data_path=config['data']['path'],
    model_path=model_cfg['params_path'],
    weight_path=model_cfg['weight_path'],
    predictions_dir=predictions_cfg['directory'],
    samples_file=predictions_cfg['samples_file'],
    batch_size=training_cfg['batch_size'],
    num_epochs=training_cfg['num_epochs'],
    learning_rate=training_cfg['learning_rate'],
    use_scheduler=training_cfg['use_scheduler'],
    scheduler_factor=scheduler_cfg['factor'],
    scheduler_patience=scheduler_cfg['patience'],
    # The model type is fixed for this section rather than read from the YAML.
    model_type="quality_factor"
)

In [None]:
# Load the samples for the selected model type, hold out 10% as the evaluation
# split and 10% of the remainder as the validation split.  The fixed
# random_state keeps the split reproducible between runs.
data = load_data(args.data_path, args.model_type)
train_data, evaluate_data = train_test_split(data, test_size=0.1, random_state=42)
train_data, validate_data = train_test_split(train_data, test_size=0.1, random_state=42)

# Process each split with its own mode flag and wrap it in a VCSELDataset.
# NOTE(review): the train split is processed first; presumably process_data
# derives or stores normalisation parameters in that mode - confirm before
# reordering these calls.
train_data = process_data(train_data, args.model_path, args.model_type, is_train=True)
train_dataset = VCSELDataset(train_data)

validate_data = process_data(validate_data, args.model_path, args.model_type, is_validate=True)
validate_dataset = VCSELDataset(validate_data)

evaluate_data = process_data(evaluate_data, args.model_path, args.model_type, is_test=True)
evaluate_dataset = VCSELDataset(evaluate_data)

In [None]:
# Create the model from the configured parameter/weight settings and run
# prediction on the evaluation dataset.
# NOTE(review): predict() presumably writes its output under
# args.predictions_dir - confirm in src/predict/predict.py.
model = create_model(args.model_path, args.load_weights, args.weight_path)
predict(model, evaluate_dataset, args)

In [None]:
# Folder holding the prediction files, and the specific file analysed in the
# next cell (both relative to the working directory).
# NOTE(review): the timestamped name is hard-coded, so it is not necessarily
# the file produced by the predict() call in the previous cell.
predictions_dir = "predictions"
predicted_file = os.path.join(predictions_dir, "single//predictions_quality_factor_20240515_225408_all.txt")

In [None]:
from src.data.utilities import log_denormalize

# A number as it may appear in the prediction file: optional sign, integer or
# decimal mantissa, optional exponent (e.g. "123.5", "-2.5", "1.2e+05").
# A plain \d+\.\d+ would skip lines holding negative or integer values and
# would read "1.2e+05" as 1.2; skipped lines silently shift the pairing
# between predictions and reference samples.
_NUMBER = r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?"
quality_prediction = re.compile(rf"Q1 ({_NUMBER})")


def read_predicted_values(filepath):
    """Parse the predicted Q1 values from a predictions file.

    Args:
        filepath: Path to a text file; every line is searched for
            ``quality_prediction``.

    Returns:
        A list of floats in file order.  Lines that do not match the
        pattern are skipped.
    """
    with open(filepath, 'r') as file:
        matches = (quality_prediction.search(line) for line in file)
        return [float(m.group(1)) for m in matches if m]


# Parsed predictions and the reference targets of the evaluation split.
predicted_values = read_predicted_values(predicted_file)
original_values = evaluate_data['output_data']

count = len(original_values)
print(count)

# Keep the first `count` predictions; the first column of every reference row
# is mapped through log_denormalize before plotting.
predicted_values = predicted_values[:count]
original_values = [log_denormalize(row[0]) for row in original_values[:count]]

plt.figure(figsize=(12, 6))

# True values on x, predictions on y (left slot of a 1x2 grid).
plt.subplot(1, 2, 1)
plt.scatter(original_values, predicted_values, color='blue', alpha=0.5)
# Dashed y = x reference line spanning the range of the true values.
q_low, q_high = min(original_values), max(original_values)
plt.plot([q_low, q_high], [q_low, q_high], color='red', linestyle='--')
plt.xlabel('True Q')
plt.ylabel('Predicted Q')
plt.title('Predicted vs. True Values of Quality Factor for test data')
plt.grid(True)

plt.tight_layout()
# plt.savefig('path_to_scatter_plot.png')  # uncomment to save the figure
plt.show()





## Prediction TMG

In [None]:
# Read the settings stored in config_single.yaml.
with open("config_single.yaml", 'r') as config_file:
    config = yaml.safe_load(config_file)

# Bind each configuration section once, then flatten the values the pipeline
# needs into an argparse-style Namespace.
runtime_cfg = config['runtime']
model_cfg = config['model']
predictions_cfg = config['predictions']
training_cfg = config['training']
scheduler_cfg = training_cfg['scheduler_params']

args = Namespace(
    train=runtime_cfg['train'],
    predict=runtime_cfg['predict'],
    load_weights=runtime_cfg['load_weights'],
    data_path=config['data']['path'],
    model_path=model_cfg['params_path'],
    weight_path=model_cfg['weight_path'],
    predictions_dir=predictions_cfg['directory'],
    samples_file=predictions_cfg['samples_file'],
    batch_size=training_cfg['batch_size'],
    num_epochs=training_cfg['num_epochs'],
    learning_rate=training_cfg['learning_rate'],
    use_scheduler=training_cfg['use_scheduler'],
    scheduler_factor=scheduler_cfg['factor'],
    scheduler_patience=scheduler_cfg['patience'],
    # The model type is fixed for this section rather than read from the YAML.
    model_type="threshold_material_gain"
)

In [None]:
# Load the samples for the selected model type, hold out 10% as the evaluation
# split and 10% of the remainder as the validation split.  The fixed
# random_state keeps the split reproducible between runs.
data = load_data(args.data_path, args.model_type)
train_data, evaluate_data = train_test_split(data, test_size=0.1, random_state=42)
train_data, validate_data = train_test_split(train_data, test_size=0.1, random_state=42)

# Process each split with its own mode flag and wrap it in a VCSELDataset.
# NOTE(review): the train split is processed first; presumably process_data
# derives or stores normalisation parameters in that mode - confirm before
# reordering these calls.
train_data = process_data(train_data, args.model_path, args.model_type, is_train=True)
train_dataset = VCSELDataset(train_data)

validate_data = process_data(validate_data, args.model_path, args.model_type, is_validate=True)
validate_dataset = VCSELDataset(validate_data)

evaluate_data = process_data(evaluate_data, args.model_path, args.model_type, is_test=True)
evaluate_dataset = VCSELDataset(evaluate_data)

In [None]:
# Create the model from the configured parameter/weight settings and run
# prediction on the evaluation dataset.
# NOTE(review): predict() presumably writes its output under
# args.predictions_dir - confirm in src/predict/predict.py.
model = create_model(args.model_path, args.load_weights, args.weight_path)
predict(model, evaluate_dataset, args)

In [None]:
# Folder holding the prediction files, and the specific file analysed in the
# next cell (both relative to the working directory).
# NOTE(review): the timestamped name is hard-coded, so it is not necessarily
# the file produced by the predict() call in the previous cell.
predictions_dir = "predictions"
predicted_file = os.path.join(predictions_dir, "single//predictions_threshold_material_gain_20240518_211759_all.txt")

In [None]:
import matplotlib
from src.data.utilities import convert_k_to_gain, denormalize_number, convert_gain_to_k
#matplotlib.rcParams['text.usetex'] = True

# A number as it may appear in the prediction file: optional sign, integer or
# decimal mantissa, optional exponent (e.g. "1500.25", "-12.5", "2.5e+03").
# A plain \d+\.\d+ would skip lines holding negative or integer values and
# would read "2.5e+03" as 2.5; skipped lines silently shift the pairing
# between predictions and reference samples.
_NUMBER = r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?"
# NOTE(review): "TRESHOLD" (sic) presumably mirrors the label written by the
# prediction code - do not correct the spelling here without checking it.
TMG_pattern = re.compile(
    rf"TRESHOLD_MATERIAL_GAIN ({_NUMBER}) ENERGY ({_NUMBER})"
)


def read_predicted_values(filepath):
    """Parse (threshold material gain, energy) pairs from a predictions file.

    Args:
        filepath: Path to a text file; every line is searched for
            ``TMG_pattern``.

    Returns:
        A list of ``(gain, energy)`` float tuples in file order.  Lines that
        do not match the pattern are skipped.
    """
    with open(filepath, 'r') as file:
        matches = (TMG_pattern.search(line) for line in file)
        return [(float(m.group(1)), float(m.group(2))) for m in matches if m]

# Parsed predictions and the reference targets of the evaluation split.
predicted_values = read_predicted_values(predicted_file)
original_values = evaluate_data['output_data']

count = len(original_values)
print(count)

# Parsed prediction rows are (gain, energy).  Reference rows are indexed the
# other way round: column 0 is denormalised as 'ENERGY', and the negated
# column 1 is passed to convert_k_to_gain together with that energy.
predicted_rows = predicted_values[:count]
reference_rows = original_values[:count]
predicted_energy = [row[1] for row in predicted_rows]
predicted_TMG = [row[0] for row in predicted_rows]
original_TMG = [
    convert_k_to_gain(denormalize_number('ENERGY', row[0]), -row[1])
    for row in reference_rows
]
original_energy = [denormalize_number('ENERGY', row[0]) for row in reference_rows]

plt.figure(figsize=(12, 6))

# One scatter panel per quantity: true values on x, predictions on y.
panels = (
    (1, original_TMG, predicted_TMG, 'blue',
     'True Threshold material gain, $cm^{-1}$',
     'Predicted Threshold material gain, $cm^{-1}$',
     'Predicted vs. True Values of Threshold material gain'),
    (2, original_energy, predicted_energy, 'green',
     'True Energy, eV', 'Predicted Energy, eV',
     'Predicted vs. True Values of Energy'),
)
for position, truth, prediction, point_color, x_label, y_label, heading in panels:
    plt.subplot(1, 2, position)
    plt.scatter(truth, prediction, color=point_color, alpha=0.5)
    # Dashed y = x reference line spanning the range of the true values.
    lower, upper = min(truth), max(truth)
    plt.plot([lower, upper], [lower, upper], color='red', linestyle='--')
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.title(heading)
    plt.grid(True)

plt.tight_layout()
# plt.savefig('path_to_scatter_plot.png')  # uncomment to save the figure
plt.show()

# DBR

## Prediction E

In [None]:
# Read the settings stored in config_DBR.yaml.
with open("config_DBR.yaml", 'r') as config_file:
    config = yaml.safe_load(config_file)

# Bind each configuration section once, then flatten the values the pipeline
# needs into an argparse-style Namespace.
runtime_cfg = config['runtime']
model_cfg = config['model']
predictions_cfg = config['predictions']
training_cfg = config['training']
scheduler_cfg = training_cfg['scheduler_params']

args = Namespace(
    train=runtime_cfg['train'],
    predict=runtime_cfg['predict'],
    load_weights=runtime_cfg['load_weights'],
    data_path=config['data']['path'],
    model_path=model_cfg['params_path'],
    weight_path=model_cfg['weight_path'],
    predictions_dir=predictions_cfg['directory'],
    samples_file=predictions_cfg['samples_file'],
    batch_size=training_cfg['batch_size'],
    num_epochs=training_cfg['num_epochs'],
    learning_rate=training_cfg['learning_rate'],
    use_scheduler=training_cfg['use_scheduler'],
    scheduler_factor=scheduler_cfg['factor'],
    scheduler_patience=scheduler_cfg['patience'],
    # The model type is fixed for this section rather than read from the YAML.
    model_type="eigen_energy"
)

In [None]:
# Load the samples for the selected model type, hold out 10% as the evaluation
# split and 10% of the remainder as the validation split.  The fixed
# random_state keeps the split reproducible between runs.
data = load_data(args.data_path, args.model_type)
train_data, evaluate_data = train_test_split(data, test_size=0.1, random_state=42)
train_data, validate_data = train_test_split(train_data, test_size=0.1, random_state=42)

# Process each split with its own mode flag and wrap it in a VCSELDataset.
# NOTE(review): the train split is processed first; presumably process_data
# derives or stores normalisation parameters in that mode - confirm before
# reordering these calls.
train_data = process_data(train_data, args.model_path, args.model_type, is_train=True)
train_dataset = VCSELDataset(train_data)

validate_data = process_data(validate_data, args.model_path, args.model_type, is_validate=True)
validate_dataset = VCSELDataset(validate_data)

evaluate_data = process_data(evaluate_data, args.model_path, args.model_type, is_test=True)
evaluate_dataset = VCSELDataset(evaluate_data)

In [None]:
# Create the model from the configured parameter/weight settings and run
# prediction on the evaluation dataset.
# NOTE(review): predict() presumably writes its output under
# args.predictions_dir - confirm in src/predict/predict.py.
model = create_model(args.model_path, args.load_weights, args.weight_path)
predict(model, evaluate_dataset, args)

In [None]:
# Prediction file analysed in the next cell (relative to the working directory).
# NOTE(review): the timestamped name is hard-coded, so it is not necessarily
# the file produced by the predict() call in the previous cell.
predictions_dir = "predictions"
predicted_file = os.path.join(predictions_dir, "DBR//predictions_eigen_energy_20240518_234435_all.txt")

In [None]:
from src.data.utilities import denormalize_number
# A number as it may appear in the prediction file: optional sign, integer or
# decimal mantissa, optional exponent (e.g. "1.25", "-0.5", "3e-05").
# A plain \d+\.\d+ would skip lines holding negative or integer values and
# would read "1.2e-05" as 1.2; skipped lines silently shift the pairing
# between predictions and reference samples.
_NUMBER = r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?"
eigen_energy_pattern = re.compile(
    rf"EIGEN_ENERGY_1 ({_NUMBER}) EIGEN_ENERGY_2 ({_NUMBER})"
)


def read_predicted_values(filepath):
    """Parse (EIGEN_ENERGY_1, EIGEN_ENERGY_2) pairs from a predictions file.

    Args:
        filepath: Path to a text file; every line is searched for
            ``eigen_energy_pattern``.

    Returns:
        A list of ``(energy_1, energy_2)`` float tuples in file order.
        Lines that do not match the pattern are skipped.
    """
    with open(filepath, 'r') as file:
        matches = (eigen_energy_pattern.search(line) for line in file)
        return [(float(m.group(1)), float(m.group(2))) for m in matches if m]



# Parsed predictions and the reference targets of the evaluation split.
predicted_values = read_predicted_values(predicted_file)
original_values = evaluate_data['output_data']

# Only the first `count` rows of each source take part in the comparison.
count = len(original_values)
print(count)
predicted_rows = predicted_values[:count]
reference_rows = original_values[:count]
predicted_eigen_energy_1 = [row[0] for row in predicted_rows]
predicted_eigen_energy_2 = [row[1] for row in predicted_rows]
# Reference columns go through denormalize_number('ENERGY', ...) before plotting.
original_eigen_energy_1 = [denormalize_number('ENERGY', row[0]) for row in reference_rows]
original_eigen_energy_2 = [denormalize_number('ENERGY', row[1]) for row in reference_rows]

plt.figure(figsize=(10, 5))

# One scatter panel per eigen energy: true values on x, predictions on y.
panels = (
    (1, original_eigen_energy_1, predicted_eigen_energy_1, 'blue',
     'True Eigen Energy 1', 'Predicted Eigen Energy 1',
     'Predicted vs. True Values of Eigen Energy 1'),
    (2, original_eigen_energy_2, predicted_eigen_energy_2, 'green',
     'True Eigen Energy 2', 'Predicted Eigen Energy 2',
     'Predicted vs. True Values of Eigen Energy 2'),
)
for position, truth, prediction, point_color, x_label, y_label, heading in panels:
    plt.subplot(1, 2, position)
    plt.scatter(truth, prediction, color=point_color, alpha=0.5)
    # Dashed y = x reference line spanning the range of the true values.
    lower, upper = min(truth), max(truth)
    plt.plot([lower, upper], [lower, upper], color='red', linestyle='--')
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.title(heading)
    plt.grid(True)

plt.tight_layout()
# plt.savefig('path_to_scatter_plot.png')  # uncomment to save the figure
plt.show()


## Prediction Q

In [None]:
# Read the settings stored in config_DBR.yaml.
with open("config_DBR.yaml", 'r') as config_file:
    config = yaml.safe_load(config_file)

# Bind each configuration section once, then flatten the values the pipeline
# needs into an argparse-style Namespace.
runtime_cfg = config['runtime']
model_cfg = config['model']
predictions_cfg = config['predictions']
training_cfg = config['training']
scheduler_cfg = training_cfg['scheduler_params']

args = Namespace(
    train=runtime_cfg['train'],
    predict=runtime_cfg['predict'],
    load_weights=runtime_cfg['load_weights'],
    data_path=config['data']['path'],
    model_path=model_cfg['params_path'],
    weight_path=model_cfg['weight_path'],
    predictions_dir=predictions_cfg['directory'],
    samples_file=predictions_cfg['samples_file'],
    batch_size=training_cfg['batch_size'],
    num_epochs=training_cfg['num_epochs'],
    learning_rate=training_cfg['learning_rate'],
    use_scheduler=training_cfg['use_scheduler'],
    scheduler_factor=scheduler_cfg['factor'],
    scheduler_patience=scheduler_cfg['patience'],
    # The model type is fixed for this section rather than read from the YAML.
    model_type="quality_factor"
)

In [None]:
# Load the samples for the selected model type, hold out 10% as the evaluation
# split and 10% of the remainder as the validation split.  The fixed
# random_state keeps the split reproducible between runs.
data = load_data(args.data_path, args.model_type)
train_data, evaluate_data = train_test_split(data, test_size=0.1, random_state=42)
train_data, validate_data = train_test_split(train_data, test_size=0.1, random_state=42)

# Process each split with its own mode flag and wrap it in a VCSELDataset.
# NOTE(review): the train split is processed first; presumably process_data
# derives or stores normalisation parameters in that mode - confirm before
# reordering these calls.
train_data = process_data(train_data, args.model_path, args.model_type, is_train=True)
train_dataset = VCSELDataset(train_data)

validate_data = process_data(validate_data, args.model_path, args.model_type, is_validate=True)
validate_dataset = VCSELDataset(validate_data)

evaluate_data = process_data(evaluate_data, args.model_path, args.model_type, is_test=True)
evaluate_dataset = VCSELDataset(evaluate_data)

In [None]:
# Create the model from the configured parameter/weight settings and run
# prediction on the evaluation dataset.
# NOTE(review): predict() presumably writes its output under
# args.predictions_dir - confirm in src/predict/predict.py.
model = create_model(args.model_path, args.load_weights, args.weight_path)
predict(model, evaluate_dataset, args)

In [None]:
# Folder holding the prediction files, and the specific file analysed in the
# next cell (both relative to the working directory).
# NOTE(review): the timestamped name is hard-coded (note the "_all2" suffix),
# so it is not necessarily the file produced by the predict() call in the
# previous cell.
predictions_dir = "predictions"
predicted_file = os.path.join(predictions_dir, "DBR//predictions_quality_factor_20240525_151511_all2.txt")

In [None]:
from src.data.utilities import log_denormalize

# A number as it may appear in the prediction file: optional sign, integer or
# decimal mantissa, optional exponent (e.g. "123.5", "-2.5", "1.2e+05").
# A plain \d+\.\d+ would skip lines holding negative or integer values and
# would read "1.2e+05" as 1.2; skipped lines silently shift the pairing
# between predictions and reference samples.
_NUMBER = r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?"
quality_prediction = re.compile(rf"Q1 ({_NUMBER})")


def read_predicted_values(filepath):
    """Parse the predicted Q1 values from a predictions file.

    Args:
        filepath: Path to a text file; every line is searched for
            ``quality_prediction``.

    Returns:
        A list of floats in file order.  Lines that do not match the
        pattern are skipped.
    """
    with open(filepath, 'r') as file:
        matches = (quality_prediction.search(line) for line in file)
        return [float(m.group(1)) for m in matches if m]


# Parsed predictions and the reference targets of the evaluation split.
predicted_values = read_predicted_values(predicted_file)
original_values = evaluate_data['output_data']

# Report both sizes so that a mismatch between the file and the split is visible.
count = len(original_values)
print(count)
print(len(predicted_values))

# Keep the first `count` predictions; the first column of every reference row
# is mapped through log_denormalize before plotting.
predicted_values = predicted_values[:count]
original_values = [log_denormalize(row[0]) for row in original_values[:count]]

plt.figure(figsize=(12, 6))

# True values on x, predictions on y (left slot of a 1x2 grid).
plt.subplot(1, 2, 1)
plt.scatter(original_values, predicted_values, color='blue', alpha=0.5)
# Dashed y = x reference line spanning the range of the true values.
q_low, q_high = min(original_values), max(original_values)
plt.plot([q_low, q_high], [q_low, q_high], color='red', linestyle='--')
plt.xlabel('True Q')
plt.ylabel('Predicted Q')
plt.title('Predicted vs. True Values of Quality Factor for test data')
plt.grid(True)
# Both axes are logarithmic in this section.
plt.xscale('log')
plt.yscale('log')
plt.tight_layout()
# plt.savefig('path_to_scatter_plot.png')  # uncomment to save the figure
plt.show()





# VCSEL

## Prediction E

In [None]:
# Read the settings stored in config_VCSEL.yaml.
with open("config_VCSEL.yaml", 'r') as config_file:
    config = yaml.safe_load(config_file)

# Bind each configuration section once, then flatten the values the pipeline
# needs into an argparse-style Namespace.
runtime_cfg = config['runtime']
model_cfg = config['model']
predictions_cfg = config['predictions']
training_cfg = config['training']
scheduler_cfg = training_cfg['scheduler_params']

args = Namespace(
    train=runtime_cfg['train'],
    predict=runtime_cfg['predict'],
    load_weights=runtime_cfg['load_weights'],
    data_path=config['data']['path'],
    model_path=model_cfg['params_path'],
    weight_path=model_cfg['weight_path'],
    predictions_dir=predictions_cfg['directory'],
    samples_file=predictions_cfg['samples_file'],
    batch_size=training_cfg['batch_size'],
    num_epochs=training_cfg['num_epochs'],
    learning_rate=training_cfg['learning_rate'],
    use_scheduler=training_cfg['use_scheduler'],
    scheduler_factor=scheduler_cfg['factor'],
    scheduler_patience=scheduler_cfg['patience'],
    # The model type is fixed for this section rather than read from the YAML.
    model_type="eigen_energy"
)

In [None]:
# Load the samples for the selected model type, hold out 10% as the evaluation
# split and 10% of the remainder as the validation split.  The fixed
# random_state keeps the split reproducible between runs.
data = load_data(args.data_path, args.model_type)
train_data, evaluate_data = train_test_split(data, test_size=0.1, random_state=42)
train_data, validate_data = train_test_split(train_data, test_size=0.1, random_state=42)

# Process each split with its own mode flag and wrap it in a VCSELDataset.
# NOTE(review): the train split is processed first; presumably process_data
# derives or stores normalisation parameters in that mode - confirm before
# reordering these calls.
train_data = process_data(train_data, args.model_path, args.model_type, is_train=True)
train_dataset = VCSELDataset(train_data)

validate_data = process_data(validate_data, args.model_path, args.model_type, is_validate=True)
validate_dataset = VCSELDataset(validate_data)

evaluate_data = process_data(evaluate_data, args.model_path, args.model_type, is_test=True)
evaluate_dataset = VCSELDataset(evaluate_data)

In [None]:
# Create the model from the configured parameter/weight settings and run
# prediction on the evaluation dataset.
# NOTE(review): predict() presumably writes its output under
# args.predictions_dir - confirm in src/predict/predict.py.
model = create_model(args.model_path, args.load_weights, args.weight_path)
predict(model, evaluate_dataset, args)

In [None]:
# Prediction file analysed in the next cell (relative to the working directory).
# NOTE(review): the timestamped name is hard-coded, so it is not necessarily
# the file produced by the predict() call in the previous cell.
predictions_dir = "predictions"
predicted_file = os.path.join(predictions_dir, "VCSEL//predictions_eigen_energy_20240522_155650_all.txt")

In [None]:
from src.data.utilities import denormalize_number
# A number as it may appear in the prediction file: optional sign, integer or
# decimal mantissa, optional exponent (e.g. "1.25", "-0.5", "3e-05").
# A plain \d+\.\d+ would skip lines holding negative or integer values and
# would read "1.2e-05" as 1.2; skipped lines silently shift the pairing
# between predictions and reference samples.
_NUMBER = r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?"
eigen_energy_pattern = re.compile(
    rf"EIGEN_ENERGY_1 ({_NUMBER}) EIGEN_ENERGY_2 ({_NUMBER})"
)


def read_predicted_values(filepath):
    """Parse (EIGEN_ENERGY_1, EIGEN_ENERGY_2) pairs from a predictions file.

    Args:
        filepath: Path to a text file; every line is searched for
            ``eigen_energy_pattern``.

    Returns:
        A list of ``(energy_1, energy_2)`` float tuples in file order.
        Lines that do not match the pattern are skipped.
    """
    with open(filepath, 'r') as file:
        matches = (eigen_energy_pattern.search(line) for line in file)
        return [(float(m.group(1)), float(m.group(2))) for m in matches if m]



# Parsed predictions and the reference targets of the evaluation split.
predicted_values = read_predicted_values(predicted_file)
original_values = evaluate_data['output_data']

# Only the first `count` rows of each source take part in the comparison.
count = len(original_values)
print(count)
predicted_rows = predicted_values[:count]
reference_rows = original_values[:count]
predicted_eigen_energy_1 = [row[0] for row in predicted_rows]
predicted_eigen_energy_2 = [row[1] for row in predicted_rows]
# Reference columns go through denormalize_number('ENERGY', ...) before plotting.
original_eigen_energy_1 = [denormalize_number('ENERGY', row[0]) for row in reference_rows]
original_eigen_energy_2 = [denormalize_number('ENERGY', row[1]) for row in reference_rows]

plt.figure(figsize=(10, 5))

# One scatter panel per eigen energy: true values on x, predictions on y.
panels = (
    (1, original_eigen_energy_1, predicted_eigen_energy_1, 'blue',
     'True Eigen Energy 1', 'Predicted Eigen Energy 1',
     'Predicted vs. True Values of Eigen Energy 1'),
    (2, original_eigen_energy_2, predicted_eigen_energy_2, 'green',
     'True Eigen Energy 2', 'Predicted Eigen Energy 2',
     'Predicted vs. True Values of Eigen Energy 2'),
)
for position, truth, prediction, point_color, x_label, y_label, heading in panels:
    plt.subplot(1, 2, position)
    plt.scatter(truth, prediction, color=point_color, alpha=0.5)
    # Dashed y = x reference line spanning the range of the true values.
    lower, upper = min(truth), max(truth)
    plt.plot([lower, upper], [lower, upper], color='red', linestyle='--')
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.title(heading)
    plt.grid(True)

plt.tight_layout()
# plt.savefig('path_to_scatter_plot.png')  # uncomment to save the figure
plt.show()


## Prediction Q

In [None]:
# Read the settings stored in config_VCSEL.yaml.
with open("config_VCSEL.yaml", 'r') as config_file:
    config = yaml.safe_load(config_file)

# Bind each configuration section once, then flatten the values the pipeline
# needs into an argparse-style Namespace.
runtime_cfg = config['runtime']
model_cfg = config['model']
predictions_cfg = config['predictions']
training_cfg = config['training']
scheduler_cfg = training_cfg['scheduler_params']

args = Namespace(
    train=runtime_cfg['train'],
    predict=runtime_cfg['predict'],
    load_weights=runtime_cfg['load_weights'],
    data_path=config['data']['path'],
    model_path=model_cfg['params_path'],
    weight_path=model_cfg['weight_path'],
    predictions_dir=predictions_cfg['directory'],
    samples_file=predictions_cfg['samples_file'],
    batch_size=training_cfg['batch_size'],
    num_epochs=training_cfg['num_epochs'],
    learning_rate=training_cfg['learning_rate'],
    use_scheduler=training_cfg['use_scheduler'],
    scheduler_factor=scheduler_cfg['factor'],
    scheduler_patience=scheduler_cfg['patience'],
    # The model type is fixed for this section rather than read from the YAML.
    model_type="quality_factor"
)

In [None]:
# Load the samples for the selected model type, hold out 10% as the evaluation
# split and 10% of the remainder as the validation split.  The fixed
# random_state keeps the split reproducible between runs.
data = load_data(args.data_path, args.model_type)
train_data, evaluate_data = train_test_split(data, test_size=0.1, random_state=42)
train_data, validate_data = train_test_split(train_data, test_size=0.1, random_state=42)

# Process each split with its own mode flag and wrap it in a VCSELDataset.
# NOTE(review): the train split is processed first; presumably process_data
# derives or stores normalisation parameters in that mode - confirm before
# reordering these calls.
train_data = process_data(train_data, args.model_path, args.model_type, is_train=True)
train_dataset = VCSELDataset(train_data)

validate_data = process_data(validate_data, args.model_path, args.model_type, is_validate=True)
validate_dataset = VCSELDataset(validate_data)

evaluate_data = process_data(evaluate_data, args.model_path, args.model_type, is_test=True)
evaluate_dataset = VCSELDataset(evaluate_data)

In [None]:
# Create the model from the configured parameter/weight settings and run
# prediction on the evaluation dataset.
# NOTE(review): predict() presumably writes its output under
# args.predictions_dir - confirm in src/predict/predict.py.
model = create_model(args.model_path, args.load_weights, args.weight_path)
predict(model, evaluate_dataset, args)

In [None]:
# Folder holding the prediction files, and the specific file analysed in the
# next cell (both relative to the working directory).
# NOTE(review): the timestamped name is hard-coded, so it is not necessarily
# the file produced by the predict() call in the previous cell.
predictions_dir = "predictions"
predicted_file = os.path.join(predictions_dir, "VCSEL//predictions_quality_factor_20240525_150414_all.txt")

In [None]:
from src.data.utilities import log_denormalize

# A number as it may appear in the prediction file: optional sign, integer or
# decimal mantissa, optional exponent (e.g. "123.5", "-2.5", "1.2e+05").
# A plain \d+\.\d+ would skip lines holding negative or integer values and
# would read "1.2e+05" as 1.2; skipped lines silently shift the pairing
# between predictions and reference samples.
_NUMBER = r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?"
quality_prediction = re.compile(rf"Q1 ({_NUMBER})")


def read_predicted_values(filepath):
    """Parse the predicted Q1 values from a predictions file.

    Args:
        filepath: Path to a text file; every line is searched for
            ``quality_prediction``.

    Returns:
        A list of floats in file order.  Lines that do not match the
        pattern are skipped.
    """
    with open(filepath, 'r') as file:
        matches = (quality_prediction.search(line) for line in file)
        return [float(m.group(1)) for m in matches if m]


# Parsed predictions and the reference targets of the evaluation split.
predicted_values = read_predicted_values(predicted_file)
original_values = evaluate_data['output_data']

count = len(original_values)
print(count)

# Keep the first `count` predictions; the first column of every reference row
# is mapped through log_denormalize before plotting.
predicted_values = predicted_values[:count]
original_values = [log_denormalize(row[0]) for row in original_values[:count]]

plt.figure(figsize=(12, 6))

# True values on x, predictions on y (left slot of a 1x2 grid).
plt.subplot(1, 2, 1)
plt.scatter(original_values, predicted_values, color='blue', alpha=0.5)
# Dashed y = x reference line spanning the range of the true values.
q_low, q_high = min(original_values), max(original_values)
plt.plot([q_low, q_high], [q_low, q_high], color='red', linestyle='--')
plt.xlabel('True Q')
plt.ylabel('Predicted Q')
plt.title('Predicted vs. True Values of Quality Factor for test data')
plt.grid(True)
# Both axes are logarithmic in this section.
plt.xscale('log')
plt.yscale('log')
plt.tight_layout()
# plt.savefig('path_to_scatter_plot.png')  # uncomment to save the figure
plt.show()





## Prediction TMG

In [None]:
# Read the settings stored in config_VCSEL.yaml.
with open("config_VCSEL.yaml", 'r') as config_file:
    config = yaml.safe_load(config_file)

# Bind each configuration section once, then flatten the values the pipeline
# needs into an argparse-style Namespace.
runtime_cfg = config['runtime']
model_cfg = config['model']
predictions_cfg = config['predictions']
training_cfg = config['training']
scheduler_cfg = training_cfg['scheduler_params']

args = Namespace(
    train=runtime_cfg['train'],
    predict=runtime_cfg['predict'],
    load_weights=runtime_cfg['load_weights'],
    data_path=config['data']['path'],
    model_path=model_cfg['params_path'],
    weight_path=model_cfg['weight_path'],
    predictions_dir=predictions_cfg['directory'],
    samples_file=predictions_cfg['samples_file'],
    batch_size=training_cfg['batch_size'],
    num_epochs=training_cfg['num_epochs'],
    learning_rate=training_cfg['learning_rate'],
    use_scheduler=training_cfg['use_scheduler'],
    scheduler_factor=scheduler_cfg['factor'],
    scheduler_patience=scheduler_cfg['patience'],
    # The model type is fixed for this section rather than read from the YAML.
    model_type="threshold_material_gain"
)

In [None]:
# Load the samples for the selected model type, hold out 10% as the evaluation
# split and 10% of the remainder as the validation split.  The fixed
# random_state keeps the split reproducible between runs.
data = load_data(args.data_path, args.model_type)
train_data, evaluate_data = train_test_split(data, test_size=0.1, random_state=42)
train_data, validate_data = train_test_split(train_data, test_size=0.1, random_state=42)

# Process each split with its own mode flag and wrap it in a VCSELDataset.
# NOTE(review): the train split is processed first; presumably process_data
# derives or stores normalisation parameters in that mode - confirm before
# reordering these calls.
train_data = process_data(train_data, args.model_path, args.model_type, is_train=True)
train_dataset = VCSELDataset(train_data)

validate_data = process_data(validate_data, args.model_path, args.model_type, is_validate=True)
validate_dataset = VCSELDataset(validate_data)

evaluate_data = process_data(evaluate_data, args.model_path, args.model_type, is_test=True)
evaluate_dataset = VCSELDataset(evaluate_data)

In [None]:
# Create the model from the configured parameter/weight settings and run
# prediction on the evaluation dataset.
# NOTE(review): predict() presumably writes its output under
# args.predictions_dir - confirm in src/predict/predict.py.
model = create_model(args.model_path, args.load_weights, args.weight_path)
predict(model, evaluate_dataset, args)

In [None]:
# Folder holding the prediction files, and the specific file analysed in the
# next cell (both relative to the working directory).
# NOTE(review): the timestamped name is hard-coded, so it is not necessarily
# the file produced by the predict() call in the previous cell.
predictions_dir = "predictions"
predicted_file = os.path.join(predictions_dir, "VCSEL//predictions_threshold_material_gain_20240526_004306_all.txt")

In [None]:
import matplotlib
from src.data.utilities import convert_k_to_gain, denormalize_number, convert_gain_to_k
#matplotlib.rcParams['text.usetex'] = True

# A number as it may appear in the prediction file: optional sign, integer or
# decimal mantissa, optional exponent (e.g. "1500.25", "-12.5", "2.5e+03").
# A plain \d+\.\d+ would skip lines holding negative or integer values and
# would read "2.5e+03" as 2.5; skipped lines silently shift the pairing
# between predictions and reference samples.
_NUMBER = r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?"
# NOTE(review): "TRESHOLD" (sic) presumably mirrors the label written by the
# prediction code - do not correct the spelling here without checking it.
TMG_pattern = re.compile(
    rf"TRESHOLD_MATERIAL_GAIN ({_NUMBER}) ENERGY ({_NUMBER})"
)


def read_predicted_values(filepath):
    """Parse (threshold material gain, energy) pairs from a predictions file.

    Args:
        filepath: Path to a text file; every line is searched for
            ``TMG_pattern``.

    Returns:
        A list of ``(gain, energy)`` float tuples in file order.  Lines that
        do not match the pattern are skipped.
    """
    with open(filepath, 'r') as file:
        matches = (TMG_pattern.search(line) for line in file)
        return [(float(m.group(1)), float(m.group(2))) for m in matches if m]

# Parsed predictions and the reference targets of the evaluation split.
predicted_values = read_predicted_values(predicted_file)
original_values = evaluate_data['output_data']

count = len(original_values)
print(count)

# Parsed prediction rows are (gain, energy).  Reference rows are indexed the
# other way round: column 0 is denormalised as 'ENERGY', and the negated
# column 1 is passed to convert_k_to_gain together with that energy.
predicted_rows = predicted_values[:count]
reference_rows = original_values[:count]
predicted_energy = [row[1] for row in predicted_rows]
predicted_TMG = [row[0] for row in predicted_rows]
original_TMG = [
    convert_k_to_gain(denormalize_number('ENERGY', row[0]), -row[1])
    for row in reference_rows
]
original_energy = [denormalize_number('ENERGY', row[0]) for row in reference_rows]

plt.figure(figsize=(12, 6))

# One scatter panel per quantity: true values on x, predictions on y.
panels = (
    (1, original_TMG, predicted_TMG, 'blue',
     'True Threshold material gain, $cm^{-1}$',
     'Predicted Threshold material gain, $cm^{-1}$',
     'Predicted vs. True Values of Threshold material gain'),
    (2, original_energy, predicted_energy, 'green',
     'True Energy, eV', 'Predicted Energy, eV',
     'Predicted vs. True Values of Energy'),
)
for position, truth, prediction, point_color, x_label, y_label, heading in panels:
    plt.subplot(1, 2, position)
    plt.scatter(truth, prediction, color=point_color, alpha=0.5)
    # Dashed y = x reference line spanning the range of the true values.
    lower, upper = min(truth), max(truth)
    plt.plot([lower, upper], [lower, upper], color='red', linestyle='--')
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.title(heading)
    plt.grid(True)

plt.tight_layout()
# plt.savefig('path_to_scatter_plot.png')  # uncomment to save the figure
plt.show()

# ALL

## Prediction E

In [None]:
# Read the settings stored in config.yaml.
with open("config.yaml", 'r') as config_file:
    config = yaml.safe_load(config_file)

# Bind each configuration section once, then flatten the values the pipeline
# needs into an argparse-style Namespace.
runtime_cfg = config['runtime']
model_cfg = config['model']
predictions_cfg = config['predictions']
training_cfg = config['training']
scheduler_cfg = training_cfg['scheduler_params']

args = Namespace(
    train=runtime_cfg['train'],
    predict=runtime_cfg['predict'],
    load_weights=runtime_cfg['load_weights'],
    data_path=config['data']['path'],
    model_path=model_cfg['params_path'],
    weight_path=model_cfg['weight_path'],
    predictions_dir=predictions_cfg['directory'],
    samples_file=predictions_cfg['samples_file'],
    batch_size=training_cfg['batch_size'],
    num_epochs=training_cfg['num_epochs'],
    learning_rate=training_cfg['learning_rate'],
    use_scheduler=training_cfg['use_scheduler'],
    scheduler_factor=scheduler_cfg['factor'],
    scheduler_patience=scheduler_cfg['patience'],
    # The model type is fixed for this section rather than read from the YAML.
    model_type="eigen_energy"
)

In [None]:
# Load the samples for the selected model type, hold out 10% as the evaluation
# split and 10% of the remainder as the validation split.  The fixed
# random_state keeps the split reproducible between runs.
data = load_data(args.data_path, args.model_type)
train_data, evaluate_data = train_test_split(data, test_size=0.1, random_state=42)
train_data, validate_data = train_test_split(train_data, test_size=0.1, random_state=42)

# Process each split with its own mode flag and wrap it in a VCSELDataset.
# NOTE(review): the train split is processed first; presumably process_data
# derives or stores normalisation parameters in that mode - confirm before
# reordering these calls.
train_data = process_data(train_data, args.model_path, args.model_type, is_train=True)
train_dataset = VCSELDataset(train_data)

validate_data = process_data(validate_data, args.model_path, args.model_type, is_validate=True)
validate_dataset = VCSELDataset(validate_data)

evaluate_data = process_data(evaluate_data, args.model_path, args.model_type, is_test=True)
evaluate_dataset = VCSELDataset(evaluate_data)

In [None]:
# Create the model from the configured parameter/weight settings and run
# prediction on the evaluation dataset.
# NOTE(review): predict() presumably writes its output under
# args.predictions_dir - confirm in src/predict/predict.py.
model = create_model(args.model_path, args.load_weights, args.weight_path)
predict(model, evaluate_dataset, args)

In [None]:
# Prediction file analysed in the next cell (relative to the working directory).
# NOTE(review): the timestamped name is hard-coded, so it is not necessarily
# the file produced by the predict() call in the previous cell.
predictions_dir = "predictions"
predicted_file = os.path.join(predictions_dir, "predictions_eigen_energy_20240527_122943_all.txt")

In [None]:
from src.data.utilities import denormalize_number
# A number as it may appear in the prediction file: optional sign, integer or
# decimal mantissa, optional exponent (e.g. "1.25", "-0.5", "3e-05").
# A plain \d+\.\d+ would skip lines holding negative or integer values and
# would read "1.2e-05" as 1.2; skipped lines silently shift the pairing
# between predictions and reference samples.
_NUMBER = r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?"
eigen_energy_pattern = re.compile(
    rf"EIGEN_ENERGY_1 ({_NUMBER}) EIGEN_ENERGY_2 ({_NUMBER})"
)


def read_predicted_values(filepath):
    """Parse (EIGEN_ENERGY_1, EIGEN_ENERGY_2) pairs from a predictions file.

    Args:
        filepath: Path to a text file; every line is searched for
            ``eigen_energy_pattern``.

    Returns:
        A list of ``(energy_1, energy_2)`` float tuples in file order.
        Lines that do not match the pattern are skipped.
    """
    with open(filepath, 'r') as file:
        matches = (eigen_energy_pattern.search(line) for line in file)
        return [(float(m.group(1)), float(m.group(2))) for m in matches if m]



# Pair the parsed predictions with the reference outputs of the hold-out split.
predicted_values = read_predicted_values(predicted_file)
original_values = evaluate_data['output_data']

count = len(original_values)
print(count)

# Predictions are used as parsed; the reference outputs are mapped through
# denormalize_number('ENERGY', ...) before comparison.
predicted_eigen_energy_1 = [pair[0] for pair in predicted_values[:count]]
predicted_eigen_energy_2 = [pair[1] for pair in predicted_values[:count]]
original_eigen_energy_1 = [denormalize_number('ENERGY', row[0]) for row in original_values[:count]]
original_eigen_energy_2 = [denormalize_number('ENERGY', row[1]) for row in original_values[:count]]

# Dump "<predicted>, <original>" lines, one text file per eigen energy.
for out_name, predicted_column, original_column in (
    ('all_energy_1_prediction.txt', predicted_eigen_energy_1, original_eigen_energy_1),
    ('all_energy_2_prediction.txt', predicted_eigen_energy_2, original_eigen_energy_2),
):
    with open(out_name, 'w') as out_file:
        out_file.writelines(
            f"{predict_energy}, {original_energy}\n"
            for predict_energy, original_energy in zip(predicted_column, original_column)
        )

plt.figure(figsize=(10, 5))

# One scatter panel per eigen energy; the dashed red line is the y = x reference.
panels = (
    (1, original_eigen_energy_1, predicted_eigen_energy_1, 'blue',
     'True Eigen Energy 1', 'Predicted Eigen Energy 1',
     'Predicted vs. True Values of Eigen Energy 1'),
    (2, original_eigen_energy_2, predicted_eigen_energy_2, 'green',
     'True Eigen Energy 2', 'Predicted Eigen Energy 2',
     'Predicted vs. True Values of Eigen Energy 2'),
)
for position, true_vals, pred_vals, colour, x_label, y_label, heading in panels:
    plt.subplot(1, 2, position)
    plt.scatter(true_vals, pred_vals, color=colour, alpha=0.5)
    lowest, highest = min(true_vals), max(true_vals)
    plt.plot([lowest, highest], [lowest, highest], color='red', linestyle='--')
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.title(heading)
    plt.grid(True)

plt.tight_layout()
#plt.savefig('path_to_scatter_plot.png')
plt.show()


## Prediction Q

In [None]:
# Read the run configuration used for the quality-factor model.
with open("config_ALL.yaml", 'r') as config_file:
    config = yaml.safe_load(config_file)

# Flatten the nested configuration into the flat attribute names read from `args`.
options = {
    'train': config['runtime']['train'],
    'predict': config['runtime']['predict'],
    'load_weights': config['runtime']['load_weights'],

    'data_path': config['data']['path'],
    'model_path': config['model']['params_path'],
    'weight_path': config['model']['weight_path'],

    'predictions_dir': config['predictions']['directory'],
    'samples_file': config['predictions']['samples_file'],

    'batch_size': config['training']['batch_size'],
    'num_epochs': config['training']['num_epochs'],
    'learning_rate': config['training']['learning_rate'],
    'use_scheduler': config['training']['use_scheduler'],
    'scheduler_factor': config['training']['scheduler_params']['factor'],
    'scheduler_patience': config['training']['scheduler_params']['patience'],
    # The model type is fixed by this section rather than read from the config.
    'model_type': "quality_factor",
}
args = Namespace(**options)

In [None]:
# Load the samples for the selected model type, hold out 10% for evaluation and
# then take a further 10% of the remainder for validation (fixed seed on both splits).
data = load_data(args.data_path, args.model_type)
train_data, evaluate_data = train_test_split(data, test_size=0.1, random_state=42)
train_data, validate_data = train_test_split(train_data, test_size=0.1, random_state=42)

# Every split is passed through process_data with its own mode flag and wrapped in a VCSELDataset.
# NOTE(review): the train/validate datasets are not used by the visible cells of this section;
# presumably process_data has to see the training split first - confirm before removing or reordering.
train_data = process_data(train_data, args.model_path, args.model_type, is_train=True)
train_dataset = VCSELDataset(train_data)

validate_data = process_data(validate_data, args.model_path, args.model_type, is_validate=True)
validate_dataset = VCSELDataset(validate_data)

# The processed hold-out split provides the reference outputs compared against the predictions below.
evaluate_data = process_data(evaluate_data, args.model_path, args.model_type, is_test=True)
evaluate_dataset = VCSELDataset(evaluate_data)

In [None]:
# Build the model from the parameter file (create_model also receives the load_weights flag
# and the weight path), then run prediction over the hold-out dataset.
# NOTE(review): presumably predict() writes its results to the predictions directory - confirm.
model = create_model(args.model_path, args.load_weights, args.weight_path)
predict(model, evaluate_dataset, args)

In [None]:
# Folder and name of the prediction file that the following cell parses and plots.
# NOTE(review): the directory is hard-coded instead of taken from args.predictions_dir, and the
# file name carries a fixed timestamp, so it may not be the output of the predict() call above - confirm.
predictions_dir = "predictions"
predicted_file = os.path.join(predictions_dir, "predictions_quality_factor_20240527_210412_all.txt")

In [None]:
from src.data.utilities import log_denormalize
import numpy as np

# Signed decimal number with optional fraction and exponent (e.g. 12, -0.5, 1.2e+03).
# The previous "\d+\.\d+" capture cut "1.5e+05" down to 1.5 and skipped integer or
# negative values, which shifted the predictions against the reference values.
_FLOAT = r"[-+]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][-+]?\d+)?"
quality_prediction = re.compile(rf"Q1 ({_FLOAT})")


def read_predicted_values(filepath):
    """Parse the predicted quality factors from a prediction text file.

    Args:
        filepath: Path of the text file to read.

    Returns:
        A list of floats, one per line containing the ``Q1 <value>`` pattern,
        in file order. Lines without the pattern are ignored.
    """
    with open(filepath, 'r') as file:
        matches = (quality_prediction.search(line) for line in file)
        return [float(match.group(1)) for match in matches if match]


# Pair the parsed Q predictions with the reference outputs of the hold-out split.
predicted_values = read_predicted_values(predicted_file)
original_values = evaluate_data['output_data']

count = len(original_values)
print(count)

# Keep at most `count` predictions; the first reference output of every sample
# is mapped through log_denormalize before comparison.
predicted_values = predicted_values[:count]
original_values = [log_denormalize(sample[0]) for sample in original_values[:count]]

plt.figure(figsize=(12, 6))

plt.scatter(original_values, predicted_values, color='blue', alpha=0.5)
# Dashed y = x reference line spanning the range of the true values.
diagonal = [min(original_values), max(original_values)]
plt.plot(diagonal, diagonal, color='red', linestyle='--')
plt.xlabel('True Q')
plt.ylabel('Predicted Q')
plt.title('Predicted vs. True Values of Quality Factor for test data')
plt.grid(True)
# Both axes are logarithmic.
plt.xscale('log')
plt.yscale('log')
plt.tight_layout()
#plt.savefig('path_to_scatter_plot.png')
plt.show()

## Prediction TMG

In [None]:
# Read the run configuration used for the threshold-material-gain model.
with open("config_ALL.yaml", 'r') as config_file:
    config = yaml.safe_load(config_file)

# Flatten the nested configuration into the flat attribute names read from `args`.
options = {
    'train': config['runtime']['train'],
    'predict': config['runtime']['predict'],
    'load_weights': config['runtime']['load_weights'],

    'data_path': config['data']['path'],
    'model_path': config['model']['params_path'],
    'weight_path': config['model']['weight_path'],

    'predictions_dir': config['predictions']['directory'],
    'samples_file': config['predictions']['samples_file'],

    'batch_size': config['training']['batch_size'],
    'num_epochs': config['training']['num_epochs'],
    'learning_rate': config['training']['learning_rate'],
    'use_scheduler': config['training']['use_scheduler'],
    'scheduler_factor': config['training']['scheduler_params']['factor'],
    'scheduler_patience': config['training']['scheduler_params']['patience'],
    # The model type is fixed by this section rather than read from the config.
    'model_type': "threshold_material_gain",
}
args = Namespace(**options)

In [None]:
# Load the samples for the selected model type, hold out 10% for evaluation and
# then take a further 10% of the remainder for validation (fixed seed on both splits).
data = load_data(args.data_path, args.model_type)
train_data, evaluate_data = train_test_split(data, test_size=0.1, random_state=42)
train_data, validate_data = train_test_split(train_data, test_size=0.1, random_state=42)

# Every split is passed through process_data with its own mode flag and wrapped in a VCSELDataset.
# NOTE(review): the train/validate datasets are not used by the visible cells of this section;
# presumably process_data has to see the training split first - confirm before removing or reordering.
train_data = process_data(train_data, args.model_path, args.model_type, is_train=True)
train_dataset = VCSELDataset(train_data)

validate_data = process_data(validate_data, args.model_path, args.model_type, is_validate=True)
validate_dataset = VCSELDataset(validate_data)

# The processed hold-out split provides the inputs and reference outputs used by the cells below.
evaluate_data = process_data(evaluate_data, args.model_path, args.model_type, is_test=True)
evaluate_dataset = VCSELDataset(evaluate_data)

In [None]:
# Build the model from the parameter file (create_model also receives the load_weights flag
# and the weight path), then run prediction over the hold-out dataset.
# NOTE(review): presumably predict() writes its results to the predictions directory - confirm.
model = create_model(args.model_path, args.load_weights, args.weight_path)
predict(model, evaluate_dataset, args)

In [None]:
# Folder and name of the prediction file that the following cell parses and plots.
# NOTE(review): the directory is hard-coded instead of taken from args.predictions_dir, and the
# file name carries a fixed timestamp, so it may not be the output of the predict() call above - confirm.
predictions_dir = "predictions"
predicted_file = os.path.join(predictions_dir, "predictions_threshold_material_gain_20240528_181915_all.txt")

In [None]:
import matplotlib
from src.data.utilities import convert_k_to_gain, denormalize_number, convert_gain_to_k
#matplotlib.rcParams['text.usetex'] = True

# Signed decimal number with optional fraction and exponent (e.g. 12, -0.5, 1.2e+03).
# The previous "\d+\.\d+" capture cut "1.5e+03" down to 1.5 and skipped integer or
# negative values, which shifted the predictions against the reference values.
_FLOAT = r"[-+]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][-+]?\d+)?"
# "TH?RESHOLD" keeps matching the misspelled key "TRESHOLD_MATERIAL_GAIN" this cell has
# always parsed, and additionally accepts the correctly spelled key.
TMG_pattern = re.compile(rf"TH?RESHOLD_MATERIAL_GAIN ({_FLOAT}) ENERGY ({_FLOAT})")


def read_predicted_values(filepath):
    """Parse the predicted (threshold material gain, energy) pairs from a text file.

    Args:
        filepath: Path of the text file to read.

    Returns:
        A list of ``(threshold_material_gain, energy)`` float tuples, one per
        line containing the ``TRESHOLD_MATERIAL_GAIN <g> ENERGY <e>`` pattern,
        in file order. Lines without the pattern are ignored.
    """
    with open(filepath, 'r') as file:
        matches = (TMG_pattern.search(line) for line in file)
        return [
            (float(match.group(1)), float(match.group(2)))
            for match in matches
            if match
        ]

# Pair the parsed (gain, energy) predictions with the reference outputs of the hold-out split.
predicted_values = read_predicted_values(predicted_file)
original_values = evaluate_data['output_data']

# NOTE(review): despite its name, this collects the indices of samples whose input entry
# [3][10] is non-zero; within this cell it is only referenced by the disabled variant below.
zero_sum_indices = [
    index
    for index, sample in enumerate(evaluate_data['input_data'])
    if sample[3][10] != 0
]

# Disabled variant: restrict the comparison to the indices selected above.
# Filter predicted and original values based on these indices
#filtered_predicted_values = [predicted_values[i] for i in zero_sum_indices]
#filtered_original_values = [original_values[i] for i in zero_sum_indices]

#count = len(filtered_predicted_values)
#print(len(filtered_predicted_values))

#predicted_energy= [val[1] for val in filtered_predicted_values[:count]]
#predicted_TMG = [val[0] for val in filtered_predicted_values[:count]]
#original_TMG = [convert_k_to_gain(denormalize_number('ENERGY', val[0]), -val[1]) for val in filtered_original_values[:count]]
#original_energy = [denormalize_number('ENERGY', val[0]) for val in filtered_original_values[:count]]

count = len(original_values)
print(count)

# Each parsed prediction is a (gain, energy) pair. On the reference side, output [0] is mapped
# through denormalize_number('ENERGY', ...) and the gain is derived from it and the negated output [1].
predicted_energy = [pair[1] for pair in predicted_values[:count]]
predicted_TMG = [pair[0] for pair in predicted_values[:count]]
original_TMG = [
    convert_k_to_gain(denormalize_number('ENERGY', row[0]), -row[1])
    for row in original_values[:count]
]
original_energy = [denormalize_number('ENERGY', row[0]) for row in original_values[:count]]


"""
with open('VCSEL_energy_prediction_SINGLE.txt', 'w') as file1:
    for predict_energy_, original_energy_ in zip(predicted_energy, original_energy):
        file1.write(f"{predict_energy_}, {original_energy_}\n")

with open('VCSEL_TMG_prediction.txt', 'w') as file1:
    for predicted_TMG_, original_TMG_ in zip(predicted_TMG, original_TMG):
        file1.write(f"{predicted_TMG_}, {original_TMG_}\n")
"""

plt.figure(figsize=(12, 6))

# One scatter panel per quantity; the dashed red line is the y = x reference.
panels = (
    (1, original_TMG, predicted_TMG, 'blue',
     'True Threshold material gain, $cm^{-1}$',
     'Predicted Threshold material gain, $cm^{-1}$',
     'Predicted vs. True Values of Threshold material gain'),
    (2, original_energy, predicted_energy, 'green',
     'True Energy, eV',
     'Predicted Energy, eV',
     'Predicted vs. True Values of Energy'),
)
for position, true_vals, pred_vals, colour, x_label, y_label, heading in panels:
    plt.subplot(1, 2, position)
    plt.scatter(true_vals, pred_vals, color=colour, alpha=0.5)
    lowest, highest = min(true_vals), max(true_vals)
    plt.plot([lowest, highest], [lowest, highest], color='red', linestyle='--')
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.title(heading)
    plt.grid(True)

plt.tight_layout()
#plt.savefig('path_to_scatter_plot.png')
plt.show()

## Prediction Q Additionally

In [None]:
# Read the run configuration used for the additional quality-factor comparison.
with open("config_ALL.yaml", 'r') as config_file:
    config = yaml.safe_load(config_file)

# Flatten the nested configuration into the flat attribute names read from `args`.
options = {
    'train': config['runtime']['train'],
    'predict': config['runtime']['predict'],
    'load_weights': config['runtime']['load_weights'],

    'data_path': config['data']['path'],
    'model_path': config['model']['params_path'],
    'weight_path': config['model']['weight_path'],

    'predictions_dir': config['predictions']['directory'],
    'samples_file': config['predictions']['samples_file'],

    'batch_size': config['training']['batch_size'],
    'num_epochs': config['training']['num_epochs'],
    'learning_rate': config['training']['learning_rate'],
    'use_scheduler': config['training']['use_scheduler'],
    'scheduler_factor': config['training']['scheduler_params']['factor'],
    'scheduler_patience': config['training']['scheduler_params']['patience'],
    # The model type is fixed by this section rather than read from the config.
    'model_type': "quality_factor",
}
args = Namespace(**options)

In [None]:
# Load the samples for the selected model type, hold out 10% for evaluation and
# then take a further 10% of the remainder for validation (fixed seed on both splits).
data = load_data(args.data_path, args.model_type)
train_data, evaluate_data = train_test_split(data, test_size=0.1, random_state=42)
train_data, validate_data = train_test_split(train_data, test_size=0.1, random_state=42)

# Every split is passed through process_data with its own mode flag and wrapped in a VCSELDataset.
# NOTE(review): the train/validate datasets are not used by the visible cells of this section;
# presumably process_data has to see the training split first - confirm before removing or reordering.
train_data = process_data(train_data, args.model_path, args.model_type, is_train=True)
train_dataset = VCSELDataset(train_data)

validate_data = process_data(validate_data, args.model_path, args.model_type, is_validate=True)
validate_dataset = VCSELDataset(validate_data)

# The processed hold-out split provides the reference outputs compared against the predictions below.
evaluate_data = process_data(evaluate_data, args.model_path, args.model_type, is_test=True)
evaluate_dataset = VCSELDataset(evaluate_data)

In [None]:
# Build the model from the parameter file (create_model also receives the load_weights flag
# and the weight path), then run prediction over the hold-out dataset.
# NOTE(review): presumably predict() writes its results to the predictions directory - confirm.
model = create_model(args.model_path, args.load_weights, args.weight_path)
predict(model, evaluate_dataset, args)

In [None]:
# Folder and name of the prediction file that the following cell parses and plots.
# NOTE(review): the directory is hard-coded instead of taken from args.predictions_dir, and the
# file name carries a fixed timestamp (without the "_all" suffix used by the earlier cells), so it
# may not be the output of the predict() call above - confirm.
predictions_dir = "predictions"
predicted_file = os.path.join(predictions_dir, "predictions_quality_factor_20240610_232953.txt")

In [None]:
from src.data.utilities import log_denormalize
import numpy as np

# Signed decimal number with optional fraction and exponent (e.g. 12, -0.5, 1.2e+03).
# The previous "\d+\.\d+" capture cut "1.5e+05" down to 1.5 and skipped integer or
# negative values, which shifted the predictions against the reference values.
_FLOAT = r"[-+]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][-+]?\d+)?"
quality_prediction = re.compile(rf"Q1 ({_FLOAT})")


def read_predicted_values(filepath):
    """Parse the predicted quality factors from a prediction text file.

    Args:
        filepath: Path of the text file to read.

    Returns:
        A list of floats, one per line containing the ``Q1 <value>`` pattern,
        in file order. Lines without the pattern are ignored.
    """
    with open(filepath, 'r') as file:
        matches = (quality_prediction.search(line) for line in file)
        return [float(match.group(1)) for match in matches if match]


# Pair the parsed Q predictions with the reference outputs of the hold-out split.
predicted_values = read_predicted_values(predicted_file)
original_values = evaluate_data['output_data']

count = len(original_values)
print(count)

# Keep at most `count` predictions; the first reference output of every sample
# is mapped through log_denormalize before comparison.
predicted_values = predicted_values[:count]
original_values = [log_denormalize(sample[0]) for sample in original_values[:count]]

plt.figure(figsize=(12, 6))

plt.scatter(original_values, predicted_values, color='blue', alpha=0.5)
# Dashed y = x reference line spanning the range of the true values.
diagonal = [min(original_values), max(original_values)]
plt.plot(diagonal, diagonal, color='red', linestyle='--')
plt.xlabel('True Q')
plt.ylabel('Predicted Q')
plt.title('Predicted vs. True Values of Quality Factor for test data')
plt.grid(True)
# Both axes are logarithmic.
plt.xscale('log')
plt.yscale('log')
plt.tight_layout()
#plt.savefig('path_to_scatter_plot.png')
plt.show()

