In [None]:
from qiskit_ibm_runtime import QiskitRuntimeService
import os
n_jobs = 16
os.environ["OMP_NUM_THREADS"] = str(n_jobs)
import joblib
import click
import json
import time
from glob import glob
import itertools
import collections.abc
import sys
from tqdm.notebook import tqdm
# !{sys.executable} -m pip install qcircuit
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pennylane as qml
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import re
from joblib import Parallel, delayed, dump, load
# !pip install tzlocal
from datetime import datetime
from tzlocal import get_localzone


In [None]:
def main(path, total_iterations=500):
    """Print how many iterations are still missing from the logs in ``path``.

    Every file matching ``*log.csv`` directly inside ``path`` is parsed with
    ``split`` and column 1 of each parsed array is collected.  The number of
    collected values is subtracted from ``total_iterations`` and the
    difference is printed followed by ``left``.

    Parameters
    ----------
    path : str
        Directory searched (non-recursively) for ``*log.csv`` files.
    total_iterations : int, optional
        Iteration budget the logged count is compared against.  Defaults to
        500, the value that used to be hard-coded.

    Returns
    -------
    None
    """
    loss = []
    for log_file in sorted(glob(os.path.join(path, '*log.csv'))):
        print(f"Processing file: {log_file}")
        parsed = split(log_file)
        # A log without any parsable line yields an empty array that cannot be
        # column-indexed; it contributes no iterations.
        if parsed.size == 0:
            continue
        loss.append(parsed[:, 1])  # column 1 of the parsed array (third CSV field)
    # np.hstack raises ValueError on an empty list, so count the values directly.
    n_logged = sum(len(values) for values in loss)
    print(total_iterations - n_logged, "left")
#   plt.plot(np.hstack(loss))
#   plt.xlabel("Iteration")
#   plt.ylabel("Loss")
#   plt.title("Loss Curve")
#   plt.show()

def split(path):
    """Parse a log file and return two numeric fields per line as an ``(n, 2)`` array.

    Only lines containing a ``:`` are parsed; for those, the second and third
    comma-separated fields are converted to ``float``.  Lines without a ``:``
    are ignored.

    Parameters
    ----------
    path : str
        Path of the log file to read.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n, 2)``.  When no line qualifies the shape is
        ``(0, 2)``, so column indexing such as ``[:, 1]`` still works.

    Raises
    ------
    IndexError, ValueError
        If a parsed line has fewer than three fields or a non-numeric value.
    """
    rows = []
    with open(path, 'r') as f:
        for line in f:
            if ':' not in line:
                continue
            fields = line.strip().split(',')
            rows.append((float(fields[1]), float(fields[2])))
    # reshape keeps the result two-dimensional even when `rows` is empty.
    return np.array(rows, dtype=float).reshape(-1, 2)

In [None]:
# Report the remaining iteration count for the '*log.csv' files in the current directory.
main('./')

In [None]:
# Read every file matching '*csv' in the working directory (first column as
# index), drop rows containing NaN, and stack the frames in sorted-filename order.
# NOTE(review): this pattern is broader than the '*log.csv' used by main() — confirm that is intended.
df_model = pd.concat([pd.read_csv(i,index_col=0).dropna() for i in sorted(glob('*csv'))])

In [None]:
# Display the index in sorted order (the next cell parses this index as timestamps).
df_model.sort_index().index

In [None]:

# Add timing columns in place: the index parsed as datetimes, the gap to the
# previous row, and that gap expressed in seconds.
# NOTE(review): rows are in file-concatenation order, not sorted by time, so
# diff() may yield negative gaps at file boundaries — confirm.
df_model["timestamp"] = pd.to_datetime(df_model.index)
df_model["delta_time"] = df_model["timestamp"].diff()
df_model["delta_seconds"] = df_model["delta_time"].dt.total_seconds()


In [None]:
SMALL_SIZE = 8
MEDIUM_SIZE = 12
BIGGER_SIZE = 12

# Every text element of a figure uses MEDIUM_SIZE; the other two constants
# are defined but not referenced in this cell.
plt.rcParams.update({
    'font.size': MEDIUM_SIZE,         # default text size
    'axes.titlesize': MEDIUM_SIZE,    # axes title
    'axes.labelsize': MEDIUM_SIZE,    # x and y axis labels
    'xtick.labelsize': MEDIUM_SIZE,   # x tick labels
    'ytick.labelsize': MEDIUM_SIZE,   # y tick labels
    'legend.fontsize': MEDIUM_SIZE,   # legend entries
    'figure.titlesize': MEDIUM_SIZE,  # figure title
})

In [None]:
# Load the DDCC train set, test set and scaler from joblib files that sit in
# the current working directory.
# NOTE(review): joblib.load unpickles its input and can execute arbitrary
# code — only load files from a trusted source.
top=os.getcwd()
with open(os.path.join(top,'0.1_5_DDCC_train.bin'),'rb') as f:
    ddcc_train = joblib.load(f)

with open(os.path.join(top,'0.1_5_DDCC_test.bin'),'rb') as f:
    ddcc_test = joblib.load(f)

with open(os.path.join(top,'0.1_5_DDCC_scaler.bin'),'rb') as f:
    ddcc_scaler = joblib.load(f)

# Each loaded object is indexed with the keys 'X' and 'y'.
X_ddcc_train, y_ddcc_train = ddcc_train['X'],ddcc_train['y']
X_ddcc_test, y_ddcc_test = ddcc_test['X'],ddcc_test['y']

# Regroup into blocks of 64 rows: X becomes (-1, 64, 5) and y becomes (-1, 64).
# presumably 64 samples with 5 features per block — TODO confirm
X_ddcc_train = X_ddcc_train.reshape(-1,64,5)
X_ddcc_test = X_ddcc_test.reshape(-1,64,5)
y_ddcc_train = y_ddcc_train.reshape(-1,64)
y_ddcc_test = y_ddcc_test.reshape(-1,64)


# X_train, y_train = X_ddcc_train, y_ddcc_train
# X_test, y_test = X_ddcc_test, y_ddcc_test
# Generic aliases for the DDCC arrays.
X_train, y_train = X_ddcc_train, y_ddcc_train
X_test, y_test = X_ddcc_test, y_ddcc_test

# X_train = [X_train[i:i+4] for i in range(0,len(X_train),4)]
# X_test = [X_test[i:i+4] for i in range(0,len(X_test),4)]
scaler = ddcc_scaler

# print(len(X_train),X_train[0].shape,X_train[-1].shape)
print(y_train.shape, y_test.shape)

In [None]:
def grab_data(path):
    '''
    Sort an iterable of file paths into job-id files and metric files.

    A path containing ``jobs_<number>.txt`` is a job-id file.  Any other path
    is a test-metric file when it contains ``test`` and a train-metric file
    otherwise.

    Parameters
    ----------
    path : iterable of str
        Paths to classify, e.g. the result of a ``glob`` call.

    Returns
    -------
    tuple of (list, list, list)
        Job-id files ordered by their number (ties keep input order), then
        the train-metric files and the test-metric files in input order.
    '''
    job_pattern = re.compile(r'jobs_(\d+)\.txt')
    numbered = []
    train_metric = []
    test_metric = []
    for name in path:
        # An explicit match test replaces the former bare `except:`, which
        # used a failed `.group()` call as control flow and hid real errors.
        match = job_pattern.search(name)
        if match is not None:
            numbered.append((int(match.group(1)), name))
        elif 'test' in name:
            test_metric.append(name)
        else:
            train_metric.append(name)

    # list.sort is stable, so equal job numbers keep their input order.
    numbered.sort(key=lambda pair: pair[0])
    return [name for _, name in numbered], train_metric, test_metric

In [None]:
# def fetch_jobs_from_file(service, filename):
#     """Fetch all job results from a given file."""
#     with open(filename, 'r') as f:
#         job_ids = f.readlines()
#     return [service.job(job_id.strip()).result() for job_id in job_ids]

# def grab_jobs(lst):
#     """Parallelize over job files, fetching results concurrently."""


#     service = QiskitRuntimeService(
#         channel='ibm_quantum',
#         instance='pinq-quebec-hub/univ-toronto/pr-hans-arno-jac'
#     )
#     # Parallel processing of files
#     jobs = Parallel(n_jobs=-1,backend='threading')(
#         delayed(fetch_jobs_from_file)(service, filename) for filename in tqdm(lst)
#     )
    
#     return np.array(jobs)


def fetch_jobs_from_file(filename):
    """Return a runtime job handle for every job id listed in ``filename``.

    A new ``QiskitRuntimeService`` is created on each call.  The file is read
    as one job id per line and each id is passed to ``service.job``; the
    returned objects are the job handles, not their results.

    Parameters
    ----------
    filename : str
        Text file holding one job id per line.

    Returns
    -------
    list
        One ``service.job(...)`` object per non-blank line, in file order.
    """
    service = QiskitRuntimeService(
        channel='ibm_quantum',
        instance='pinq-quebec-hub/univ-toronto/pr-hans-arno-jac'
    )
    with open(filename, 'r') as f:
        job_ids = [line.strip() for line in f]
    # Blank lines would otherwise be looked up as an empty job id.
    return [service.job(job_id) for job_id in job_ids if job_id]

def grab_jobs(lst):
    """Look up the jobs of several job-id files in parallel.

    ``fetch_jobs_from_file`` is run once per entry of ``lst`` through joblib
    using all available workers.

    Parameters
    ----------
    lst : iterable of str
        Paths of job-id files.

    Returns
    -------
    numpy.ndarray
        Object array holding one list of job handles per file.  It is
        two-dimensional when every file has the same number of ids, otherwise
        a one-dimensional array of lists.
    """
    # Parallel processing of files
    jobs = Parallel(n_jobs=-1)(
        delayed(fetch_jobs_from_file)(filename) for filename in tqdm(lst)
    )

    # dtype=object: files may list different numbers of jobs, and current
    # NumPy refuses to build an array from ragged lists without it.
    return np.array(jobs, dtype=object)

In [None]:
# Job ids that openfiles() drops when it reads a job-id file.
EXCLUDED_JOBS = {
    'd0gyptvfbx30008w5j80',
    # Add more if needed
}

def get_service():
    """Create a runtime service, falling back to the hub's default instance.

    The project instance is tried first.  If constructing it raises any
    ``Exception``, the ``default`` instance is used instead; a failure of the
    fallback propagates to the caller.
    """
    channel = 'ibm_quantum'
    try:
        service = QiskitRuntimeService(
            channel=channel,
            instance='pinq-quebec-hub/univ-toronto/pr-hans-arno-jac',
        )
    except Exception:
        service = QiskitRuntimeService(
            channel=channel,
            instance='pinq-quebec-hub/univ-toronto/default',
        )
    return service

def openfiles(filename):
    """Collect the usage estimate of every job listed in ``filename``.

    Ids found in ``EXCLUDED_JOBS`` are skipped.  Each remaining id is looked
    up through the service from ``get_service`` and its ``usage_estimation``
    is stored.  Failures are printed and recorded, never raised.

    Returns
    -------
    tuple of (list, list, list)
        The usage estimates, the ids reported as not found, and the ids that
        failed with any other exception.
    """
    # Imported inside the function rather than at the top of the notebook.
    from qiskit_ibm_runtime.exceptions import RuntimeJobNotFound

    service = get_service()

    wanted_ids = []
    with open(filename, 'r') as handle:
        for raw_line in handle:
            candidate = raw_line.strip()
            if candidate in EXCLUDED_JOBS:
                continue
            wanted_ids.append(candidate)

    usage, not_found, errored = [], [], []
    for job_id in wanted_ids:
        try:
            usage.append(service.job(job_id).usage_estimation)
        except RuntimeJobNotFound:
            print(f"[Skipped] Job not found: {job_id}")
            not_found.append(job_id)
        except Exception as e:
            print(f"[Error] Problem with job {job_id}: {e}")
            errored.append(job_id)
    return usage, not_found, errored

# Every '.txt' file one level below a directory whose name starts with 'run'.
filenames = glob('run*/*.txt')

# Run openfiles over the files in parallel.  Each element of `job` is the
# (usage estimates, not-found ids, other-error ids) tuple for one file.
job = Parallel(n_jobs=-1)(
    delayed(openfiles)(filename) for filename in tqdm(filenames)
)

In [None]:
# Mean over job files of the summed 'quantum_seconds' of that file's usage
# estimates; entries whose value is None are left out of the sum.
np.mean([sum(j['quantum_seconds'] for j in i[0] if j['quantum_seconds'] is not None) for i in job]
)

In [None]:
# presumably the per-file mean from the previous cell scaled to 500 files and
# converted from seconds to hours — TODO confirm and replace the pasted literal
(1448.1314576194297 * 500 ) / 60 / 60

In [None]:
# The literal equals the result of (1448.1314576194297 * 500) / 60 / 60;
# dividing by 24 presumably converts those hours to days — TODO confirm
201.1293691138097 / 24

In [None]:
from datetime import timedelta

# Input in hours
hours = 201.1293691138097

# Convert to timedelta
delta = timedelta(hours=hours)

# Extract days, hours, minutes
# NOTE: `hours` is rebound here from the float input to the whole-hour
# remainder, so re-running only the lines below would use the wrong input.
days = delta.days
hours = delta.seconds // 3600
minutes = (delta.seconds % 3600) // 60

print(f"{days} days, {hours} hours, {minutes} minutes")


In [None]:
# Number of job files with at least one id that was reported as not found.
len([i[1] for i in job if len(i[1])!=0])

In [None]:
# Number of job files for which at least one usage estimate was retrieved.
len([i[0] for i in job if len(i[0])!=0])

In [None]:
# Earliest timestamp found in the logs.
df_model["timestamp"].sort_values().iloc[0]

In [None]:
# List all jobs created after a fixed date, once for each of the two instances.
# date = df_model["timestamp"].sort_values().iloc[0].to_pydatetime()
from datetime import datetime
date = datetime(2025,5,1) 
service = QiskitRuntimeService(
    channel='ibm_quantum',
    instance='pinq-quebec-hub/univ-toronto/default'
)
jobs_default = service.jobs(created_after=date,descending=True,instance='pinq-quebec-hub/univ-toronto/default',limit=None)

# `service` is rebound below; later cells that use the global `service`
# therefore talk to the pr-hans-arno-jac instance.
service = QiskitRuntimeService(
    channel='ibm_quantum',
    instance='pinq-quebec-hub/univ-toronto/pr-hans-arno-jac'
)
jobs_group = service.jobs(created_after=date,descending=True,instance='pinq-quebec-hub/univ-toronto/pr-hans-arno-jac',limit=None)

In [None]:
# Log timestamps as timezone-aware datetimes in the local timezone, kept in
# the row order of df_model.
log_times = [i.to_pydatetime().astimezone() for i in df_model["timestamp"]]

In [None]:
# Assign jobs to logging intervals: a job belongs to interval `idx` when its
# creation time lies between log_times[idx - 1] and log_times[idx], both
# bounds inclusive.  A job created exactly on a shared boundary is recorded
# for both neighbouring intervals.
# The combined job list and the converted creation times do not depend on the
# interval, so they are computed once instead of once per interval.
all_jobs = jobs_default + jobs_group
job_creation_times = [j.creation_date.astimezone() for j in all_jobs]

found = []
for idx in range(1, len(log_times)):
    start = log_times[idx - 1]
    end = log_times[idx]
    for j, creation_time in zip(all_jobs, job_creation_times):
        if start <= creation_time <= end:
            found.append((idx, j))


In [None]:
# Earliest creation time among the jobs listed for the default instance.
min(j.creation_date.astimezone() for j in jobs_default)

In [None]:
# (creation time, job id, usage estimate) for every listed job, sorted
# ascending, keeping only the last 500*19 entries.
# presumably 500 iterations with 19 jobs each — TODO confirm
checklast = sorted([(j.creation_date.astimezone(),j.job_id(),j.usage_estimation) for j in tqdm(jobs_default + jobs_group)])[-500*19:]

In [None]:
# Number of (interval, job) matches next to the total number of listed jobs.
len(found),len(jobs_default + jobs_group)

In [None]:
# presumably a job count pasted from an earlier output divided by 19 jobs per
# iteration — TODO confirm and derive from variables instead of a literal
8615 / 19

In [None]:
# Earliest log timestamp as a standard-library datetime.
df_model["timestamp"].sort_values().iloc[0].to_pydatetime()

In [None]:
# Creation time, in the local timezone, of the first job in jobs_default
# (the list was requested with descending=True).
jobs_default[0].creation_date.astimezone()

In [None]:
# presumably the expected job total: 500 iterations x 19 jobs — TODO confirm
500 * 19

In [None]:
def fetch(jobid):
    """Return the ``usage_estimation`` of the job with id ``jobid``.

    The job is looked up through the notebook-level ``service`` object, so
    the result depends on which service was created most recently.
    """
    return service.job(jobid).usage_estimation

In [None]:
# Fetch usage estimates in parallel for every listed job whose primitive_id
# is 'estimator'.
data_recent =  Parallel(n_jobs=-1)(
        delayed(fetch)(i.job_id()) for i in tqdm(jobs_default+jobs_group) if i.primitive_id == 'estimator'
    )

In [None]:
# Mean 'quantum_seconds' over the entries whose value is exactly a float.
# NOTE(review): `type(...) is float` also drops int values — confirm only None needs filtering.
print(f"{np.mean([i['quantum_seconds'] for i in data_recent if type(i['quantum_seconds']) is float]):.4f}")

In [None]:
# Number of entries that carry a float 'quantum_seconds'.
len([i['quantum_seconds'] for i in data_recent if type(i['quantum_seconds']) is float])

In [None]:
# Sum of the float 'quantum_seconds' values divided by 3600, i.e. in hours.
sum([i['quantum_seconds'] for i in data_recent if type(i['quantum_seconds']) is float]) / 60 / 60

In [None]:
# presumably the mean printed above (pasted as a literal) scaled to 500 and
# converted to hours — TODO confirm
print(f"{(79.8972 * 500) / 60 / 60:.4f}")

In [None]:
# Largest float 'quantum_seconds' value.
np.max([i['quantum_seconds'] for i in data_recent if type(i['quantum_seconds']) is float])

In [None]:
# Smallest float 'quantum_seconds' value.
np.min([i['quantum_seconds'] for i in data_recent if type(i['quantum_seconds']) is float])

In [None]:
# Number of entries in data_recent (the first value of each dict is taken
# only to build the list that is counted).
len([list(i.values())[0] for i in data_recent])


In [None]:
# presumably a pasted job count divided by 19 jobs per iteration — TODO confirm
8708 / 19

In [None]:
# Shape of the training targets after the (-1, 64) reshape.
y_train.shape

In [None]:
# NOTE(review): bare literal with no effect; the same number appears in `8708 / 19` — confirm whether this cell is still needed.
8708

In [None]:

# Only jobs created after this date are requested.  `datetime` is the class
# here (the notebook uses `from datetime import datetime`), so it is called
# directly; `datetime.datetime` raises AttributeError.
created_after_date = datetime(2025, 5, 6)
# Time elapsed since that date.  Kept under its original name for inspection
# only: `created_after` takes a datetime, not a timedelta.
datadelta = datetime.now() - created_after_date

service = QiskitRuntimeService(
    channel='ibm_quantum',
    instance='pinq-quebec-hub/univ-toronto/pr-hans-arno-jac'
)
jobs_in_last_months = service.jobs(created_after=created_after_date,descending=True,instance='pinq-quebec-hub/univ-toronto/pr-hans-arno-jac',limit=None)

# Usage estimates for the jobs whose primitive_id is 'estimator', fetched in parallel.
data_recent =  Parallel(n_jobs=-1)(
        delayed(fetch)(i.job_id()) for i in tqdm(jobs_in_last_months) if i.primitive_id == 'estimator'
    )

In [None]:
# Number of jobs returned by the query.
len(jobs_in_last_months)

In [None]:
# Fetch usage estimates for the 'estimator' jobs in jobs_in_last_months,
# overwriting data_recent.
# NOTE(review): the same statement also ends the cell that builds jobs_in_last_months.
data_recent =  Parallel(n_jobs=-1)(
        delayed(fetch)(i.job_id()) for i in tqdm(jobs_in_last_months) if i.primitive_id == 'estimator'
    )

In [None]:
# Inspect the primitive id of the first returned job (raises IndexError if the list is empty).
jobs_in_last_months[0].primitive_id

In [None]:
# Inspect the id of the first returned job.
jobs_in_last_months[0].job_id()

In [None]:
# NOTE(review): interactive help lookup left over from development; it prints the full docstring on every run.
help(service.jobs)

In [None]:
# Rebind `service` to the default instance and look up a single job by id.
# Later cells that use the global `service` now talk to this instance.
service = QiskitRuntimeService(
    channel='ibm_quantum',
    instance='pinq-quebec-hub/univ-toronto/default'
)
service.job('d0crshyd8drg008zaby0')

In [None]:
# NOTE(review): repeated interactive help lookup; prints the full docstring on every run.
help(service.jobs)

In [None]:
# FIXME(review): `three_months_ago` is not assigned anywhere in the visible
# notebook, so this cell raises NameError on a fresh kernel — define it or
# remove the cell.
three_months_ago

In [None]:
# Displays the entire job list; consider showing a slice if the list is long.
jobs_in_last_months

In [None]:
# Read the job ids of every 'run*/*txt' file, ordered by run directory name
# and then by the integer in 'jobs_<n>.txt'.  `jobids` holds one list of
# stripped lines per file.
# NOTE(review): the sort key splits on '/', so it assumes POSIX-style paths.
jobids = []
for i in sorted(glob("run*/*txt"),key=lambda x: (x.split('/')[0],int(x.split('/')[1].replace('jobs_','').replace('.txt','')))):
    with open(i,'r') as f:
        # The comprehension variable `i` is local to the comprehension and
        # does not clobber the loop's file path.
        jobids.append([i.strip() for i in f.readlines()])

In [None]:


# Ids of the listed jobs that also appear in one of the job-id files.
# The lookup set is built once: `sum(jobids, [])` inside the filter
# re-flattened every per-file list for each job.
known_job_ids = set(itertools.chain.from_iterable(jobids))
[i.job_id() for i in jobs_in_last_months if i.job_id() in known_job_ids]

In [None]:
# Collect the job handles run by run.  For each 'run*' directory the
# 'job*txt' files are ordered by the integer in their name and handed to
# grab_jobs.
data = []
for i in sorted(glob('run*')):
    if os.path.isdir(i):
        print(i)
        try:
            data.append(grab_jobs(sorted(glob(os.path.join(i,'job*txt')),key=lambda x: int(x.split('/')[1].replace('jobs_','').replace('.txt','')))))
        except Exception as exc:
            # Best effort: a failing run is skipped, but its error is shown.
            # Catching Exception rather than using a bare `except:` lets
            # KeyboardInterrupt stop a long fetch.
            print(f"This one is cooked {i}")
            print(f"  reason: {exc!r}")

In [None]:
# Fetch the job handles of all 'run*/*txt' files in a single call, ordered by
# run directory and job-file number.
# NOTE(review): this rebinds `data`, replacing the per-run list built in the previous cell.
data = grab_jobs(sorted(glob("run*/*txt"),key=lambda x: (x.split('/')[0],int(x.split('/')[1].replace('jobs_','').replace('.txt',''))))) 

In [None]:
# Load predicted values from a CSV under the user's home directory.
# 'Predicted' and 'Reference' are stored as bracketed strings; the brackets
# are stripped and the values converted to float.  Every row is labelled
# with the device name 'State Vector'.
statevectordf = pd.read_csv(os.path.join(os.path.expanduser('~'),'qregress/qml_DDCC/RUD_AL/5AL/A2_HWE-CNOT/A2_HWE-CNOT_predicted_values.csv'))
statevectordf['Predicted'] = [float(i.strip('[]')) for i in statevectordf['Predicted']]
statevectordf['Reference'] = [float(i.strip('[]')) for i in statevectordf['Reference']]
statevectordf['Device'] = len(statevectordf)*['State Vector']



In [None]:
# Displays the whole frame; consider .head() if it is large.
statevectordf

In [None]:
# Parity plot of predicted against calculated t2-amplitudes with marginal
# KDE panels: scatter in the lower-left axes, 'Reference' density on top,
# 'Predicted' density on the right; the upper-right axes is switched off.
# NOTE(review): y_1000_train, y_1000_test, cmap and finaldf are not defined
# in the visible notebook — they must exist in the kernel before this cell runs.
spread = 10e-2  # axis half-range; 10e-2 equals 0.1
# Create the figure with a 2D grid (scatter + KDE for Predicted + KDE for Reference)
fig, axes = plt.subplots(
    2, 2, 
    figsize=(10, 10),
    gridspec_kw={'width_ratios': [4, 1.7], 'height_ratios': [1.7, 4]},
    constrained_layout=True,
    # sharey='row',  # Keep y-axis sharing, but remove sharex to control ticks manually,
    # sharex='col'
)
axes[0, -1].axis("off")
# sns.scatterplot(data=finaldf,x='Reference',y='Predicted',hue='Data',style='Data',markers=['o', 's'], edgecolors='black',ax=axes[1,0],palette=)
axes[1,0].scatter(y_ddcc_train.flatten(),y_1000_train.flatten(),marker='s',label="ibm_quebec Train R$^{2}=$"+f"{r2_score(y_ddcc_train.flatten(),y_1000_train.flatten()):.4f}",color=cmap[0], edgecolors='black')
# The test R^2 is scored against the test references; it previously used the
# train references, which do not correspond to the test predictions.
axes[1,0].scatter(y_ddcc_test.flatten(),y_1000_test.flatten(),label="ibm_quebec Test R$^{2}=$"+f"{r2_score(y_ddcc_test.flatten(),y_1000_test.flatten()):.4f}",color=cmap[1], edgecolors='black')
# sns.scatterplot(data=statevectordf,hue='Data',x='Reference',y='Predicted',style='Data',markers=['d', 'D'], edgecolors='black',ax=axes[1,0],palette=cmap[2:4])
axes[1,0].plot(range(-1,2),range(-1,2),'k--')  # identity line
axes[1,0].set_ylim(-spread,spread)
axes[1,0].set_xlim(-spread,spread)
axes[1,0].set_ylabel("Predicted t$_{2}$-amplitudes")
axes[1,0].set_xlabel("Calculated t$_{2}$-amplitudes")
axes[1,0].legend()


# sns.histplot(data=pd.concat([finaldf,statevectordf]),hue='Data',x='Reference',ax=axes[0,0],fill=True,palette=cmap[0:4])
# sns.histplot(data=finaldf,hue='Data',x='Reference',ax=axes[0,0],fill=True,palette=cmap[0:2],stat='probability',kde=True)
# axes[0,0].set_yscale('log')
sns.kdeplot(data=finaldf,hue='Data',x='Reference',ax=axes[0,0],fill=True, bw_adjust=2,palette=cmap[0:2])
# sns.kdeplot(data=statevectordf,hue='Data',x='Reference',ax=axes[0,0],fill=True, bw_adjust=2,palette=cmap[3:5])
axes[0,0].set_xticklabels([])  # Hide labels but keep ticks
axes[0,0].set_xlabel("")  # Remove x-labels
axes[0,0].set_xlim(-spread,spread)
axes[0,0].set_ylim(1,60)
# axes[0,0].legend(loc=3)

# sns.histplot(data=pd.concat([finaldf,statevectordf]),hue='Data',y='Predicted',ax=axes[1,1],fill=True,palette=cmap[0:4])
# sns.histplot(data=finaldf,hue='Data',y='Predicted',ax=axes[1,1],fill=True,palette=cmap[0:2],stat='probability',kde=True)
# axes[1,1].set_xscale('log')
sns.kdeplot(data=finaldf,hue='Data',y='Predicted',ax=axes[1,1],fill=True, bw_adjust=2,palette=cmap[0:2])
# sns.kdeplot(data=statevectordf,hue='Data',y='Predicted',ax=axes[1,1],fill=True, bw_adjust=2,palette=cmap[3:5])
# axes[1,1].set_xticks(np.hstack([0,np.logspace(0,4,3)]))
# axes[1,1].set_xticklabels(['0']+["10$^{"+f"{np.log10(i):n}"+"}$" for i in np.logspace(0,4,3)])  # Hide labels but keep ticks
axes[1,1].set_yticklabels([])  # Hide labels but keep ticks
axes[1,1].set_ylabel("")  # Remove y-labels
axes[1,1].set_ylim(-spread,spread)
axes[1,1].set_xlim(1,60)
# NOTE(review): the figure is created with constrained_layout=True, which
# matplotlib may treat as incompatible with subplots_adjust/tight_layout —
# confirm which layout method is intended.
plt.subplots_adjust(wspace=0.1, hspace=0.1)  # Adjust width and height spacing
plt.tight_layout()
plt.savefig(os.path.join(os.path.expanduser('~'),'qregress/images/DDCC/finalibm_vs_statevector.png'),dpi=300,bbox_inches='tight')
plt.show()

In [None]:
# NOTE(review): this always evaluates to 2 — each side of `+` is a one-element
# list wrapping a label list.  If the total label count was intended, drop the
# outer brackets.
len([len(y_ddcc_train.flatten())*['Train']]+[len(y_ddcc_test.flatten())*['Test']])

In [None]:
# import matplotlib.pyplot as plt
# import seaborn as sns
# import numpy as np
# from sklearn.metrics import r2_score

# fig, ax = plt.subplots(figsize=(8, 6))

# # Scatter plots
# ax.scatter(y_ddcc_train.flatten(), y_1000_train.flatten(), 
#            label="ibm_quebec Train R$^{2}=$"+f"{r2_score(y_ddcc_train.flatten(), y_1000_train.flatten()):.4f}", 
#            color='b', edgecolors='black')
# ax.scatter(y_ddcc_test.flatten(), y_1000_test.flatten(), 
#            label="ibm_quebec Test R$^{2}=$"+f"{r2_score(y_ddcc_test.flatten(), y_1000_test.flatten()):.4f}", 
#            color='g', edgecolors='black')

# sns.scatterplot(data=statevectordf[statevectordf['Data'] == 'Train'], x='Reference', y='Predicted', 
#                 label="State Vector Train: R$^{2}=$"+f"{r2_score(statevectordf[statevectordf['Data'] == 'Train']['Reference'], statevectordf[statevectordf['Data'] == 'Train']['Predicted']):.4f}", 
#                 ax=ax, edgecolor='black')

# sns.scatterplot(data=statevectordf[statevectordf['Data'] == 'Test'], x='Reference', y='Predicted', 
#                 label="State Vector Test: R$^{2}=$"+f"{r2_score(statevectordf[statevectordf['Data'] == 'Test']['Reference'], statevectordf[statevectordf['Data'] == 'Test']['Predicted']):.4f}", 
#                 ax=ax, edgecolor='black')

# # Identity line
# ax.plot(np.linspace(-1, 2, 100), np.linspace(-1, 2, 100), 'k--')

# ax.set_ylim(-3e-2, 3e-2)
# ax.set_xlim(-3e-2, 3e-2)
# ax.set_ylabel("Predicted t$_{2}$-amplitudes")
# ax.set_xlabel("Calculated t$_{2}$-amplitudes")
# ax.legend()
# plt.tight_layout()

# # Create twin axes for KDE plots
# top_ax = ax.twiny()
# right_ax = ax.twinx()

# # KDE distributions
# sns.kdeplot(statevectordf['Reference'], ax=top_ax, color='gray', lw=2, clip=(-3e-2, 3e-2))
# sns.kdeplot(statevectordf['Predicted'], ax=right_ax, color='gray', lw=2, clip=(-3e-2, 3e-2))

# # Hide tick labels for KDE axes
# top_ax.set_xticks([])
# right_ax.set_yticks([])

# plt.show()


In [None]:
# Total number of entries across the four MSE sequences.
# NOTE(review): MSE_10, MSE_100, MSE_500 and MSE_1000 are not defined in the visible notebook.
len(MSE_10+MSE_100+MSE_500+MSE_1000)

In [None]:
# Training-loss curve built from the concatenated MSE sequences, with one
# point highlighted in red, saved under the user's home directory.
# NOTE(review): min_MSE_idx and combined_MSE are not defined in the visible
# notebook.  The curve is drawn against x = 1..n while the marker sits at
# x = min_MSE_idx; if that index is 0-based the marker is one step to the
# left of its point on the curve — confirm.
plt.scatter(min_MSE_idx,combined_MSE[min_MSE_idx],color='r',label=f'MSE:{combined_MSE[min_MSE_idx]:.4e}')
plt.plot(range(1,len(MSE_10+MSE_100+MSE_500+MSE_1000)+1),MSE_10+MSE_100+MSE_500+MSE_1000)
plt.ylabel('Training Loss (MSE)')
plt.xlabel('Iterations')
plt.xlim(0,900)
plt.ylim(0,8e-4)
# plt.hlines(statevector['MSE_train'][0],-100,1e4,color='r',linestyle='--',label=f'State Vector MSE:{statevector['MSE_train'][0]:.4e}')
# plt.yscale('log')
plt.legend()
plt.tight_layout()
plt.savefig(os.path.join(os.path.expanduser('~'),'qregress/images/DDCC/ibmq_loss.png'),dpi=300,bbox_inches='tight')
plt.show()