In [None]:
# Import required packages
import sys
# !{sys.executable} -m pip install seaborn
from glob import glob
import os
import json
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
from shutil import copy,move
from distutils.dir_util import copy_tree


In [None]:
# Quantum result files, matched two directory levels below ./sine and kept in sorted order.
files = glob('./sine/*/*/*results.json')
files.sort()

# Path of the 16-qubit A2_HWE-CZ results file that is loaded further down as the baseline.
baseline = "../sixteenqubit_test/sin16qubit/A2_HWE-CZ/A2_HWE-CZ_results.json"

In [None]:

# Read every quantum results file and record its train/test R^2 twice:
#   * quantum_results -> long-format rows (model, ratio, set, value)
#   * quantum_data    -> float array with one (ratio, R2_train, R2_test) row per file
quantum_results = []
score_rows = []
for result_path in files:
    with open(result_path, 'r') as handle:
        scores = json.load(handle)

    # The ratio label is the name of the grandparent directory of the file;
    # it is kept as a string here.
    ratio = os.path.basename(os.path.dirname(os.path.dirname(result_path)))
    r2_train, r2_test = scores['R2_train'], scores['R2_test']

    score_rows.append((ratio, r2_train, r2_test))
    quantum_results.append(('A2_HWE-CZ', ratio, 'R2_train', r2_train))
    quantum_results.append(('A2_HWE-CZ', ratio, 'R2_test', r2_test))

# Casting to float only works when every directory name parses as a number.
quantum_data = np.array(score_rows).astype(float)

In [None]:

# Load the baseline results JSON into memory.
with open(baseline) as baseline_file:
    quantum_base = json.load(baseline_file)

In [None]:
# NOTE: `function` is not referenced by any other visible cell.
function = ['sine', 'quadratic', 'sine']
trainsize = [0.1, 0.3, 0.5, 0.7, 0.8]
classical_models = [
    'ridge', 'lasso', 'elastic', 'knn', 'rfr', 'grad', 'svr', 'krr', 'gpr',
]

# Directory holding one "<trainsize>_<model>_scores.json" file per combination.
scores_dir = f"{os.path.expanduser('~')}/qregress/classical_testing/sine"

# Long-format rows: (model, ratio, set, value).
classical_results = []
for model_name in classical_models:
    for fraction in trainsize:
        with open(f"{scores_dir}/{fraction}_{model_name}_scores.json", 'r') as handle:
            model_scores = json.load(handle)
        for split in ('R2_train', 'R2_test'):
            classical_results.append((model_name, fraction, split, model_scores[split]))
            

In [None]:
# Long-format table of the classical scores, one row per (model, ratio, set).
result_columns = ['model', 'ratio', 'set', 'value']
classical_df = pd.DataFrame(data=classical_results, columns=result_columns)

In [None]:

# Long-format table of the quantum scores, with the same columns as the classical table.
result_columns = ['model', 'ratio', 'set', 'value']
quantum_df = pd.DataFrame(data=quantum_results, columns=result_columns)

In [None]:
# Stack classical and quantum results into one long-format table.
# - The classical ratios are floats while the quantum ratios are directory-name
#   strings, so `ratio` is cast to float to give a single numeric column for
#   plotting and filtering.
# - ignore_index=True gives the combined frame unique row labels instead of
#   repeating 0..n-1 from each input frame.
combined_df = (
    pd.concat([classical_df, quantum_df], ignore_index=True)
    .astype({'ratio': float})
)

In [None]:
# Inspect the ratio labels recorded for the quantum runs.
quantum_df.loc[:, 'ratio']

In [None]:
# Summary statistics of the classical values for ratios above 0.1.
above_smallest = classical_df['ratio'] > 0.1
classical_df.loc[above_smallest, 'value'].describe()

In [None]:
# Summary statistics of the quantum values for ratios above 0.1.
# The ratio column holds directory-name strings, so it is cast to float and
# compared numerically; a string comparison against "0.1" is lexicographic
# (e.g. "0.10" > "0.1" is True).
quantum_df.loc[quantum_df['ratio'].astype(float) > 0.1, 'value'].describe()

In [None]:
# For each training ratio, report per classical model whether it matches or
# beats the quantum model on both sets, and print the two test scores whenever
# the classical test score is at least as high.
# Ratios are matched numerically so the lookup does not depend on how the
# quantum ratio was formatted as a string.
quantum_ratio = quantum_df['ratio'].astype(float)

for ts in trainsize:
    print(ts)
    # The quantum rows for this ratio are the same for every classical model.
    qdf = quantum_df[quantum_ratio == ts].set_index('set')
    for m in classical_models:
        cdf = classical_df[(classical_df['ratio'] == ts) & (classical_df['model'] == m)].set_index('set')
        print(m, all(cdf['value'] >= qdf['value']))
        if cdf['value'].loc['R2_test'] >= qdf['value'].loc['R2_test']:
            print(cdf['value'].loc['R2_test'], qdf['value'].loc['R2_test'])
    print()

In [None]:
# Learning curves of every model (line plot) with an inset for the smallest ratio.

# Work on a copy whose ratio column is numeric: the quantum ratios originate
# from directory names, and a mixed str/float column cannot be used as a
# numeric x axis or filtered with `== 0.1`.
plot_df = combined_df.astype({'ratio': float})

# Sort the model names before pairing them with colours so every run assigns
# the same colour to the same model (iteration order of a set of strings is
# not stable between interpreter sessions).
model_names = sorted(set(plot_df['model']))
palette = sns.color_palette('Paired', len(model_names))
model_pal = dict(zip(model_names, palette))

SMALL_SIZE = 8
MEDIUM_SIZE = 12
BIGGER_SIZE = 12

plt.rc('font', size=MEDIUM_SIZE)          # controls default text sizes
plt.rc('axes', titlesize=MEDIUM_SIZE)     # fontsize of the axes title
plt.rc('axes', labelsize=MEDIUM_SIZE)     # fontsize of the x and y labels
plt.rc('xtick', labelsize=MEDIUM_SIZE)    # fontsize of the tick labels
plt.rc('ytick', labelsize=MEDIUM_SIZE)    # fontsize of the tick labels
plt.rc('legend', fontsize=MEDIUM_SIZE)    # legend fontsize
plt.rc('figure', titlesize=MEDIUM_SIZE)   # fontsize of the figure title

fig, ax = plt.subplots(figsize=(8, 5))
sns.lineplot(data=plot_df, x='ratio', y='value', style='set', hue='model',
             palette=model_pal, markers=True, ax=ax)
# Drop seaborn's automatic legend; a custom one is built below.
ax.get_legend().remove()

# Inset: scores above 0.8 at the smallest training ratio.
ins = ax.inset_axes([0.7, 0.7, 0.2, 0.2])
inset_df = plot_df[(plot_df['ratio'] == 0.1) & (plot_df['value'] > 0.8)]
sns.lineplot(data=inset_df, x='ratio', y='value', style='set', hue='model',
             palette=model_pal, markers=True, ax=ins)
ins.set_ylim(0.8, 1.05)
ins.set_ylabel('R$^{2}$')
ins.set_xlabel('Training Set (%)')
ins.set_xticks([0.1])
# No legend is attached when there was nothing to draw in the inset.
if ins.get_legend() is not None:
    ins.get_legend().remove()

# Single combined legend: keep every model/set entry and drop the two
# sub-title entries that seaborn labels with the variable names.
handles, labels = ax.get_legend_handles_labels()
legend_entries = [(h, l) for h, l in zip(handles, labels) if l not in ('model', 'set')]
hue_legend = ax.legend(
    [h for h, _ in legend_entries],
    [l for _, l in legend_entries],
    title='Model/Dataset', bbox_to_anchor=(0.3, 0.41), borderaxespad=0, ncols=3,
)

ax.set_ylim(0, 1.05)
ax.set_xlim(0.05, 0.85)
ax.set_ylabel('R$^{2}$')
ax.set_xlabel('Training Set (%)')
plt.tight_layout()
plt.show()

In [None]:
# Normalise dtypes and turn the set labels 'R2_train'/'R2_test' into 'Train'/'Test'.
# Taking the LAST '_'-separated token keeps the cell idempotent: re-running it
# on already-normalised labels ('Train') leaves them unchanged, whereas
# indexing token [1] raises IndexError on the second run.
combined_df = (
    combined_df
    .astype({'model': str, 'ratio': float, 'set': str, 'value': float})
    .assign(set=lambda d: d['set'].str.split('_').str[-1].str.capitalize())
)

# Quantum model curves, one frame per set.
train = combined_df[(combined_df['model'] == 'A2_HWE-CZ') & (combined_df['set'] == 'Train')]
test = combined_df[(combined_df['model'] == 'A2_HWE-CZ') & (combined_df['set'] == 'Test')]

# Every row that does not belong to the quantum model.
drop_pqc = combined_df[combined_df['model'] != 'A2_HWE-CZ']



In [None]:
# Display the rows of every model except 'A2_HWE-CZ' (the whole frame is rendered).
drop_pqc

In [None]:
# Distinct model names left after removing the quantum model.
set(drop_pqc['model'].unique())

In [None]:
# Two-panel figure: box plots of the classical models (left) next to the
# train/test curves of the A2_HWE-CZ quantum model (right), sharing the y axis.
pair_colors = sns.color_palette('Paired', 2)
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 5), sharey=True)

sns.boxplot(data=drop_pqc, x='ratio', y='value', hue='set', native_scale=True,
            ax=ax1, palette=pair_colors)
ax1.set_title('Classical Models')

# Sort by ratio so the connecting lines run left to right regardless of the
# order in which the result files were read.
train_sorted = train.sort_values('ratio')
test_sorted = test.sort_values('ratio')
ax2.plot(train_sorted['ratio'], train_sorted['value'], '-x', color=pair_colors[0], label='Train')
ax2.plot(test_sorted['ratio'], test_sorted['value'], '-o', color=pair_colors[1], label='Test')
ax2.legend()
ax2.set_title('A2_HWE-CZ')

# Identical axis formatting for both panels; tick labels are shown as percentages.
# The percent format rounds, whereas int(x * 100) truncates and can be off by
# one for values that are not exactly representable as floats.
xticks = np.linspace(0, 0.9, 10)
for axis in (ax1, ax2):
    axis.set_xlim(0, 0.9)
    axis.set_xticks(xticks)
    axis.set_xticklabels([f"{x:.0%}" for x in xticks])
    axis.set_ylabel('R$^{2}$')
    axis.set_xlabel('Training Set (%)')

plt.tight_layout()

# Create the output directory first so savefig does not fail on a fresh checkout.
image_dir = f"{os.path.expanduser('~')}/qregress/images/Function_Fitting"
os.makedirs(image_dir, exist_ok=True)
plt.savefig(f"{image_dir}/sine_learning_curves.png", bbox_inches='tight', pad_inches=0.1)
plt.show()

In [None]:
# Compare the quantum model with every classical model at each training ratio.
#
# NOTE(review): `pivotdf` is not defined in any visible cell, so it is rebuilt
# here from `combined_df`. The layout (rows = model, columns = (ratio, set)) is
# inferred from how the table is indexed below -- confirm it matches the
# intended table.
lc_long = combined_df.assign(
    ratio=combined_df['ratio'].astype(float),
    # Accept both raw ('R2_train') and normalised ('Train') set labels.
    set=combined_df['set'].astype(str).str.split('_').str[-1].str.capitalize(),
)
# pivot raises if a (model, ratio, set) combination occurs more than once.
pivotdf = lc_long.pivot(index='model', columns=['ratio', 'set'], values='value')


def _compare_scores(pqc_score, model_score):
    """Return (is_better, is_close) for one quantum score against one classical score.

    is_better: the quantum score is greater than or equal to the classical score.
    is_close:  the scores, rounded to 3 decimals, agree within atol=1e-2.
    """
    better = bool(pqc_score >= model_score)
    close = bool(np.isclose(np.round(pqc_score, 3), np.round(model_score, 3), atol=1e-2))
    return better, close


# check_lc[ratio][model] -> four boolean flags. Scores are looked up by the
# 'Train'/'Test' label rather than by position, so the result does not depend
# on the column order of the pivot table.
check_lc = {}
for i in trainsize:
    check_lc[i] = {}
    for m in classical_models:
        train_better, train_close = _compare_scores(
            pivotdf.loc['A2_HWE-CZ', (i, 'Train')], pivotdf.loc[m, (i, 'Train')])
        test_better, test_close = _compare_scores(
            pivotdf.loc['A2_HWE-CZ', (i, 'Test')], pivotdf.loc[m, (i, 'Test')])
        check_lc[i][m] = {
            'Train Better': train_better,
            'Test Better': test_better,
            'Train Close': train_close,
            'Test Close': test_close,
        }

In [None]:
# For every training ratio, count per check how many classical models yield
# True and how many yield False.
for k, checks in check_lc.items():
    print(k)
    # Rows = models, columns = checks. Use the entry of the CURRENT ratio `k`
    # (not a fixed 0.1) and force a boolean dtype so `~` is a logical negation.
    flags = pd.DataFrame(checks).T.astype(bool)
    counts = pd.concat([flags.sum(), (~flags).sum()], axis=1).rename(columns={0: 'True', 1: 'False'})
    print(counts)