In [None]:
from glob import glob
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import os, sys, json, joblib
import pandas as pd
from sklearn.metrics import r2_score

In [None]:
# Collect every "*results.json" file located exactly two directory levels
# below the working directory, in lexicographic path order.
files = glob('./*/*/*results.json')
files.sort()

In [None]:

# Read the R2 scores from every discovered results file.
#   quantum_results: long-format records (model, ratio, set, value)
#   quantum_data:    one (ratio, R2_train, R2_test) row per file
quantum_results = []
quantum_data = []
for results_path in files:
    print(results_path)
    with open(results_path,'r') as handle:
        scores = json.load(handle)
    # The ratio label is the name of the directory two levels above the file.
    run_dir = os.path.dirname(results_path)
    ratio_label = os.path.basename(os.path.dirname(run_dir))
    r2_train = scores['R2_train']
    r2_test = scores['R2_test']
    quantum_data.append((ratio_label,r2_train,r2_test))
    quantum_results.extend([
        ('A2_HWE-CNOT',ratio_label,'R2_train',r2_train),
        ("A2_HWE-CNOT",ratio_label,'R2_test',r2_test),
    ])

# The mixed str/float tuples are coerced to a single float array here, so the
# directory names have to be parseable as numbers.
quantum_data = np.array(quantum_data).astype(float)

In [None]:

# Training-set ratios and classical model names; together they determine the
# score files read from ./classical/.
trainsize=[0.1, 0.3, 0.5, 0.7, 0.8]
classical_models=[
    'ridge', 'lasso', 'elastic', 'knn', 'rfr', 'grad', 'svr', 'krr', 'gpr',
]

# Long-format records (model, ratio, set, value), two per score file.
classical_results=[]

for model_name in classical_models:
    for ratio in trainsize:
        score_path = f"./classical/{ratio}_{model_name}_scores.json"
        with open(score_path,'r') as handle:
            model_scores = json.load(handle)
        for split in ('R2_train', 'R2_test'):
            classical_results.append((model_name,ratio,split,model_scores[split]))
            

In [None]:
# Long-format table of the classical scores: one row per (model, ratio, set).
classical_df = pd.DataFrame(
    data=classical_results,
    columns=['model','ratio','set','value'],
)

In [None]:
# Long-format table of the quantum scores, same column layout as classical_df.
quantum_df = pd.DataFrame(
    data=quantum_results,
    columns=['model','ratio','set','value'],
)

In [None]:
# Stack classical rows on top of quantum rows; each frame keeps its own
# original row labels.
combined_df = pd.concat([classical_df, quantum_df], axis=0)

In [None]:
# For each training ratio, print per classical model whether both of its R2
# scores are >= the quantum ones, and print the two test scores whenever the
# classical test R2 is >= the quantum test R2.
for ts in trainsize:
    print(ts)
    # quantum_df holds the ratio as a string (taken from a directory name),
    # hence the comparison against the formatted value.
    quantum_scores = quantum_df[quantum_df['ratio']==f'{ts}'].set_index('set')['value']
    for m in classical_models:
        is_model_row = (classical_df['ratio']==ts)&(classical_df['model']==m)
        classical_scores = classical_df[is_model_row].set_index('set')['value']
        print(m,all(classical_scores>=quantum_scores))
        classical_test = classical_scores.loc['R2_test']
        quantum_test = quantum_scores.loc['R2_test']
        if classical_test>=quantum_test:
            print(classical_test,quantum_test)
    print()

In [None]:
# Normalise dtypes: quantum ratios arrive as strings, classical ones as floats.
combined_df = combined_df.astype({'model':str,'ratio':float,"set":str,'value':float})
# Turn 'R2_train'/'R2_test' into 'Train'/'Test'. Taking the LAST '_'-separated
# component (instead of index 1) keeps this cell re-runnable: an already
# relabelled 'Train' maps to 'Train' rather than raising IndexError.
combined_df['set'] = [label.split('_')[-1].capitalize() for label in combined_df['set']]

# Quantum-circuit rows, split by evaluation set, for the line plots.
is_pqc = combined_df['model']=='A2_HWE-CNOT'
train = combined_df[is_pqc&(combined_df['set']=='Train')]
test = combined_df[is_pqc&(combined_df['set']=='Test')]

# Every classical model (everything that is not the quantum circuit).
drop_pqc = combined_df[~is_pqc]

In [None]:


# Two-panel learning-curve figure sharing the y-axis:
#   left  - box plots of the classical models' R2 per training ratio
#   right - train/test R2 of the A2_HWE-CNOT circuit per training ratio
fig, (ax1, ax2) = plt.subplots(1,2,figsize=(10,5),sharey=True)
set_colors = sns.color_palette('Paired',2)

sns.boxplot(data=drop_pqc,x='ratio',y='value',hue='set', native_scale=True,ax=ax1,palette=set_colors)

# Disabled inset zooming on the 0.1 ratio, kept for reference:
# ins = ax1.inset_axes([0.55,0.15,0.4,0.4])
# sns.boxplot(data=drop_pqc[drop_pqc['ratio']==0.1],x='ratio',y='value',hue='set', native_scale=True,palette=sns.color_palette('Paired',2),ax=ins)
# ins.set_ylim(-0.5,1.05)
# ins.set_ylabel('R$^{2}$')
# ins.set_xlabel('Training Set Ratio')
# ins.set_xticks([0.1])
# ins.get_legend().remove()

# Tick positions 0.0 .. 0.9 shown as percentages. round() is used rather than
# int() so a float such as 0.30000000000000004 * 100 can never be truncated
# to the wrong label.
xticks = np.linspace(0, 0.9, 10)
xticklabels = [f"{round(x * 100)}%" for x in xticks]

ax1.set_ylim(0,1.01)
ax1.set_xlim(0,0.9)
ax1.set_ylabel('R$^{2}$')
ax1.set_xlabel('Training Set (%)')
ax1.set_title('Classical Models')
ax1.set_xticks(xticks)
ax1.set_xticklabels(xticklabels)

ax2.plot(train['ratio'],train['value'],'-x',color=set_colors[0],label='Train')
ax2.plot(test['ratio'],test['value'],'-o',color=set_colors[1],label='Test')
ax2.legend()
ax2.set_xticks(xticks)
ax2.set_xticklabels(xticklabels)

# ax2.set_ylim(0.95,1)
ax2.set_xlim(0.0,0.9)
ax2.set_ylabel('R$^{2}$')
ax2.set_xlabel('Training Set (%)')
ax2.set_title('A2_HWE-CNOT')
plt.tight_layout()

# This figure used to be written to DDCC_learning_curves.png, the very path
# the following quantum-only plot also saves to, so it was always overwritten.
# It now gets its own file; DDCC_learning_curves.png keeps its final content.
image_dir = os.path.join(os.path.expanduser('~'), 'qregress', 'images', 'DDCC')
os.makedirs(image_dir, exist_ok=True)  # savefig does not create missing folders
fig.savefig(os.path.join(image_dir, 'DDCC_learning_curves_classical_vs_pqc.png'), bbox_inches='tight', pad_inches=0.1)
plt.show()

In [None]:
# Line plot of the A2_HWE-CNOT train/test R2 against the training ratio.
# Reuses `xticks` from the preceding two-panel plotting cell.
r_df = pd.DataFrame(quantum_data,columns=['index','Train','Test'])
r_long = r_df.melt(id_vars=['index'], value_vars=['Train','Test'])
pair_colors = sns.color_palette('Paired',2)
ax = sns.lineplot(r_long,x='index',y='value',hue='variable',style='variable',palette=pair_colors,markers=['>','<'])
ax.set_ylim(0,1)

# Show the tick positions as percentages.
percent_labels = [f"{int(x * 100)}%" for x in xticks]
ax.set_xticks(xticks)
ax.set_xticklabels(percent_labels)

ax.set_ylabel('R$^{2}$')
ax.set_xlabel('Training Set (%)')
ax.set_title('A2_HWE-CNOT')
ax.legend(loc=2)
plt.tight_layout()
figure_path = f"{os.path.expanduser('~')}/qregress/images/DDCC/DDCC_learning_curves.png"
plt.savefig(figure_path, bbox_inches='tight', pad_inches=0.1)
plt.show()

In [None]:
# Wide table indexed by (model, ratio) with one column per evaluation set,
# ordered Train then Test.
wide_scores = combined_df.pivot(index=["model", "ratio"], columns="set", values="value")
pivotdf = wide_scores.loc[:, ['Train','Test']]

In [None]:
# check_lc[ratio][model] records, for the Train and Test R2 separately, whether
# the A2_HWE-CNOT circuit is at least as good as the classical model ("Better")
# and whether the two scores, rounded to 3 decimals, agree within 1e-2 ("Close").
check_lc = {}
score_cols = ['Train','Test']
for i in [0.1,0.3,0.5,0.7,0.8]:
    check_lc[i]={}
    # Explicit (model, ratio) row key plus column list: unambiguous MultiIndex
    # lookup returning a Series labelled 'Train'/'Test'.
    quantum_scores = pivotdf.loc[('A2_HWE-CNOT',i), score_cols]
    # sorted() gives a deterministic model order; iterating the bare set made
    # the order of the inner dict vary between runs.
    for m in sorted({'elastic', 'gpr', 'grad', 'knn', 'krr', 'lasso', 'rfr', 'ridge', 'svr'}):
        classical_scores = pivotdf.loc[(m,i), score_cols]
        isbetter = quantum_scores>=classical_scores
        # Plain arrays in score_cols order, so positions 0/1 are Train/Test.
        isclose = np.isclose(np.round(quantum_scores.to_numpy(),3),np.round(classical_scores.to_numpy(),3),atol=1e-2)
        # isbetter is label-indexed: access by label, since positional [0]/[1]
        # on such a Series is deprecated in pandas 2.x.
        check_lc[i][m]={'Train Better':isbetter['Train'], "Test Better":isbetter['Test'], 'Train Close':isclose[0], 'Test Close':isclose[1]}

In [None]:
# For every training ratio, count over all classical models how often each
# check in check_lc came out True and how often False.
for k in check_lc.keys():
    print(k)
    # Use the current ratio's results; this previously always read
    # check_lc[0.1], so the same table was printed for every ratio.
    flags = pd.DataFrame(check_lc[k]).T.astype(bool)
    print(pd.concat([flags.sum(),(~flags).sum()],axis=1).rename(columns={0:'True',1:'False'}))