In [1]:
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
import sys
sys.path.append('./src/')
import asymptotics, linear_models, dataset_utils, utils

#### Example: numerics 

In [None]:
# Numerical experiment: for each N_val, sample one dataset, fit the spin model
# `repeats` times on it, and record the averaged scores.
L = 100
offset = 0.50 / np.sqrt(L)           # shift added to / subtracted from the 0.4 baseline
mp = np.full(L, 0.4 + offset)        # passed to the sampler as m_positive
mn = np.full(L, 0.4 - offset)        # passed to the sampler as m_negative
N_values = [50] + list(range(100, 1001, 100))
repeats = 50
Psize = 200
results = []

for N_val in tqdm(N_values):

    # One dataset per N_val; every repeat below is trained on this same draw.
    data = dataset_utils.Sample_dataset(L=L, m_positive=mp, m_negative=mn)
    data.prepare_dataset(P_size=Psize, N_size=N_val)

    X_train, Y_train = data.train_dataset, data.train_labels
    X_test, Y_test = data.test_dataset, data.test_labels

    acc_runs = []
    energy_runs = []
    for _ in range(repeats):
        mdl = linear_models.Model_Spins(L=L, X=X_train, Y=Y_train, u=L, margin=0.5)
        opt_w = mdl.training()

        # Evaluate on the held-out split first, then query the training energy.
        acc_runs.append(mdl.acc_scores(X_test, Y_test))
        energy_runs.append(mdl.e_training())

        del mdl

    mean_acc = sum(acc_runs) / float(repeats)
    mean_energy = sum(energy_runs) / float(repeats)
    results.append([N_val, mean_acc, mean_energy])

# Columns: N_val, mean of acc_scores, mean of e_training.
results = np.array(results)

#### Example: asymptotics

In [None]:
# Asymptotic counterpart: build the solver inputs, then evaluate the
# saddle-point solver on a grid of aN values at fixed Psize / L.
mC = np.full(L, 0.4)

# Uniform direction rescaled so that its Euclidean norm equals sqrt(L).
ones_vec = np.ones(L)
δ = np.sqrt(L) * ones_vec / np.linalg.norm(ones_vec)

cov = np.diag(1 - np.square(mC))     # diagonal matrix with entries 1 - mC**2
solver = asymptotics.SP_equations(L=L, C=cov, d=δ, margin=0.5)

Tstep = 150
aNvals = np.linspace(0.2, 10.0, num=Tstep)

alpha_P = Psize / L                  # first argument handed to the solver on every call
results_theory = [
    solver.execute_saddpeqs_aN_cov(alpha_P, aN)
    for aN in tqdm(aNvals)
]

In [None]:
# Stack the solver outputs (one row per aN) and keep the transposed view,
# so that values[k, :] is the k-th solver output across the whole aN grid.
theory_rows = np.array(results_theory)
values = theory_rows.T

# Each row unpacks into six entries; q, r, b feed the test metrics.
bacc = []
for aP, aN, q, r, b, eT in theory_rows:
    metrics = asymptotics.Test_metrics(Q=q, R=r, B=b)
    bacc.append(metrics.BA())

In [None]:
# Compare the finite-size numerics (`results`) against the asymptotic curves
# (`values`, `bacc`). Both are plotted against N / L: the numerics use
# N_val / L, the theory uses the second solver output (unpacked as aN).
fig, ax = plt.subplots(figsize=(12, 4), ncols=2)

x_numerics = results[:, 0] / L
x_theory = values[1, :]

# Left panel: mean e_training divided by the total sample count (N + Psize),
# overlaid with the last solver output (unpacked as eT).
ax[0].plot(x_numerics, results[:, 2] / (results[:, 0] + Psize), label='numerics')
ax[0].plot(x_theory, values[-1, :], label='asymptotics')
ax[0].set_xlabel('N / L')
ax[0].set_ylabel('e_training / (N + Psize)')
ax[0].set_title('Training energy')
ax[0].legend()

# Right panel: mean acc_scores on the test split, overlaid with Test_metrics.BA().
ax[1].plot(x_numerics, results[:, 1], label='numerics (acc_scores)')
ax[1].plot(x_theory, bacc, label='asymptotics (BA)')
ax[1].set_xlabel('N / L')
ax[1].set_ylabel('accuracy')
ax[1].set_title('Test accuracy')
ax[1].legend()

# tight_layout returns None, so the cell shows only the figure (no repr noise).
fig.tight_layout()

#### Multistate dataset 

In [2]:
# Multistate (Potts) example: load the CSV dataset and set up the solver.
# NOTE: this cell rebinds `L` and `solver`, which the cells above also define;
# re-run those cells before re-plotting the numerics/asymptotics comparison.
PATH = './dataset/SaSc_dimer_bounded_mildbounded.csv'
Potts = dataset_utils.Dataset_wPotts(PATH=PATH)
df = pd.read_csv(PATH)

# Q is the largest entry in the table plus one. The original `df.max()/max()`
# called the builtin max() with no arguments and raised TypeError.
# NOTE(review): the maximum is taken over every column, including the one
# excluded from L below; confirm that column cannot exceed the state values.
Q = df.max().max() + 1
# L is the number of columns minus one (presumably one non-feature column such
# as a label; verify against the CSV).
L = df.shape[1] - 1

Γ, Δ = Potts.buildGD()  # build the Gamma and Delta matrices
solver = asymptotics.SP_equations(L=L, mode='Potts', Q=Q, G=Γ, D=Δ, margin=5.00)