In [1]:
import numpy as np
from scipy.integrate import quad
import matplotlib.pyplot as plt
import sys


sys.path.insert(1, '../g3m_utils/')
from state_evolution.data_models.custom import Custom,CustomSpectra
from state_evolution.experiments.learning_curve import CustomExperiment
import pandas as pd
from scipy.special import erf
import pickle

In [2]:
# Problem sizes used throughout the notebook.
d = 600          # input dimension: length of Sigma, number of columns of W1 and Wt
k1 = k2 = 2400   # rows of W1 (first student layer) and rows of W2 (second student layer)
kt = 1200        # rows of Wt / length of theta (teacher width)

In [3]:
# Diagonal of the data covariance, stored as a length-d vector (all ones here);
# later cells expand it to a matrix with np.diag(Sigma).
Sigma = np.ones(shape=d)

def door(x, threshold=2, gain=1.1):
    """Odd, saturating piecewise-linear activation.

    Computes ``sign(x) * min(|x|, threshold) * gain`` elementwise: linear with
    slope ``gain`` while ``|x| <= threshold`` and constant at
    ``+/- threshold * gain`` beyond that.

    Parameters
    ----------
    x : scalar or array-like
        Input value(s).
    threshold : float, optional
        Magnitude at which the input saturates. Defaults to 2, the value that
        was previously hard-coded.
    gain : float, optional
        Multiplicative factor applied to the clipped input. Defaults to 1.1,
        the value that was previously hard-coded.

    Returns
    -------
    NumPy scalar or ndarray with the same shape as ``x``.
    """
    return np.sign(x)*np.minimum(np.abs(x), threshold)*gain

In [4]:
activ = "tanh"


def f1(x):
    """Activation of the first student layer."""
    return np.tanh(x)


def f2(x):
    """Activation of the second student layer (same as f1; erf(x) is an alternative)."""
    return f1(x)


def _gaussian_coefficients(f, r):
    """Gaussian coefficients of the activation ``f`` for z ~ N(0, r).

    Parameters
    ----------
    f : callable
        Scalar activation function.
    r : float
        Variance of the centred Gaussian the integrals are taken against.

    Returns
    -------
    (k1, var, ks) where
        k1  = E[z f(z)] / r,
        var = E[f(z)^2],
        ks  = sqrt(max(var - r * k1**2, 0)).

    The difference under the square root is clamped at 0, as the teacher cell
    already does: for a (near-)linear activation it is zero up to quadrature
    error and could otherwise come out slightly negative and yield NaN.
    """
    def first_moment(x):
        return np.exp(-.5*x**2/r)*x*f(x)/np.sqrt(2*np.pi*r)

    def second_moment(x):
        return np.exp(-.5*x**2/r)*f(x)**2/np.sqrt(2*np.pi*r)

    k1 = quad(first_moment, -np.inf, np.inf)[0]/r
    var = quad(second_moment, -np.inf, np.inf)[0]
    ks = np.sqrt(np.maximum(var - r*k1**2, 0))
    return k1, var, ks


# Layer 1 is integrated against a Gaussian with variance mean(Sigma).
r_1 = np.mean(Sigma)
k1_1, var_1, ks_1 = _gaussian_coefficients(f1, r_1)

# Layer 2 reuses the second moment of layer 1's output as its input variance.
r_2 = var_1
k1_2, var_2, ks_2 = _gaussian_coefficients(f2, r_2)

print("var1={} var2={}".format(var_1,var_2))

var1=0.3942944903978409 var2=0.23645041049929583


In [5]:
# Teacher activation and its Gaussian coefficients.
teacher_activ = "sign"


def ft(x):
    """Teacher activation: elementwise sign."""
    return np.sign(x)


# Variance of the centred Gaussian used in the integrals below.
rt = np.mean(Sigma)

# var_t = E[ft(z)^2] for z ~ N(0, rt).
var_t = quad(
    lambda x: np.exp(-.5*x**2/rt)*ft(x)**2/np.sqrt(2*np.pi*rt),
    -np.inf,
    np.inf,
)[0]

# k1_t = E[z ft(z)] / rt.
k1_t = quad(
    lambda x: np.exp(-.5*x**2/rt)*x*ft(x)/np.sqrt(2*np.pi*rt),
    -np.inf,
    np.inf,
)[0]/rt

# Clamped at 0 so numerical error cannot put a negative value under the root.
ks_t = np.sqrt(np.maximum(var_t - rt*k1_t**2, 0))

In [6]:
# Summary of (r, k1, ks) for the teacher and for each student layer.
for template, values in (
    ("teacher r={} k1={} ks={}\n", (rt, k1_t, ks_t)),
    ("layer #1 r={} k1={} ks={}\n", (r_1, k1_1, ks_1)),
    ("layer #2 r={} k1={} ks={}", (r_2, k1_2, ks_2)),
):
    print(template.format(*values))

teacher r=1.0 k1=0.7978845608028652 ks=0.602810274989087

layer #1 r=1.0 k1=0.6057055096021587 ks=0.16557574108374126

layer #2 r=0.3942944903978409 k1=0.7635495895007043 ks=0.08107760388474508


In [9]:

# Seed NumPy's legacy global generator so the sampled weights, and every
# quantity derived from them, are reproducible under Restart & Run All.
SEED = 0
np.random.seed(SEED)

# With an "id" teacher the first teacher layer is the identity matrix, so the
# teacher width is set to the input dimension. NOTE: this overwrites the
# notebook-level `kt`, which later cells read.
if teacher_activ == "id":
    kt = d

# Teacher read-out vector with entries of variance 1/kt.
theta = np.random.randn(kt)/np.sqrt(kt)

# Teacher first layer: identity for the "id" teacher, Gaussian with entry
# variance 1/d otherwise. Nothing is sampled only to be thrown away.
if teacher_activ == "id":
    Wt = np.eye(d)
else:
    Wt = np.random.randn(kt, d)/np.sqrt(d)

# Student weights: W1 is (k1, d) with entry variance 1/d, W2 is (k2, k1) with
# entry variance 1/k1.
W1 = np.random.randn(k1, d)/np.sqrt(d)
W2 = np.random.randn(k2, k1)/np.sqrt(k1)

In [10]:


# (k2, k2) matrix later passed to the data model as `student_student_cov`.
# It is the sum of three terms; the grouping below spells out the order in
# which the original one-liner is evaluated.
Omega = (
    ((k1_2**2)*(k1_1**2))*(W2@W1@np.diag(Sigma)@W1.T@W2.T)  # through both linear parts
    + (((k1_2**2)*(ks_1**2))*W2)@W2.T                       # ks_1 term of layer 1 seen through W2
    + (ks_2**2)*np.eye(k2)                                  # ks_2 term of layer 2 (diagonal)
)

In [11]:
# (k2, kt) matrix later passed to the data model as `teacher_student_cov`:
# the product of the three k1 coefficients times W2 W1 diag(Sigma) Wt^T.
Phi = (((k1_1*k1_2)*k1_t)*W2) @ W1 @ np.diag(Sigma) @ Wt.T


In [13]:
# (kt, kt) matrix later passed to the data model as `teacher_teacher_cov`:
# k1_t^2 * Wt diag(Sigma) Wt^T plus a ks_t^2 multiple of the identity.
Psi = (
    ((k1_t**2)*Wt) @ np.diag(Sigma) @ Wt.T
    + (ks_t**2)*np.eye(kt)
)


In [14]:
def replica(alphas, Psi, Omega, Phi, lamb=0.1, verbose=False):
    """Run the ridge-regression `CustomExperiment` once per sample complexity.

    A single `Custom` data model is built from the three covariance matrices,
    then a fresh `CustomExperiment` is created and solved for every entry of
    ``alphas`` (one alpha per experiment, exactly as before). The per-alpha
    curves are concatenated once at the end instead of growing a DataFrame
    inside the loop.

    Note: the teacher vector is read from the notebook-level globals ``theta``
    and ``kt`` (``theta.flatten() * sqrt(kt)``); they are not parameters.

    Parameters
    ----------
    alphas : sequence of float
        Sample complexities to evaluate; must be non-empty.
    Psi : array
        Passed as ``teacher_teacher_cov``.
    Omega : array
        Passed as ``student_student_cov``.
    Phi : array
        Passed as ``teacher_student_cov``.
    lamb : float, optional
        Passed as ``regularisation`` to every experiment.
    verbose : bool, optional
        Forwarded to `CustomExperiment`.

    Returns
    -------
    The concatenation (via ``pd.concat``, original indices kept) of
    ``experiment.get_curve()`` for every alpha, in the order given.

    Raises
    ------
    ValueError
        If ``alphas`` is empty (previously an opaque ``IndexError``).
    """
    if len(alphas) == 0:
        raise ValueError("alphas must contain at least one sample complexity")

    data_model = Custom(
        teacher_teacher_cov=Psi,
        student_student_cov=Omega,
        teacher_student_cov=Phi,
        teacher=theta.flatten()*np.sqrt(kt),
        fixed_teacher=True,
    )

    curves = []
    for alpha in alphas:
        # A new experiment per alpha, so no solver state is shared between
        # different sample complexities.
        experiment = CustomExperiment(
            task='ridge_regression',
            regularisation=lamb,
            data_model=data_model,
            tolerance=1e-9,
            damping=0.5,
            verbose=verbose,
            max_steps=10000,
        )
        experiment.learning_curve(alphas=[alpha])
        curves.append(experiment.get_curve())

    # One concat at the end avoids the quadratic copying of concat-in-a-loop.
    return pd.concat(curves)

In [18]:
# 20 evenly spaced values from 0.1 to 8, rescaled by d/k2.
# NOTE(review): presumably the grid is in samples per input dimension and the
# d/k2 factor converts it to samples per student feature - confirm.
alphas = (np.linspace(start=0.1, stop=8, num=20)*d)/k2

In [19]:
# Regularisation strength handed to replica(...) as `lamb`.
lamb = 1e-3

In [None]:
# Learning curve over the whole alpha grid at the chosen regularisation.
repl = replica(
    alphas,
    Psi=Psi,
    Omega=Omega,
    Phi=Phi,
    lamb=lamb,
)

In [None]:
# Test error against sample complexity. Uses the explicit Axes interface and
# labels both axes so the figure can be read on its own.
fig, ax = plt.subplots()
ax.plot(repl["sample_complexity"], repl["test_error"])
ax.set(xlabel="sample complexity", ylabel="test error")
plt.show()
