In [20]:
import numpy as np
import pandas as pd
import yaml
from spxmod.model import XModel
from scipy.sparse import spdiags,csc_matrix
import matplotlib.pyplot as plt
# environment: /mnt/team/msca/priv/jira/MSCA-399-rover-stage/env

In [2]:
with open("../spxmod_example/xmodel_config.yaml", "r") as f:
    xmodel_config = yaml.safe_load(f)

data = pd.read_parquet("../spxmod_example/data.parquet").query("train")
train = data.query("~holdout").reset_index(drop=True)
test = data.query("holdout").reset_index(drop=True)

xmodel = XModel.from_config(xmodel_config["xmodel"])
xmodel_config["xmodel_fit"]['options']['gtol'] = 1e-6
xmodel_config["xmodel_fit"]['options']['xtol'] = 1e-10

xmodel.fit(train, data_span=data, **xmodel_config["xmodel_fit"])

print(f"train: {np.mean(np.abs(train['obs'] - xmodel.predict(train)))}")
print(f"test: {np.mean(np.abs(test['obs'] - xmodel.predict(test)))}")

NTCGSolver:
niter=  0, fun=1.77e+08, gnorm=1.48e+07, xdiff=1.00e+00, step=1.00e+00
niter=  1, fun=3.25e+07, gnorm=3.53e+06, xdiff=2.00e+00, step=1.00e+00, cg_iter=1
niter=  2, fun=1.09e+07, gnorm=1.23e+06, xdiff=1.14e+00, step=1.00e+00, cg_iter=1
niter=  3, fun=3.90e+06, gnorm=4.46e+05, xdiff=1.04e+00, step=1.00e+00, cg_iter=1
niter=  4, fun=1.43e+06, gnorm=1.63e+05, xdiff=1.02e+00, step=1.00e+00, cg_iter=1
niter=  5, fun=5.40e+05, gnorm=5.97e+04, xdiff=1.01e+00, step=1.00e+00, cg_iter=1
niter=  6, fun=2.15e+05, gnorm=2.18e+04, xdiff=1.00e+00, step=1.00e+00, cg_iter=2
niter=  7, fun=9.80e+04, gnorm=7.89e+03, xdiff=1.00e+00, step=1.00e+00, cg_iter=2
niter=  8, fun=5.67e+04, gnorm=2.78e+03, xdiff=1.00e+00, step=1.00e+00, cg_iter=2
niter=  9, fun=4.33e+04, gnorm=9.34e+02, xdiff=1.00e+00, step=1.00e+00, cg_iter=2
niter= 10, fun=3.96e+04, gnorm=2.68e+02, xdiff=9.99e-01, step=1.00e+00, cg_iter=2
niter= 11, fun=3.89e+04, gnorm=4.93e+01, xdiff=9.98e-01, step=1.00e+00, cg_iter=4
niter= 12, fun=

  alpha = rho_cur / dotprod(p, q)
  p += z


niter= 23, fun=3.88e+04, gnorm=2.07e-04, xdiff=3.49e-06, step=1.56e-02, cg_iter=5000
niter= 24, fun=3.88e+04, gnorm=5.91e-04, xdiff=6.47e-06, step=3.12e-02, cg_iter=5000
niter= 25, fun=3.88e+04, gnorm=2.61e-04, xdiff=4.61e-06, step=7.81e-03, cg_iter=5000
niter= 26, fun=3.88e+04, gnorm=1.58e-04, xdiff=1.02e-06, step=3.91e-03, cg_iter=5000
niter= 27, fun=3.88e+04, gnorm=7.93e-04, xdiff=9.88e-06, step=6.25e-02, cg_iter=5000
niter= 28, fun=3.88e+04, gnorm=7.93e-04, xdiff=1.51e-09, step=1.91e-06, cg_iter=5000
niter= 29, fun=3.88e+04, gnorm=7.92e-04, xdiff=7.56e-10, step=9.54e-07, cg_iter=5000
niter= 30, fun=3.88e+04, gnorm=5.09e-04, xdiff=7.74e-07, step=9.77e-04, cg_iter=5000
niter= 31, fun=3.88e+04, gnorm=4.63e-04, xdiff=1.24e-07, step=2.44e-04, cg_iter=5000
niter= 32, fun=3.88e+04, gnorm=4.63e-04, xdiff=6.90e-12, step=1.49e-08, cg_iter=5000
train: 1.1566022419588144e-05
test: 1.3389779355447823e-05


In [3]:
xmodel_config["xmodel_fit"]
xmodel_config

{'xmodel': {'model_type': 'binomial',
  'obs': 'obs',
  'spaces': [<spxmod.space.Space at 0x7faa737ee330>],
  'var_builders': [<spxmod.variable_builder.VariableBuilder at 0x7faa737b2b10>,
   <spxmod.variable_builder.VariableBuilder at 0x7faa737eecc0>],
  'weights': 'weights'},
 'xmodel_fit': {'options': {'cg_maxiter': 5000,
   'cg_maxiter_incr': 200,
   'cg_maxiter_init': 1000,
   'gtol': 1e-06,
   'maxiter': 100,
   'verbose': True,
   'xtol': 1e-10}}}

In [4]:
xmodel.core.attach_df(train,xmodel._encode)

In [23]:
import cvxpy as cp

y = xmodel.core.data.obs
A = xmodel.core.mat[0]
A = csc_matrix(A.multiply(1/(A.power(2).mean(axis=0)+1e-20)))

B = xmodel.core.get_linear_gmat()[0][0]
vv = xmodel.core.get_linear_gvec()[1][:,None]
B = B.multiply(vv).toarray()

n = A.shape[1]

beta = cp.Variable(n)
lambd = cp.Parameter(nonneg=True)
neg_log_likelihood = cp.sum(
    cp.multiply(xmodel.core.data.weights,cp.logistic(A @ beta)- cp.multiply(y, A @ beta))
)
regularization = cp.sum_squares(B@beta)/2

problem  = cp.Problem(cp.Minimize(
    (neg_log_likelihood + regularization)
))
problem.solve(verbose = True)

                                     CVXPY                                     
                                     v1.6.5                                    
(CVXPY) Apr 15 12:34:09 PM: Your problem has 38 variables, 0 constraints, and 0 parameters.
(CVXPY) Apr 15 12:34:09 PM: It is compliant with the following grammars: DCP, DQCP
(CVXPY) Apr 15 12:34:09 PM: (If you need to solve this problem multiple times, but with different data, consider using parameters.)
(CVXPY) Apr 15 12:34:09 PM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution.
(CVXPY) Apr 15 12:34:09 PM: Your problem is compiled with the CPP canonicalization backend.
-------------------------------------------------------------------------------
                                  Compilation                                  
-------------------------------------------------------------------------------
(CVXPY) Apr 15 12:34:09 PM: Compiling problem (target solver=CLARABEL).


SolverError: Solver 'CLARABEL' failed. Try another solver, or solve with verbose=True for more information.

In [None]:
xmodel.core.objective()

In [7]:
beta.value

array([-11.963652  , -11.96130415, -11.9565712 , -11.94944104,
       -11.93996805, -11.92829462, -11.91469285, -11.89956708,
       -11.88349518, -11.86714623, -11.85106213, -11.8358908 ,
       -11.82197549, -11.80957128, -11.79886832, -11.79023124,
       -11.78370767, -11.78049949, -11.77980447, -16.64315654,
       -16.64315654, -16.64315655, -16.64315657, -16.64315655,
       -16.64315648, -16.64315633, -16.64315612, -16.6431559 ,
       -16.64315569, -16.64315551, -16.64315537, -16.64315536,
       -16.64315536, -16.64315538, -16.6431554 , -16.64315545,
       -16.6431555 , -16.64315556])

In [8]:
xmodel.core.objective(beta.value)

39524.18453772909

In [9]:
xmodel.core.objective(xmodel.core.opt_coefs)

38801.41780580382

array([76.        , 88.        , 60.        , ..., 23.52278841,
       48.72741367, 16.53861265])

In [None]:
def gl_solve(
    A,
    hfunc,
    C = None,
    c = None,
    x0 = None,
):
    
    