In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
import pandas as pd
import seaborn as sns
from scipy import optimize
from sklearn.metrics import mean_squared_error

In [25]:
class LMM():

    def __init__(self):
        pass

    def fit(self, X, y, groups, method="bfgs"):
        
        
        if method == "bfgs":

            def f(x):
                beta_shared, beta_fg = x
                preds = np.squeeze(X) * beta_shared + np.matmul(groups, np.squeeze(X)) * beta_fg
                # MSE
                return np.mean((y - preds)**2)

            # Initial value of x
            x0 = np.random.normal(size=2)

            # Try with BFGS
            xopt = optimize.minimize(f,x0,method='bfgs',options={'disp':1})
            
            self.coefs_shared = xopt.x[0]
            self.coefs_fg = xopt.x[1]

        elif method == "project":

            # Regression on all samples
            reg = LinearRegression().fit(X, y)
            coefs_shared = reg.coef_

            # Get residuals for foreground group
            X_fg = X[groups == 1]
            y_fg = y[groups == 1]
            X_fg_preds = reg.predict(X_fg)
            X_residuals = y_fg - X_fg_preds

            # Regress residuals on the foreground
            reg = LinearRegression().fit(X_fg, X_residuals)
            coefs_fg = reg.coef_

            self.coefs_shared = coefs_shared
            self.coefs_fg = coefs_fg

        else:
            raise Exception("Method must be one of [bfgs, project]")


    def predict(self, X, y, groups):
        print(self.coefs_shared.shape)
        print("FP: ", (np.squeeze(X) * self.coefs_shared).shape)
        print("SP: ", np.matmul(groups, np.squeeze(X)).shape)
        print("TP: ", self.coefs_fg.shape)
        preds = np.squeeze(X) * self.coefs_shared + np.matmul(groups, np.squeeze(X)) * self.coefs_fg
        return preds

In [26]:
# simple example
n = 200
p = 12
coefs_shared_true = np.repeat([1], p)
coefs_shared_true = np.reshape(coefs_shared_true, (p, 1))
coefs_fg_true = np.repeat([4], p)
coefs_fg_true = np.reshape(coefs_fg_true, (p, 1))
X = np.random.normal(0, 1, size=(n, p))

groups = np.random.binomial(n=1, p=0.5, size=n)

# Shared effect
y = X @ coefs_shared_true 
y = y.reshape((1, n))
y = y + np.random.normal(0, 1, n)

# Foreground-specific effect
y = y.reshape((n, 1))

y[groups == 1] = y[groups == 1] + X[groups == 1, :] @ coefs_fg_true
groups = np.reshape(groups, (1, n))

In [27]:
# Fit LMM
lmm = LMM()
print("X shape: ", X.shape)
print("y shape: ", y.shape)
lmm.fit(X, y, groups=groups)

X shape:  (200, 12)
y shape:  (200, 1)
Optimization terminated successfully.
         Current function value: 148.223615
         Iterations: 8
         Function evaluations: 27
         Gradient evaluations: 9
      fun: 148.22361452106261
 hess_inv: array([[ 0.50811353, -0.00334869],
       [-0.00334869,  0.00319808]])
      jac: array([0., 0.])
  message: 'Optimization terminated successfully.'
     nfev: 27
      nit: 8
     njev: 9
   status: 0
  success: True
        x: array([ 2.97866127, -0.01484787])


In [28]:
# Test on a random test set
X_test = np.random.normal(0, 1, size=(n, p))
y_test = X_test @ coefs_shared_true + np.random.normal(0, 1, n)
groups_test = np.random.binomial(n=1, p=0.5, size=n)
y_test[groups_test == 1] = y_test[groups_test == 1] + X_test[groups_test == 1, :] @ coefs_fg_true
groups_test = np.reshape(groups_test, (1, n))

preds = lmm.predict(X_test, y_test, groups_test)

()
FP:  (200, 12)
SP:  (1, 12)
TP:  ()


In [24]:
preds.shape

(200, 12)