In [1]:
import sklearn

In [2]:
from sklearn.datasets import make_regression
from sklearn.linear_model import SGDRegressor

import pandas as pd
import numpy as np

In [3]:
# Synthetic regression problem. The sizes are spelled out (they equal
# make_regression's defaults, hence the (100, 100) shape) and the seed is
# fixed so that Restart & Run All reproduces the same data.
X, y = make_regression(n_samples=100, n_features=100, random_state=0)
# Label the columns c0..c{n-1}; the count is taken from X rather than
# hard-coded so the frame always matches the generated matrix.
pdf = pd.DataFrame(X, columns=['c{}'.format(i) for i in range(X.shape[1])])

In [4]:
# Dimensions of the generated design matrix: (n_samples, n_features).
np.shape(X)

(100, 100)

In [5]:
# Column-wise split of the frame:
#   X1 -> trailing half (c50..c99)
#   X2 -> leading half  (c0..c49)
X1 = pdf.loc[:, ['c%d' % idx for idx in range(50, 100)]]
X2 = pdf.loc[:, ['c%d' % idx for idx in range(50)]]

In [11]:
"""
To do:

Penalise the incoming, expanded feature set with `lambda_`.

Essentially, if the absolute value of an incoming feature is less than
`lambda_`, then its coefficient is set to 0.
"""

class GraftingRegressor(SGDRegressor):
    """SGD regressor whose ``partial_fit`` accepts a widened feature matrix.

    Before every ``partial_fit`` step the stored weight vector is zero-padded
    on the right so that it has one entry per column of the incoming ``X``.
    Existing coefficients keep their positions, so features the model has
    already seen must remain the leading columns (in the same order) and new
    features must be appended after them.

    All constructor parameters are forwarded unchanged to ``SGDRegressor``.

    NOTE(review): the signature mirrors the scikit-learn release this notebook
    was run with (its repr lists ``n_iter`` and ``tol``); confirm the
    parameter list and the ``"squared_loss"`` name against the installed
    version before upgrading.
    """

    # Fitted 1-D weight vectors that must all track the feature count.
    # NOTE(review): ``standard_coef_`` / ``average_coef_`` are the names the
    # parent is expected to use when ``average`` is enabled in the targeted
    # scikit-learn version -- confirm. Missing attributes are simply skipped.
    _GRAFTED_COEF_ATTRS = ("coef_", "standard_coef_", "average_coef_")

    def __init__(self, loss="squared_loss", penalty="l2", alpha=0.0001,
                 l1_ratio=0.15, fit_intercept=True, max_iter=None, tol=None,
                 shuffle=True, verbose=0, epsilon=0.1,
                 random_state=None, learning_rate="invscaling", eta0=0.01,
                 power_t=0.25, warm_start=False, average=False, n_iter=None):
        # Every parameter is listed explicitly (no **kwargs) and passed
        # straight through to the parent constructor.
        super(GraftingRegressor, self).__init__(loss=loss, penalty=penalty,
                                                alpha=alpha, l1_ratio=l1_ratio,
                                                fit_intercept=fit_intercept,
                                                max_iter=max_iter, tol=tol,
                                                shuffle=shuffle,
                                                verbose=verbose,
                                                epsilon=epsilon,
                                                random_state=random_state,
                                                learning_rate=learning_rate,
                                                eta0=eta0, power_t=power_t,
                                                warm_start=warm_start,
                                                average=average, n_iter=n_iter)

    def _partial_grafting_fit(self, X, y):
        """Zero-pad the fitted weight vector(s) to the width of ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Incoming training data; only its column count is used here.
        y : array-like
            Unused; kept so the signature mirrors ``partial_fit``.

        Raises
        ------
        ValueError
            If ``X`` has fewer columns than the model was already fitted on
            (grafting can only add features, never remove them).
        """
        # np.shape also copes with inputs that lack a ``.shape`` attribute
        # (e.g. nested lists).
        dims = np.shape(X)
        if len(dims) != 2:
            # Not a 2-D matrix: do nothing here and let the parent's own
            # input validation report the problem.
            return
        n_features = dims[1]

        for attr in self._GRAFTED_COEF_ATTRS:
            weights = getattr(self, attr, None)
            if weights is None:
                # Model not fitted yet (or this vector is not in use): the
                # parent will allocate it at the right size itself.
                continue

            n_known = weights.shape[0]
            if n_known > n_features:
                raise ValueError(
                    "X has {} features but the model was already fitted on "
                    "{}; grafting can only add features.".format(
                        n_features, n_known))
            if n_known == n_features:
                continue

            # Old weights stay in the leading slots; new features start at 0.
            grown = np.zeros(n_features, dtype=np.float64, order="C")
            grown[:n_known] = weights
            setattr(self, attr, grown)

    def partial_fit(self, X, y, sample_weight=None):
        """Graft any new trailing columns of ``X``, then run one SGD pass.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data. May have more columns than the data seen so far,
            provided the previously seen features come first.
        y : array-like of shape (n_samples,)
            Target values.
        sample_weight : array-like, optional
            Per-sample weights, forwarded to the parent implementation.

        Returns
        -------
        The value returned by the inherited ``_partial_fit`` (the notebook
        output shows this is the estimator itself).
        """
        self._partial_grafting_fit(X, y)
        # max_iter=1: a single pass over X per partial_fit call.
        return self._partial_fit(X, y, self.alpha, C=1.0,
                                 loss=self.loss,
                                 learning_rate=self.learning_rate, max_iter=1,
                                 sample_weight=sample_weight, coef_init=None,
                                 intercept_init=None)

In [7]:
# Fit on the leading 50 columns (X2 = c0..c49). The grafting step keeps the
# existing coefficients at the front of the weight vector and appends zeros
# for new columns, so the features fitted here must be the first columns of
# the wider frame later passed to partial_fit. Fitting on X1 (c50..c99)
# would leave its weights attached to c0..c49 after grafting.
model = GraftingRegressor(max_iter=1000)
model.fit(X2, y)

GraftingRegressor(alpha=0.0001, average=False, epsilon=0.1, eta0=0.01,
         fit_intercept=True, l1_ratio=0.15, learning_rate='invscaling',
         loss='squared_loss', max_iter=1000, n_iter=None, penalty='l2',
         power_t=0.25, random_state=None, shuffle=True, tol=None,
         verbose=0, warm_start=False)

In [8]:
# Number of coefficients after the initial 50-column fit.
model.coef_.shape[0]

50

In [9]:
# Continue training on the full 100-column frame. GraftingRegressor.partial_fit
# first zero-pads coef_ to one entry per column of pdf, then runs the SGD step.
model.partial_fit(pdf, y)

GraftingRegressor(alpha=0.0001, average=False, epsilon=0.1, eta0=0.01,
         fit_intercept=True, l1_ratio=0.15, learning_rate='invscaling',
         loss='squared_loss', max_iter=1000, n_iter=None, penalty='l2',
         power_t=0.25, random_state=None, shuffle=True, tol=None,
         verbose=0, warm_start=False)

In [10]:
# Number of coefficients after grafting: one per column of the full frame.
model.coef_.shape[0]

100