In [2]:
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from copy import copy
import pandas as pd
from tqdm.auto import tqdm
from sklearn.svm import LinearSVR
from sklearn.neighbors import KNeighborsRegressor


In [3]:
df = pd.read_csv('/Users/markymark/Desktop/BASS/dmf_data.csv', index_col=0)

In [4]:
df.head()

Unnamed: 0,income,health,age,drive,transport,spend,lambda_1,lambda_2,lambda_3
1,14878.418921,5,27,2,Public,,2.476777,-0.167808,-0.927279
2,18748.626162,2,82,5,Taxi,,1.139309,-0.571882,-0.539133
3,65584.575644,3,42,2,Public,,0.692606,-0.457764,-0.466095
4,23140.876404,1,64,3,Public,,1.175049,-0.540462,-0.581071
5,24112.879953,5,36,0,Public,,1.927762,-0.23831,-0.871698


In [5]:
np.set_printoptions(suppress=True)

X = df.dropna()[['age', 'income', 'drive']]
X_lambda = df.dropna()[['lambda_1', 'lambda_2', 'lambda_3']]
y = df.dropna().spend

LinearRegression().fit(X, y).coef_

array([0.08227652, 0.00008273, 0.95867876])

In [6]:
LinearRegression().fit(df.dropna()[['age', 'income', 'drive', 
                                    'lambda_1', 'lambda_2', 'lambda_3']], y).coef_

array([0.09110992, 0.00009568, 0.95591498, 2.2648908 , 0.36615245,
       2.60299426])

In [7]:
class BassBoost():
    
    def __init__(self, max_depth, eta=0.5, max_iter=5):
        self.trees = []
        self.max_depth = max_depth
        self.max_iter = max_iter
        self.eta = eta
        
    def bias_pred(self, X, y):
        pred = np.zeros(X.shape[0])
        
        for tree in range(len(self.trees)):
            pred += tree.predict(X)
        
        return pred
    
    def __calc_grad__(self, y, pred):
        return 2 * (pred - y)
        
         
    def fit(self, X_index, X_bias, y):
        target = y.copy()
        
        for i in tqdm(range(self.max_iter)):
            
            linear = LinearRegression().fit(X_index, target)
            self.betas = linear.coef_
            self.intercept = linear.intercept_
            
            grads = self.__calc_grad__(y, X_index @ self.betas + self.intercept)
            #grads = y - X_index @ self.betas - self.intercept
            
            tree = LinearSVR(C=self.max_depth, max_iter=10000).fit(X_bias, grads)
            self.trees.append(copy(tree))
            target -= tree.predict(X_bias)
            
        return self.betas
        
    def predict(self, X, y, decompose=False):
        
        pass
        

In [8]:
Bass = BassBoost(max_depth=0.01, max_iter=1000)

Bass.fit(X, X_lambda, y)

  0%|          | 0/1000 [00:00<?, ?it/s]

array([0.08220604, 0.00007978, 0.93903821])

In [201]:
Bass.trees

[DecisionTreeRegressor(max_depth=2, random_state=999),
 DecisionTreeRegressor(max_depth=2, random_state=999),
 DecisionTreeRegressor(max_depth=2, random_state=999),
 DecisionTreeRegressor(max_depth=2, random_state=999),
 DecisionTreeRegressor(max_depth=2, random_state=999),
 DecisionTreeRegressor(max_depth=2, random_state=999),
 DecisionTreeRegressor(max_depth=2, random_state=999),
 DecisionTreeRegressor(max_depth=2, random_state=999),
 DecisionTreeRegressor(max_depth=2, random_state=999),
 DecisionTreeRegressor(max_depth=2, random_state=999),
 DecisionTreeRegressor(max_depth=2, random_state=999),
 DecisionTreeRegressor(max_depth=2, random_state=999),
 DecisionTreeRegressor(max_depth=2, random_state=999),
 DecisionTreeRegressor(max_depth=2, random_state=999),
 DecisionTreeRegressor(max_depth=2, random_state=999),
 DecisionTreeRegressor(max_depth=2, random_state=999),
 DecisionTreeRegressor(max_depth=2, random_state=999),
 DecisionTreeRegressor(max_depth=2, random_state=999),
 DecisionT