# 残差逼近

In [15]:
import numpy as np
import pandas as pd
from sklearn.svm import SVR
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

In [25]:
class Boost:
    """Residual-fitting (stagewise additive) regression ensemble.

    Stage 0 is fit on the original targets; every later stage is fit on the
    residual left over after subtracting the (shrunken) predictions of all
    earlier stages. The ensemble prediction is the sum of the stage
    predictions, each scaled by ``learning_rate``.
    """

    def __init__(self, base_model=SVR, T=10, learning_rate=1.0) -> None:
        """Create the (unfitted) stage models.

        :param base_model: zero-argument callable (typically an estimator
            class) that returns a new regressor exposing ``fit(X, y)`` and
            ``predict(X)``. Defaults to ``SVR``.
        :param T: number of boosting stages. Defaults to 10.
        :param learning_rate: shrinkage factor applied to every stage's
            prediction, both when updating the residual and when summing
            predictions. The default 1.0 gives plain residual fitting.
        :raises ValueError: if ``learning_rate`` is not strictly positive.
        """
        if not learning_rate > 0:
            raise ValueError(
                f"learning_rate must be positive, got {learning_rate!r}"
            )
        self.T = T
        self.base_model = base_model
        self.learning_rate = learning_rate
        # One independent estimator instance per stage.
        self.boost_model = [self.base_model() for _ in range(self.T)]

    def fit(self, X, y):
        """Fit the stages sequentially on the running residual.

        :param X: training features, passed unchanged to each stage model.
        :param y: training targets, 1-D or a single column; never mutated.
        :return: ``self``, so calls can be chained.
        """
        # np.array always copies, so the caller's y is left untouched.
        # ravel() turns an (n, 1) column into (n,): subtracting (n,)
        # predictions in place from an (n, 1) array cannot be broadcast.
        residual = np.array(y, dtype=np.float64).ravel()
        for model in self.boost_model:
            model.fit(X, residual)
            stage_pred = np.ravel(model.predict(X))
            # What remains becomes the target for the next stage.
            residual -= self.learning_rate * stage_pred
        return self

    def predict(self, X):
        """Return the sum of all stage predictions for ``X``.

        :param X: feature rows; anything the stage models accept
            (array, DataFrame, nested list).
        :return: 1-D float array with one prediction per row of ``X``.
        """
        # np.shape also handles inputs without a .shape attribute (lists).
        n_samples = np.shape(X)[0]
        total = np.zeros(n_samples)
        for model in self.boost_model:
            total += self.learning_rate * np.ravel(model.predict(X))
        return total

    def score(self, X, y):
        """Return the R² of the ensemble's predictions for ``X`` against ``y``."""
        return r2_score(y, self.predict(X))

In [26]:
# Load the transactions table and print its column / dtype summary.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
data = pd.read_csv('E:\\桌面\\数据分析项目实战\\data\\creditcard.csv')
data.info()

# 'Class' is used as the target; every other column is a predictor.
y = data['Class']
X = data.drop(columns='Class')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 284807 entries, 0 to 284806
Data columns (total 31 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   Time    284807 non-null  float64
 1   V1      284807 non-null  float64
 2   V2      284807 non-null  float64
 3   V3      284807 non-null  float64
 4   V4      284807 non-null  float64
 5   V5      284807 non-null  float64
 6   V6      284807 non-null  float64
 7   V7      284807 non-null  float64
 8   V8      284807 non-null  float64
 9   V9      284807 non-null  float64
 10  V10     284807 non-null  float64
 11  V11     284807 non-null  float64
 12  V12     284807 non-null  float64
 13  V13     284807 non-null  float64
 14  V14     284807 non-null  float64
 15  V15     284807 non-null  float64
 16  V16     284807 non-null  float64
 17  V17     284807 non-null  float64
 18  V18     284807 non-null  float64
 19  V19     284807 non-null  float64
 20  V20     284807 non-null  float64
 21  V21     28

In [27]:
# Hold out 30% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Build a 10-stage residual-fitting ensemble of SVR models and train it.
boost = Boost(T=10, base_model=SVR)
boost.fit(X_train, y_train)

<__main__.Boost at 0x50b34820>

In [28]:
# Evaluate: report R² on the held-out test split.
# NOTE(review): the recorded run printed a negative R², i.e. worse than a
# constant mean prediction — worth investigating before relying on this model.
print(
    "R² (测试集):",
    boost.score(X_test, y_test),
)

R² (测试集): -6.072198228793072
