In [0]:
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import KFold
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

In [4]:
# Load the iris dataset bundled with scikit-learn.
iris = datasets.load_iris()

# Features go into a DataFrame labelled with the dataset's own feature names;
# the target array is kept separately as Y.
X = pd.DataFrame(data=iris["data"], columns=iris["feature_names"])
Y = iris["target"]

print(X)

     sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
0                  5.1               3.5                1.4               0.2
1                  4.9               3.0                1.4               0.2
2                  4.7               3.2                1.3               0.2
3                  4.6               3.1                1.5               0.2
4                  5.0               3.6                1.4               0.2
..                 ...               ...                ...               ...
145                6.7               3.0                5.2               2.3
146                6.3               2.5                5.0               1.9
147                6.5               3.0                5.2               2.0
148                6.2               3.4                5.4               2.3
149                5.9               3.0                5.1               1.8

[150 rows x 4 columns]


In [5]:
# Identify correlated columns.
# Work on absolute pairwise correlations between the feature columns.
corr_df = X.corr().abs()

# Boolean mask selecting the strict upper triangle (k=1 excludes the diagonal),
# so each pair of columns is considered once and never against itself.
up_tri = np.triu(np.ones(corr_df.shape, dtype=bool), k=1)

# Entries outside the mask become NaN and therefore never pass the threshold.
corr_df = corr_df.where(up_tri)

# A column is flagged when its correlation with any earlier column exceeds 0.75.
correlated_cols = corr_df.columns[(corr_df > 0.75).any(axis=0)].tolist()
print(correlated_cols)

['petal length (cm)', 'petal width (cm)']


In [6]:
# Remove the columns flagged as highly correlated.
# Rebind X from a non-mutating drop instead of using inplace=True, and ignore
# labels that are already absent, so re-running this cell does not raise a
# KeyError once the columns have been removed.
X = X.drop(columns=correlated_cols, errors="ignore")
print(X.shape)

(150, 2)


In [33]:
# Hold out 20% of the rows for evaluation. random_state is fixed so the split,
# and every metric computed from it, is the same on each Restart & Run All.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

# Show the resulting partition sizes.
print(X_train.shape)
print(X_test.shape)

(120, 2)
(30, 2)


In [0]:
# Two base learners, both created with their default hyperparameters:
# model1 is a linear regression, model2 a k-nearest-neighbours regressor.
# NOTE(review): Y is iris.target (class labels) and is fitted here as a numeric
# regression target - confirm this is intended.
model1, model2 = LinearRegression(), KNeighborsRegressor()

In [35]:
# Train both base learners on the same training split.
model1.fit(X=X_train, y=Y_train)
# Kept as the final expression so the cell displays the fitted estimator.
model2.fit(X=X_train, y=Y_train)

KNeighborsRegressor(algorithm='auto', leaf_size=30, metric='minkowski',
                    metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                    weights='uniform')

In [0]:
# Predict on the held-out rows with each fitted model, in model order:
# pred1 comes from model1 and pred2 from model2.
pred1, pred2 = (estimator.predict(X_test) for estimator in (model1, model2))

In [53]:
# Score each model's test-set predictions against the true targets.
mse_model1 = mean_squared_error(Y_test, pred1)
mse_model2 = mean_squared_error(Y_test, pred2)
r2_model1 = r2_score(Y_test, pred1)
r2_model2 = r2_score(Y_test, pred2)

# Report both errors first, then both R^2 scores.
print("Model1 mean_squared_error:", mse_model1)
print("Model2 mean_squared_error:", mse_model2)
print("Model1 r2_score:", r2_model1)
print("Model2 r2_score:", r2_model2)

Model1 mean_squared_error: 0.18133032773112684
Model2 mean_squared_error: 0.12666666666666668
Model1 r2_score: 0.7280045084033098
Model2 r2_score: 0.81


Simple Averaging: combine the two models by taking the unweighted mean of their test-set predictions.

In [0]:
# Ensemble prediction: element-wise, equally weighted mean of the two models'
# test-set predictions.
pred = np.mean([pred1, pred2], axis=0)

In [55]:
# Score the averaged predictions with the same two metrics used for the
# individual models.
ensemble_mse = mean_squared_error(Y_test, pred)
ensemble_r2 = r2_score(Y_test, pred)

print("mean_squared_error:", ensemble_mse)
print("r2_score:", ensemble_r2)

mean_squared_error: 0.13252767579124675
r2_score: 0.8012084863131299
