## **Data preparation**

In [2]:
import pandas as pd
from sklearn.datasets import fetch_california_housing

# Fetch the California housing bundle once; features, column names and the
# target are all read from this single object.
data = fetch_california_housing()

# Features become a labelled DataFrame; the target is kept exactly as the
# loader returned it.
X = pd.DataFrame(data=data.data, columns=data.feature_names)
y = data.target

# Preview the first five rows as a sanity check on columns and value ranges.
X.head(n=5)

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25


In [3]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=30,
)

# Report the size of each partition, one per line.
print(f"Train shape: {X_train.shape}")
print(f"Test shape: {X_test.shape}")

Train shape: (16512, 8)
Test shape: (4128, 8)


## **Models**

### **KNN regressor**

In [4]:
from sklearn.neighbors import KNeighborsRegressor

# k-nearest-neighbours baseline; n_neighbors=5 is the library default, spelled
# out here so the hyperparameter is visible to the reader.
# NOTE(review): k-NN is distance-based and X is used unscaled (raw columns such
# as Population and MedInc differ by orders of magnitude) — consider
# standardizing the features before fitting; TODO confirm the effect on R2.
knn = KNeighborsRegressor(n_neighbors=5)

In [5]:
# Fit on the training split only; the trailing semicolon hides the estimator
# repr in the cell output.
knn.fit(X=X_train, y=y_train);

In [6]:
from sklearn.metrics import r2_score

# Predict on both splits with the fitted k-NN model.
pred_train = knn.predict(X_train)
pred_test = knn.predict(X_test)

# R2 on train vs. test, rounded to three decimals; the gap between the two
# shows how well the model generalizes.
print(f"Train R2: {r2_score(y_train, pred_train):.3f}")
print(f"Test R2: {r2_score(y_test, pred_test):.3f}")

Train R2: 0.455
Test R2: 0.139


### **Linear regression**

In [7]:
from sklearn.linear_model import LinearRegression

# Ordinary least-squares baseline; fit_intercept=True is the library default,
# spelled out here for clarity.
lr = LinearRegression(fit_intercept=True)

In [9]:
# Fit on the training split only; the trailing semicolon hides the estimator
# repr in the cell output.
lr.fit(X=X_train, y=y_train);

In [10]:
# Predict on both splits with the fitted linear model (reuses the
# pred_train / pred_test names from the previous model's evaluation).
pred_train = lr.predict(X_train)
pred_test = lr.predict(X_test)

# R2 on train vs. test, rounded to three decimals.
print(f"Train R2: {r2_score(y_train, pred_train):.3f}")
print(f"Test R2: {r2_score(y_test, pred_test):.3f}")

Train R2: 0.611
Test R2: 0.588


### **Decision tree regressor**

In [14]:
from sklearn.tree import DecisionTreeRegressor

# Regression tree with its depth capped at 9 levels and a fixed random_state
# so repeated fits give the same tree.
tree = DecisionTreeRegressor(
    max_depth=9,
    random_state=30,
)

In [15]:
# Fit on the training split only; the trailing semicolon hides the estimator
# repr in the cell output.
tree.fit(X=X_train, y=y_train);

In [16]:
# Predict on both splits with the fitted tree (reuses the
# pred_train / pred_test names from the previous model's evaluation).
pred_train = tree.predict(X_train)
pred_test = tree.predict(X_test)

# R2 on train vs. test, rounded to three decimals.
print(f"Train R2: {r2_score(y_train, pred_train):.3f}")
print(f"Test R2: {r2_score(y_test, pred_test):.3f}")

Train R2: 0.807
Test R2: 0.683
