# Regression
The task of regression is to predict a continuous-valued attribute associated with an object.

In [None]:
import pandas as pd
from sklearn import tree
from sklearn.feature_selection import mutual_info_regression

### Let's try to create an ML model that will estimate the market value of a house in Melbourne based on the input information. First we download the data from https://github.com/njtierney/melb-housing-data/.

In [None]:
# Load the raw Melbourne housing CSV directly from GitHub into a DataFrame.
data = pd.read_csv(
    filepath_or_buffer='https://raw.githubusercontent.com/njtierney/melb-housing-data/master/data-raw/melbourne_housing_raw.csv',
)

### Now we can look at it.

In [None]:
# Preview the first rows of the freshly loaded DataFrame (rendered as the cell output).
data.head()

### We can see that some rows contain missing values (NaN); we have to remove them.

In [None]:
# Discard every row (axis=0) that has a missing value in any column.
data = data.dropna(axis=0, how='any')

In [None]:
# Preview the first rows again, now that rows containing NaN have been dropped.
data.head()

### The value to be predicted (the target) is in the 'Price' column. We store the target in the variables `y_train` (the first 8000 rows) for training and `y_test` (the remaining rows) for testing.

In [None]:
# Target split: the first 8000 rows (by position) are used for training and
# everything after them for testing. `.iloc` makes the positional slicing
# explicit -- after dropna() the index labels are no longer contiguous -- and
# matches how the feature matrix is split.
y_train = data['Price'].iloc[:8000]
y_test = data['Price'].iloc[8000:]

### We choose the input features for our model, e.g. 'Rooms', 'Bathroom', 'Landsize', 'BuildingArea', 'Lattitude' and 'Longtitude' (sic — these are the column names used in the code below). Then we store the input features in the variables `X_train` for training and `X_test` for testing.

In [None]:
# Columns used as model inputs. 'Lattitude' and 'Longtitude' are the exact
# keys used to index the DataFrame, so their spelling must not be changed.
features = [
    'Rooms',
    'Bathroom',
    'Landsize',
    'BuildingArea',
    'Lattitude',
    'Longtitude',
]

In [None]:
# Feature split: take the first 8000 rows (by position) for training and the
# rest for testing, then keep only the selected feature columns.
X_train = data.iloc[:8000][features]
X_test = data.iloc[8000:][features]

In [None]:
# Summary statistics of the training features (rendered as the cell output).
X_train.describe()

In [None]:
# Summary statistics of the testing features, for comparison with the training split.
X_test.describe()

### Now we create the regression model and train it on the training dataset. We will use the Decision Tree Regressor.

In [None]:
# Create a decision-tree regressor with random_state pinned to 1, then fit it
# on the training features and training targets.
model = tree.DecisionTreeRegressor(random_state=1)
model.fit(X=X_train, y=y_train)

### Predictions for the first five houses from the training dataset

In [None]:
# Show the first five training inputs and their true prices, followed by the
# model's predictions for those same five rows.
display(X_train.head(5))
display(y_train.head(5))
print(f'Prediction: {model.predict(X_train.iloc[:5])}')

### Predictions for the first five houses from the testing dataset

In [None]:
# Show the first five testing inputs and their true prices, followed by the
# model's predictions for those same five rows.
display(X_test.head(5))
display(y_test.head(5))
print(f'Prediction: {model.predict(X_test.iloc[:5])}')

### Inspection of our model

In [None]:
# Print the fitted tree's decision rules as text. Passing the feature names
# labels each split with its column name instead of a generic feature_<i>.
print(tree.export_text(model, feature_names=features))