# Linear Regression on Boston Dataset for the Data Analytics Session
by Rathachai C.


## 1) To load libraries

In [0]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics



---



## 2) To load data and check

In [0]:
# Read the Boston dataset from the remote CSV file into a DataFrame.
boston = pd.read_csv(
    'https://rathachai.github.io/DA101/data/boston.csv'
)

In [0]:
# Bare expression as the last line of the cell: the notebook renders the DataFrame.
boston

In [0]:
# Print the DataFrame summary (columns, non-null counts, dtypes) to check the load.
boston.info()

In [0]:
# Summary statistics of the columns, as produced by DataFrame.describe().
boston.describe()



---



## 3) To do data exploration

In [0]:
# Grid of pairwise plots over the columns of the frame.
sns.pairplot(data=boston)

In [0]:
# A single row of panels: medv on the y-axis against each column of the frame
# in turn (x_vars is the full column list, so medv itself is included).
sns.pairplot(
    data=boston,
    x_vars=boston.columns,
    y_vars=["medv"],
)

In [0]:
# Distribution of the target column medv: histogram with a KDE curve on top.
# sns.distplot has been deprecated since seaborn 0.11 and emits a warning on
# every call; histplot with kde=True and stat="density" is the documented
# replacement and keeps the density-scaled y-axis that distplot produced.
sns.histplot(data=boston, x="medv", kde=True, stat="density")



---



## 4) To do feature selection

### 4.1) to check the correlation

In [0]:
# Pairwise correlation matrix between the columns of the frame.
boston.corr()

In [0]:
# Colour-coded rendering of the correlation matrix.
sns.heatmap(data=boston.corr())

In [0]:
# Correlation of every column with medv, listed in ascending order.
boston.corr()["medv"].sort_values()

In [0]:
# medv plotted against the two candidate predictors, lstat and rm.
sns.pairplot(
    data=boston,
    x_vars=["lstat", "rm"],
    y_vars=["medv"],
)

### 4.2) to select data

In [0]:
# Target: the medv column.
y = boston["medv"]
# Predictors: the lstat and rm columns, kept as a two-column DataFrame.
X = boston[["lstat", "rm"]]

In [0]:
# Show the selected predictor columns.
X

In [0]:
# Show the selected target column.
y



---



## 5) To create a Linear Regression Model

### 5.1) to split train and test datasets

In [0]:
# Hold out 40% of the rows for testing; the fixed random_state makes the
# split identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.4,
    random_state=101,
)

### 5.2) to train and create a linear regression model

In [0]:
# Unfitted linear regression estimator, constructed with scikit-learn's default settings.
lm = LinearRegression()

In [0]:
# Fit the model on the training portion only.
lm.fit(X=X_train, y=y_train)

In [0]:
# Fitted coefficients, one per predictor column (X was built as lstat, rm).
lm.coef_

In [0]:
# Fitted intercept term of the model.
lm.intercept_



---



## 6) To evaluate the model

### 6.1) to predict from the test set

In [0]:
# Predictions of the fitted model for the held-out test rows.
y_pred = lm.predict(X=X_test)

In [0]:
# Show the predicted values for the test rows.
y_pred

### 6.2) to evaluate with some evaluation methods

In [0]:
# Actual vs. predicted target values on the test rows. The axes are labelled
# so the figure shows which axis is which; points close to the diagonal are
# good predictions.
plt.scatter(y_test, y_pred)
plt.xlabel("Actual medv (y_test)")
plt.ylabel("Predicted medv (y_pred)")
plt.title("Linear regression: predicted vs. actual")
plt.show()

In [0]:
# Root-mean-squared error: square root of the mean squared error between the
# actual and the predicted test values.
rmse = np.sqrt(
    metrics.mean_squared_error(y_true=y_test, y_pred=y_pred)
)
print("RMSE = ", rmse)

In [0]:
# Mean absolute percentage error: mean of |(actual - predicted) / actual|,
# scaled to percent.
mape = np.abs((y_test - y_pred) / y_test).mean() * 100
print("MAPE = ", mape, "%")



---



## Exercises


1.   apply feature scaling (hint: https://scikit-learn.org/stable/modules/preprocessing.html)
2.   work with 5-fold cross-validation  (hint: https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.KFold.html)
3.   evaluate using RMSE



### do your exercises here

In [0]:
# run your code here