# 多元線性迴歸
### 中電會三月主題課程(2024/3/23)

<table class="tfo-notebook-buttons" align="left">
  <td>
    <a target="_blank" href="https://colab.research.google.com/github/ChiuDeYuan/linear_regression_example/blob/main/housing_price_MLR.ipynb"><img src="https://www.tensorflow.org/images/colab_logo_32px.png" />Run in Google Colab</a>
  </td>
  <td>
    <a target="_blank" href="https://github.com/ChiuDeYuan/linear_regression_example/blob/main/housing_price_MLR.ipynb"><img src="https://www.tensorflow.org/images/GitHub-Mark-32px.png" />View on GitHub</a>
  </td>
</table>

## Useful links

* Housing Prices Dataset : https://www.kaggle.com/datasets/yasserh/housing-prices-dataset
* Linear Models (Scikit-learn) : https://scikit-learn.org/stable/modules/linear_model.html

## Imports

In [1]:
from sklearn import linear_model
from sklearn.metrics import r2_score
from sklearn.utils import shuffle
from sklearn.feature_selection import RFE

In [2]:
import statsmodels.api as sm

In [3]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

## Load data

In [4]:
# URL of the housing CSV that this notebook analyses.
dataset_path = 'https://raw.githubusercontent.com/ChiuDeYuan/linear_regression_example/main/Housing.csv'

# Download the CSV and parse it into a DataFrame.
dataset = pd.read_csv(filepath_or_buffer=dataset_path)

In [None]:
# Preview the first rows of the freshly loaded frame.
dataset.head()

In [None]:
# (rows, columns) of the raw data.
dataset.shape

## 準備資料

In [7]:
# Columns that are run through map_func ('yes'/'no' -> 1/0).
mapped_var = [
    'mainroad',
    'guestroom',
    'basement',
    'hotwaterheating',
    'airconditioning',
    'prefarea',
]

def map_func(x):
    """Encode a yes/no Series as integers.

    Args:
        x: pandas Series whose values are 'yes' / 'no' (or already 1 / 0).

    Returns:
        A Series of the same length with 'yes' -> 1 and 'no' -> 0. Values
        that are already 1 or 0 are passed through unchanged so that
        re-running the encoding cell does not turn the columns into NaN.
        Any other value maps to NaN.
    """
    return x.map({'yes': 1, 'no': 0, 1: 1, 0: 0})

# DataFrame.apply hands each selected column to map_func as a Series;
# the encoded columns then overwrite the originals.
encoded = dataset[mapped_var].apply(map_func)
dataset[mapped_var] = encoded

In [8]:
# Remove the 'furnishingstatus' column (it is not part of mapped_var, so it
# is never encoded). errors='ignore' keeps this cell re-runnable: without it
# a second execution raises KeyError because the column is already gone.
dataset = dataset.drop(columns='furnishingstatus', errors='ignore')

In [None]:
# Check the result: mapped columns should now hold 1/0 and
# 'furnishingstatus' should no longer be present.
dataset.head()

## 縮放數據

In [10]:
from sklearn.preprocessing import MinMaxScaler

In [11]:
scaler = MinMaxScaler()

# Named scaled_cols rather than `vars` so the builtin vars() is not shadowed
# for the rest of the kernel session.
scaled_cols = ['area', 'bedrooms', 'bathrooms', 'stories', 'parking', 'price']

# NOTE(review): the scaler is fitted on every row, before the train/test
# split performed later in the notebook, so held-out rows influence the
# fitted min/max. Consider fitting on the training rows only.
dataset[scaled_cols] = scaler.fit_transform(dataset[scaled_cols])

In [None]:
# Preview the frame after min-max scaling of the numeric columns.
dataset.head()

In [None]:
# Summary statistics per column; useful for checking the min/max of the
# scaled columns.
dataset.describe()

## 篩選特徵

In [None]:
import seaborn as sns

# Pairwise correlation of all columns, drawn on an explicitly created Axes.
fig, ax = plt.subplots(figsize=(8, 5))
corr_matrix = dataset.corr()
sns.heatmap(corr_matrix, annot=True, cmap="PuBuGn", ax=ax)
plt.show()

In [15]:
# Separate the target from the features without mutating `dataset`.
# (DataFrame.pop would delete 'price' in place, so re-running the cell would
# raise KeyError and dataset_x would only be an alias of the mutated frame.)
dataset_y = dataset['price']
dataset_x = dataset.drop(columns='price')

In [16]:
# Plain least-squares estimator (with intercept) that RFE uses to rank features.
reg = linear_model.LinearRegression(fit_intercept=True)

In [17]:
# Recursive feature elimination down to 5 features. fit() returns the fitted
# selector itself, so construction and fitting are chained.
# NOTE(review): selection runs on all rows, before the train/test split that
# follows later in the notebook.
rfe = RFE(estimator=reg, n_features_to_select=5).fit(dataset_x, dataset_y)

In [None]:
# One (column name, selected?, rank) tuple per candidate feature.
[
    (name, selected, rank)
    for name, selected, rank in zip(dataset_x.columns, rfe.support_, rfe.ranking_)
]

In [None]:
# Boolean mask from RFE -> names of the columns it kept.
selected_mask = rfe.support_
col = dataset_x.columns[selected_mask]
col

In [20]:
# Keep every row, but only the columns chosen by RFE.
dataset_x = dataset_x.loc[:, col]

In [21]:
# sm.OLS fits no intercept unless the design matrix carries a constant
# column, so add one (prepended, which is the default position).
dataset_x = sm.add_constant(dataset_x, prepend=True)

In [None]:
# Preview the final design matrix (constant column plus selected features).
dataset_x.head()

## 分割資料集

In [23]:
# Shuffle features and target together (same permutation for both);
# random_state=0 makes the order reproducible.
shuffled_x, shuffled_y = shuffle(dataset_x, dataset_y, random_state=0)
dataset_x, dataset_y = shuffled_x, shuffled_y

In [24]:
# Hold out the last 30 shuffled rows for testing; all earlier rows train
# the model. Slicing is positional (.iloc).
n_test = 30

dataset_x_train, dataset_x_test = dataset_x.iloc[:-n_test], dataset_x.iloc[-n_test:]
dataset_y_train, dataset_y_test = dataset_y.iloc[:-n_test], dataset_y.iloc[-n_test:]

In [None]:
# Train shape on the first line, test shape on the second.
shape_report = f"{dataset_x_train.shape}\n{dataset_x_test.shape}"
print(shape_report)

## 訓練模型

In [26]:
# Ordinary least squares: target as endog, design matrix (with constant) as exog.
ols_model = sm.OLS(endog=dataset_y_train, exog=dataset_x_train)
reg = ols_model.fit()

## 預測&評估

In [None]:
# Text report of the fitted OLS model.
summary_table = reg.summary()
print(summary_table)

In [28]:
# Predict the target for the held-out rows.
prediction = reg.predict(exog=dataset_x_test)

In [None]:
# Coefficient of determination on the held-out rows.
r2_score(y_true=dataset_y_test, y_pred=prediction)