# 0. Import Library

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import scipy.stats

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error

import pandas as pd
import seaborn as sns

![](https://onlinelibrary.wiley.com/cms/asset/ea1d3bd8-afd7-4914-b645-74d424b6690d/advs3654-fig-0002-m.jpg)

# 1. Input Data

In [None]:
# Load the advertising data set; the first CSV column is used as the row index.
df = pd.read_csv(filepath_or_buffer='advertising.csv', index_col=0)

# 2. Data Preprocessing

## 2.1 Exploratory Data Analysis (EDA)

In [None]:
# Preview the first rows to check that the columns were parsed as expected.
df.head()

In [None]:
# (number of rows, number of columns) of the loaded data.
df.shape

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) per column.
df.describe()

In [None]:
# Data type of every column.
df.dtypes

In [None]:
# One scatter panel per advertising channel, each plotted against Sales.
sns.pairplot(
    df,
    x_vars=['TV', 'Radio', 'Newspaper'],
    y_vars='Sales',
    height=4,
)

In [None]:
# Same panels as the plain scatter version, but with a fitted regression line.
# `kind` accepts 'scatter', 'kde', 'hist' or 'reg'.
sns.pairplot(
    df,
    x_vars=['TV', 'Radio', 'Newspaper'],
    y_vars='Sales',
    height=4,
    kind='reg',
)

## 2.2 Data Cleaning

### - Missing Value

In [None]:
# Count of missing values in each column (`isna` is the same check as `isnull`).
df.isna().sum()

### - Outlier

### Feature: TV

In [None]:
# Outlier check for TV using Tukey's rule: values outside
# [Q1 - 1.5 * IQR, Q3 + 1.5 * IQR] are outlier candidates.
q3 = df.TV.quantile(.75)
q1 = df.TV.quantile(.25)

iqr = q3 - q1

upper = q3 + (1.5 * iqr)
lower = q1 - (1.5 * iqr)
print(f'Lower: {lower:.2f} \nUpper: {upper:.2f}')

# Quartiles are marked in blue/green and the outlier fences in red; the
# lines go on the current axes, which the boxplot below also draws on.
plt.axvline(x=q1, c='b', linestyle='--', linewidth=1)
plt.axvline(x=q3, c='g', linestyle='--', linewidth=1)
plt.axvline(x=lower, c='r', linestyle='--')
plt.axvline(x=upper, c='r', linestyle='--')
# `palette=` without `hue=` is deprecated in seaborn >= 0.13, so the first
# 'Set2' colour is passed explicitly for this single box instead.
sns.boxplot(x=df.TV, width=0.4, color=sns.color_palette('Set2')[0])
plt.show()

### Feature: Radio

In [None]:
# Outlier check for Radio using Tukey's rule: values outside
# [Q1 - 1.5 * IQR, Q3 + 1.5 * IQR] are outlier candidates.
q3 = df.Radio.quantile(.75)
q1 = df.Radio.quantile(.25)

iqr = q3 - q1

upper = q3 + (1.5 * iqr)
lower = q1 - (1.5 * iqr)
print(f'Lower: {lower:.2f} \nUpper: {upper:.2f}')

# Quartiles are marked in blue/green and the outlier fences in red; the
# lines go on the current axes, which the boxplot below also draws on.
plt.axvline(x=q1, c='b', linestyle='--', linewidth=1)
plt.axvline(x=q3, c='g', linestyle='--', linewidth=1)
plt.axvline(x=lower, c='r', linestyle='--')
plt.axvline(x=upper, c='r', linestyle='--')
# `palette=` without `hue=` is deprecated in seaborn >= 0.13, so the first
# 'Set2' colour is passed explicitly for this single box instead.
sns.boxplot(x=df.Radio, width=0.4, color=sns.color_palette('Set2')[0])
plt.show()

### Feature: Newspaper

In [None]:
# Outlier check for Newspaper using Tukey's rule: values outside
# [Q1 - 1.5 * IQR, Q3 + 1.5 * IQR] are outlier candidates.
q3 = df.Newspaper.quantile(.75)
q1 = df.Newspaper.quantile(.25)

iqr = q3 - q1

upper = q3 + (1.5 * iqr)
lower = q1 - (1.5 * iqr)
print(f'Lower: {lower:.2f} \nUpper: {upper:.2f}')

# Quartiles are marked in blue/green and the outlier fences in red; the
# lines go on the current axes, which the boxplot below also draws on.
plt.axvline(x=q1, c='b', linestyle='--', linewidth=1)
plt.axvline(x=q3, c='g', linestyle='--', linewidth=1)
plt.axvline(x=lower, c='r', linestyle='--')
plt.axvline(x=upper, c='r', linestyle='--')
# `palette=` without `hue=` is deprecated in seaborn >= 0.13, so the first
# 'Set2' colour is passed explicitly for this single box instead.
sns.boxplot(x=df.Newspaper, width=0.4, color=sns.color_palette('Set2')[0])
plt.show()

In [None]:
# Remove the rows whose Newspaper value lies above the upper Tukey fence
# (Q3 + 1.5 * IQR). The fence is recomputed here instead of reading the
# global `upper`, which holds the fence of whichever outlier cell ran last
# and would silently filter on the wrong threshold if cells are run out of
# order.
news_q3 = df.Newspaper.quantile(.75)
news_q1 = df.Newspaper.quantile(.25)
news_upper = news_q3 + (1.5 * (news_q3 - news_q1))

filter_upper = df.Newspaper > news_upper
# Drop in place so every later cell sees the cleaned frame under the same name.
df.drop(df[filter_upper].index, inplace=True)

In [None]:
# Recompute the Newspaper quartiles and Tukey fences on the current frame
# and redraw the boxplot, to see whether any value still lies outside
# [Q1 - 1.5 * IQR, Q3 + 1.5 * IQR].
q3 = df.Newspaper.quantile(.75)
q1 = df.Newspaper.quantile(.25)

iqr = q3 - q1

upper = q3 + (1.5 * iqr)
lower = q1 - (1.5 * iqr)
print(f'Lower: {lower:.2f} \nUpper: {upper:.2f}')

# Quartiles are marked in blue/green and the outlier fences in red; the
# lines go on the current axes, which the boxplot below also draws on.
plt.axvline(x=q1, c='b', linestyle='--', linewidth=1)
plt.axvline(x=q3, c='g', linestyle='--', linewidth=1)
plt.axvline(x=lower, c='r', linestyle='--')
plt.axvline(x=upper, c='r', linestyle='--')
# `palette=` without `hue=` is deprecated in seaborn >= 0.13, so the first
# 'Set2' colour is passed explicitly for this single box instead.
sns.boxplot(x=df.Newspaper, width=0.4, color=sns.color_palette('Set2')[0])
plt.show()

### Target: Sales

In [None]:
# Outlier check for the target Sales using Tukey's rule: values outside
# [Q1 - 1.5 * IQR, Q3 + 1.5 * IQR] are outlier candidates.
q3 = df.Sales.quantile(.75)
q1 = df.Sales.quantile(.25)

iqr = q3 - q1

upper = q3 + (1.5 * iqr)
lower = q1 - (1.5 * iqr)
print(f'Lower: {lower:.2f} \nUpper: {upper:.2f}')

# Quartiles are marked in blue/green and the outlier fences in red; the
# lines go on the current axes, which the boxplot below also draws on.
plt.axvline(x=q1, c='b', linestyle='--', linewidth=1)
plt.axvline(x=q3, c='g', linestyle='--', linewidth=1)
plt.axvline(x=lower, c='r', linestyle='--')
plt.axvline(x=upper, c='r', linestyle='--')
# `palette=` without `hue=` is deprecated in seaborn >= 0.13, so the first
# 'Set2' colour is passed explicitly for this single box instead.
sns.boxplot(x=df.Sales, width=0.4, color=sns.color_palette('Set2')[0])
plt.show()

### [Optional] Export เป็นไฟล์เมื่อทำ Data Cleaning เสร็จแล้ว

In [None]:
# Optional: save the cleaned frame as CSV and/or Excel. The placeholder in
# the file names is Thai for "student ID" -- replace it with your own ID
# before uncommenting. `index=False` leaves the row index out of the file.
#df.to_csv('รหัสนักศึกษา.csv',index=False)
#df.to_excel('รหัสนักศึกษา.xlsx',index=False)

## 2.3 ตรวจสอบค่าสหสัมพันธ์ของข้อมูล

In [None]:
# Pairwise correlation matrix of the columns.
df.corr()

In [None]:
# Annotated heatmap of the correlation matrix. The figure size and the
# seaborn font scale are set globally, so they also apply to later figures.
plt.rcParams['figure.figsize'] = (10, 7)
sns.set(font_scale=2.0)
# The trailing semicolon suppresses the textual repr of the returned Axes.
sns.heatmap(data=df.corr(), annot=True);

## 2.4 การกำหนด Feature / Target

### -แบบที่ 1

In [None]:
# Option 1: select features and target by column name.
feature_cols = ['TV', 'Radio', 'Newspaper']
X = df[feature_cols]   # feature matrix (DataFrame)
Y = df['Sales']        # target (Series)

### -แบบที่ 2

In [None]:
# Option 2 (disabled): work on the underlying NumPy array instead of the
# DataFrame, and inspect its shape.
#DataMatrix = df.values
#DataMatrix.shape

In [None]:
# Option 2 (disabled): the first three columns become the features and the
# columns from index 3 onward become the target (kept 2-D by the slice).
#X = DataMatrix[:, :3]
#Y = DataMatrix[:, 3:]

In [None]:
# Shape of the target, to confirm which of the two options produced it.
Y.shape

## 2.5 Data Preparation (แบ่งข้อมูลสำหรับ Training / Testing)

In [None]:
# Hold out the last 60 rows for testing and train on everything before them.
# NOTE(review): the split is purely positional (no shuffling) -- confirm that
# the row order of the data carries no pattern.
X_Train, X_Test = X[:-60], X[-60:]
Y_Train, Y_Test = Y[:-60], Y[-60:]

In [None]:
# Size of the training feature set.
X_Train.shape

In [None]:
# Size of the test feature set.
X_Test.shape

# 3. Modelling: Multiple Regression

## 3.1 Training Data

In [None]:
# Fit a linear regression on the training split.
model = LinearRegression()
model.fit(X=X_Train, y=Y_Train)

In [None]:
# Fitted coefficients (one per feature) and the intercept, shown as a tuple.
(model.coef_, model.intercept_)

In [None]:
# Coefficient of determination (R^2) of the model on the training data.
model.score(X_Train, Y_Train)

## 3.2 Predict Data

In [None]:
# Predict the target for the held-out test features.
y_predict = model.predict(X_Test)

# 4. Model Evaluation

In [None]:
# Evaluate the test-set predictions with each metric, in a fixed order.
# Note: scikit-learn's MAPE is returned as a fraction, not scaled to 0-100.
for label, metric in (
    ("r2 Score = ", r2_score),
    ("MSE = ", mean_squared_error),
    ("MAE = ", mean_absolute_error),
    ("MAPE = ", mean_absolute_percentage_error),
):
    print(label, metric(Y_Test, y_predict))

# [Optional] Save Machine Learning Models

In [None]:
# Optional: persist the fitted model with joblib. The placeholder in the
# file name is Thai for "student ID" -- replace it with your own ID before
# uncommenting.
#import joblib
#filename = 'joblib_รหัสนศ.sav'
#joblib.dump(model, filename)

In [None]:
# Optional: persist the fitted model with pickle. The placeholder in the
# file name is Thai for "student ID" -- replace it with your own ID before
# uncommenting.
# NOTE(review): the file handle from open() is never closed here; prefer a
# `with open(...)` block if this is enabled.
#import pickle
#filename = 'pickle_รหัสนศ.sav'
#pickle.dump(model, open(filename, 'wb'))