# 0. Import Libraries

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import pandas as pd
import seaborn as sns

import scipy.stats

![](https://onlinelibrary.wiley.com/cms/asset/ea1d3bd8-afd7-4914-b645-74d424b6690d/advs3654-fig-0002-m.jpg)

# 1. Input Data

In [None]:
# Read 'advertising.csv' (path relative to the working directory) into the frame `df`.
df = pd.read_csv('advertising.csv')

# 2. Data Preprocessing

## 2.1 Exploratory Data Analysis (EDA)

In [None]:
# Preview the first few rows of the frame.
df.head()

In [None]:
# (number of rows, number of columns)
df.shape

In [None]:
# Descriptive summary statistics per column.
df.describe()

In [None]:
# Data type of each column.
df.dtypes

## 2.2 Data Cleaning

### - Missing Values

In [None]:
# Number of missing (null) entries in each column.
df.isnull().sum()

### - Inconsistent Data

In [None]:
# Re-inspect the column dtypes to spot columns stored with an unexpected type.
df.dtypes

### - Outliers

#### TV

In [None]:
# Round 1 - TV: compute the 1.5 * IQR fences and draw them over a boxplot.
q1 = df['TV'].quantile(0.25)
q3 = df['TV'].quantile(0.75)
iqr = q3 - q1
lower = q1 - 1.5 * iqr
upper = q3 + 1.5 * iqr
print('Lower: {:.2f} \nUpper: {:.2f}'.format(lower, upper))

# Dashed guides: blue = Q1 and green = Q3 (thin lines), red = lower/upper fences.
# NOTE(review): newer seaborn releases may warn about `palette` without `hue` - confirm.
for guide_x, guide_colour, guide_kwargs in (
    (q1, 'b', {'linewidth': 1}),
    (q3, 'g', {'linewidth': 1}),
    (lower, 'r', {}),
    (upper, 'r', {}),
):
    plt.axvline(x=guide_x, c=guide_colour, linestyle='--', **guide_kwargs)
sns.boxplot(x=df['TV'], width=0.4, palette='Set2')
plt.show()

#### Radio

In [None]:
# Round 1 - Radio: compute the 1.5 * IQR fences and draw them over a boxplot.
q1 = df['Radio'].quantile(0.25)
q3 = df['Radio'].quantile(0.75)
iqr = q3 - q1
lower = q1 - 1.5 * iqr
upper = q3 + 1.5 * iqr
print('Lower: {:.2f} \nUpper: {:.2f}'.format(lower, upper))

# Dashed guides: blue = Q1 and green = Q3 (thin lines), red = lower/upper fences.
# NOTE(review): newer seaborn releases may warn about `palette` without `hue` - confirm.
for guide_x, guide_colour, guide_kwargs in (
    (q1, 'b', {'linewidth': 1}),
    (q3, 'g', {'linewidth': 1}),
    (lower, 'r', {}),
    (upper, 'r', {}),
):
    plt.axvline(x=guide_x, c=guide_colour, linestyle='--', **guide_kwargs)
sns.boxplot(x=df['Radio'], width=0.4, palette='Set2')
plt.show()

#### Newspaper

In [None]:
# Round 1 - Newspaper: compute the 1.5 * IQR fences and draw them over a boxplot.
# The `upper` value left behind here is read by the row-dropping cell that follows.
q1 = df['Newspaper'].quantile(0.25)
q3 = df['Newspaper'].quantile(0.75)
iqr = q3 - q1
lower = q1 - 1.5 * iqr
upper = q3 + 1.5 * iqr
print('Lower: {:.2f} \nUpper: {:.2f}'.format(lower, upper))

# Dashed guides: blue = Q1 and green = Q3 (thin lines), red = lower/upper fences.
# NOTE(review): newer seaborn releases may warn about `palette` without `hue` - confirm.
for guide_x, guide_colour, guide_kwargs in (
    (q1, 'b', {'linewidth': 1}),
    (q3, 'g', {'linewidth': 1}),
    (lower, 'r', {}),
    (upper, 'r', {}),
):
    plt.axvline(x=guide_x, c=guide_colour, linestyle='--', **guide_kwargs)
sns.boxplot(x=df['Newspaper'], width=0.4, palette='Set2')
plt.show()

In [None]:
# Drop, in place, every row whose Newspaper value lies above `upper`.
# `upper` is whatever the most recently executed outlier cell assigned, so this
# cell must run directly after the Newspaper Round 1 cell. Re-running it after a
# later outlier cell would apply that cell's threshold instead.
filter_upper = df['Newspaper'].gt(upper)
df.drop(index=df.index[filter_upper], inplace=True)

In [None]:
# Round 2 - Newspaper: recompute the 1.5 * IQR fences on the frame after the
# rows above the Round 1 upper fence were dropped, and redraw the boxplot.
q1 = df['Newspaper'].quantile(0.25)
q3 = df['Newspaper'].quantile(0.75)
iqr = q3 - q1
lower = q1 - 1.5 * iqr
upper = q3 + 1.5 * iqr
print('Lower: {:.2f} \nUpper: {:.2f}'.format(lower, upper))

# Dashed guides: blue = Q1 and green = Q3 (thin lines), red = lower/upper fences.
# NOTE(review): newer seaborn releases may warn about `palette` without `hue` - confirm.
for guide_x, guide_colour, guide_kwargs in (
    (q1, 'b', {'linewidth': 1}),
    (q3, 'g', {'linewidth': 1}),
    (lower, 'r', {}),
    (upper, 'r', {}),
):
    plt.axvline(x=guide_x, c=guide_colour, linestyle='--', **guide_kwargs)
sns.boxplot(x=df['Newspaper'], width=0.4, palette='Set2')
plt.show()

#### Sales

In [None]:
# Round 1 - Sales: compute the 1.5 * IQR fences and draw them over a boxplot.
q1 = df['Sales'].quantile(0.25)
q3 = df['Sales'].quantile(0.75)
iqr = q3 - q1
lower = q1 - 1.5 * iqr
upper = q3 + 1.5 * iqr
print('Lower: {:.2f} \nUpper: {:.2f}'.format(lower, upper))

# Dashed guides: blue = Q1 and green = Q3 (thin lines), red = lower/upper fences.
# NOTE(review): newer seaborn releases may warn about `palette` without `hue` - confirm.
for guide_x, guide_colour, guide_kwargs in (
    (q1, 'b', {'linewidth': 1}),
    (q3, 'g', {'linewidth': 1}),
    (lower, 'r', {}),
    (upper, 'r', {}),
):
    plt.axvline(x=guide_x, c=guide_colour, linestyle='--', **guide_kwargs)
sns.boxplot(x=df['Sales'], width=0.4, palette='Set2')
plt.show()

## 2.3 Data Transformation

![](https://images.datacamp.com/image/upload/v1677149248/label_encoding_d4ae789503.png?updated_at=2023-02-23T10:47:28.618Z)

In [None]:
# Column dtypes after cleaning; no conversion is applied in this cell.
df.dtypes

## [Optional] Export เป็นไฟล์เมื่อทำ Data Cleaning เสร็จแล้ว

In [None]:
# Write the current frame to a CSV file and to an Excel workbook, omitting the index.
# NOTE(review): to_excel needs an Excel writer engine (e.g. openpyxl) installed - confirm.
df.to_csv('รหัส.csv',index=False)
df.to_excel('รหัส.xlsx',index=False)

## 2.4 ตรวจสอบค่าสหสัมพันธ์ของตัวแปร x, y

In [None]:
# Pairwise correlation matrix of the frame's columns.
# NOTE(review): recent pandas versions raise here if a non-numeric column is present - confirm all columns are numeric.
df.corr()

In [None]:
# Set the default figure size to 10 x 7 inches (rcParams is global, so this also
# affects figures drawn by later cells), then plot the annotated correlation heatmap.
plt.rcParams.update({'figure.figsize': (10, 7)})
sns.heatmap(data=df.corr(), annot=True);

## 2.5 การกำหนด Feature / Target

In [None]:
# Underlying array view of the whole frame, and its (rows, columns) shape.
# NOTE(review): DataMatrix is not referenced by any later cell visible here.
DataMatrix = df.values
DataMatrix.shape

In [None]:
# Predictors: the TV, Radio and Newspaper columns. Target: the Sales column.
feature_cols = ['TV', 'Radio', 'Newspaper']
X = df.loc[:, feature_cols]
Y = df['Sales']

## 2.6 Data Preparation (แบ่งข้อมูลสำหรับ Training / Testing)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable.
X_Train, X_Test, Y_Train, Y_Test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=0,
)

# 3. Modelling: k-Nearest Neighbors

![](https://miro.medium.com/v2/resize:fit:1151/0*ItVKiyx2F3ZU8zV5)

In [None]:
from sklearn.neighbors import KNeighborsRegressor

## 3.1 Training Data

In [None]:
# k-NN regressor that predicts from the 3 nearest training samples.
# NOTE(review): the features are passed unscaled; since k-NN is distance-based,
# consider whether standardizing the columns is appropriate - confirm.
model = KNeighborsRegressor(n_neighbors=3)

In [None]:
# Fit the regressor on the training split.
model.fit(X_Train, Y_Train)

In [None]:
# Score on the training split itself (R^2 for scikit-learn regressors); this shows
# how well the model fits the data it has seen, not how well it generalizes.
model.score(X_Train, Y_Train)

## 3.2 Predict Data

In [None]:
# Predictions for the held-out test features.
y_predict = model.predict(X_Test)

# 4. Model Evaluation

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [None]:
# Report the test-set metrics, one per line, in the order R^2, MSE, MAE.
for metric_label, metric_fn in (
    ("r2 Score = ", r2_score),
    ("MSE = ", mean_squared_error),
    ("MAE = ", mean_absolute_error),
):
    print(metric_label, metric_fn(Y_Test, y_predict))

# [Optional] Save the Machine Learning Model

In [None]:
import pickle

# Serialize the fitted model to disk.
# The context manager closes (and therefore flushes) the file handle even if
# pickling raises; the original left the handle opened by open() unclosed.
# NOTE: only unpickle this file from a trusted source - pickle.load can execute
# arbitrary code.
filename = 'pickle_รหัสนศ.sav'
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)