# Numpy를 이용한 방정식 풀이

In [0]:
import numpy as np

# Sum of the squared linear polynomials (a*x + b)**2, one per coefficient pair.
squares = [np.poly1d(coeffs) ** 2 for coeffs in ([2, -1], [3, -5], [5, -6])]
ans = squares[0]
for sq in squares[1:]:
    ans = ans + sq
ans

In [0]:
# poly1d indexing is by power: constant, linear and quadratic coefficients.
tuple(ans[power] for power in range(3))

In [0]:
import sympy as sp
from sympy.plotting import plot

# Plot the quadratic in th over the interval [-3, 6].
th = sp.symbols('th')
cost = 38*(th**2) - 94*th + 62
p1 = plot(cost, (th, -3, 6))

In [0]:
# First derivative of the quadratic with respect to th.
cost = 38*(th**2) - 94*th + 62
diff_th = cost.diff(th)
diff_th

In [0]:
# Stationary point: solve derivative == 0.
sp.solve(sp.Eq(diff_th, 0))

# Boston House Price Linear Regression Model

In [0]:
# `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2, so a
# bare import raises ImportError on current releases. Use the built-in loader
# when it exists; otherwise rebuild an equivalent Bunch from the original
# StatLib file, following the recipe in scikit-learn's deprecation notice.
# NOTE: scikit-learn dropped this dataset over ethical concerns with its "B"
# feature; prefer another dataset (e.g. California housing) for new work.
try:
    from sklearn.datasets import load_boston
except ImportError:
    import numpy as np
    import pandas as pd
    from sklearn.utils import Bunch

    def load_boston():
        """Download the Boston housing data and return it as a Bunch.

        The result exposes ``data``, ``target``, ``feature_names`` and
        ``DESCR``, which are the attributes this notebook reads.
        Requires network access.
        """
        data_url = "http://lib.stat.cmu.edu/datasets/boston"
        # Skip the 22-line text header; every record is then spread over
        # two physical lines (11 values followed by 3 values).
        raw = pd.read_csv(data_url, sep=r"\s+", skiprows=22, header=None).values
        data = np.hstack([raw[::2, :], raw[1::2, :2]])
        target = raw[1::2, 2]
        feature_names = np.array([
            'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
            'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT',
        ])
        descr = (
            "Boston house prices dataset\n"
            f"{data.shape[0]} samples, {data.shape[1]} features; "
            "target is MEDV (median home value in $1000's).\n"
            f"Source: {data_url}"
        )
        return Bunch(data=data, target=target,
                     feature_names=feature_names, DESCR=descr)

boston = load_boston()
print(boston.DESCR)

In [0]:
# Column names of the feature matrix (Bunch supports key access as well).
boston["feature_names"]

In [0]:
import pandas as pd

# Put the features in a DataFrame and append the target as a PRICE column.
boston_pd = pd.DataFrame(data=boston.data, columns=boston.feature_names)
boston_pd = boston_pd.assign(PRICE=boston.target)

boston_pd.head()

In [0]:
import matplotlib.pyplot as plt
import seaborn as sns

# Histogram of PRICE in 30 bins, at the figure size configured through seaborn.
sns.set(rc={'figure.figsize': (10, 6)})
hist_ax = plt.gca()
hist_ax.hist(boston_pd['PRICE'], bins=30)
hist_ax.set_xlabel("House prices in $1000")
plt.show()

In [0]:
# Pairwise column correlations, rounded to one decimal for the annotated heatmap.
correlation_matrix = boston_pd.corr().round(decimals=1)
sns.heatmap(correlation_matrix, annot=True, cmap="bwr")

상관 행렬을 보면 RM은 PRICE와 강한 양의 상관관계(0.7)를, LSTAT는 PRICE와 강한 음의 상관관계(-0.74)를 갖는 것을 알 수 있습니다.
선형 회귀 모형의 피처를 선택할 때 중요한 점은 다중공선성을 확인하는 것입니다. RAD와 TAX 피처는 상관계수가 0.91로 서로 밀접하게 관련되어 있어, 둘을 함께 사용하면 모델에 영향을 줄 수 있습니다. 상관계수가 -0.75인 DIS와 AGE 피처도 마찬가지입니다.

In [0]:
# Side-by-side scatter plots with fitted regression lines: RM and LSTAT against PRICE.
sns.set_style('darkgrid')
fig, axs = plt.subplots(ncols=2)
left_ax, right_ax = axs
sns.regplot(x='RM', y='PRICE', data=boston_pd, ax=left_ax)
sns.regplot(x='LSTAT', y='PRICE', data=boston_pd, ax=right_ax)

In [0]:
import numpy as np

# Single-feature setup: RM as an (n, 1) column array, PRICE as the target.
y_price = boston_pd['PRICE']
X_rooms = np.array(boston_pd['RM'])[:, np.newaxis]

for arr in (X_rooms, y_price):
    print(arr.shape)

In [0]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows; the fixed random_state keeps the split repeatable.
split = train_test_split(X_rooms, y_price, test_size=0.2, random_state=13)
X_train, X_test, y_train, y_test = split

for part in split:
    print(part.shape)

In [0]:
from sklearn.linear_model import LinearRegression

# Fit the single-feature model (fit() returns the estimator itself).
reg = LinearRegression().fit(X_train, y_train)

from sklearn.metrics import mean_squared_error

# Root-mean-squared error on the data the model was fitted on.
y_train_predict = reg.predict(X_train)
train_mse = mean_squared_error(y_train, y_train_predict)
rmse = np.sqrt(train_mse)

print("The model performance for training set",
      "--------------------------------------", sep="\n")
print('RMSE is ', rmse)

In [0]:
# Root-mean-squared error on the held-out rows.
y_test_predict = reg.predict(X_test)
test_mse = mean_squared_error(y_test, y_test_predict)
rmse = np.sqrt(test_mse)

print("The model performance for test set",
      "--------------------------------------", sep="\n")
print('RMSE is ', rmse)

In [0]:
# Evaluate the fitted line on an evenly spaced grid spanning the observed RM
# values. ndarray.min()/max() give scalar bounds directly; the builtin
# min()/max() iterate the 2-D array row by row and only work when it has a
# single column.
prediction_space = np.linspace(X_rooms.min(), X_rooms.max()).reshape(-1, 1)
plt.scatter(X_rooms, y_price)
plt.plot(prediction_space, reg.predict(prediction_space),
         color='black', linewidth=3)
plt.ylabel('value of house/1000($)')
plt.xlabel('number of rooms')
plt.show()

In [0]:
# Multi-feature model: every column except PRICE is a predictor.
y = boston_pd['PRICE']
X = boston_pd.drop(columns='PRICE')

split = train_test_split(X, y, test_size=0.2, random_state=13)
X_train, X_test, y_train, y_test = split

reg_all = LinearRegression().fit(X_train, y_train)

# Root-mean-squared error on the data the model was fitted on.
y_train_predict = reg_all.predict(X_train)
train_mse = mean_squared_error(y_train, y_train_predict)
rmse = np.sqrt(train_mse)

print("The model performance for training set",
      "--------------------------------------", sep="\n")
print('RMSE is ', rmse)

In [0]:
# Root-mean-squared error of the multi-feature model on the held-out rows.
y_pred = reg_all.predict(X_test)
rmse = (np.sqrt(mean_squared_error(y_test, y_pred)))

# The header previously said "training set" although the metric above is
# computed from X_test / y_test.
print("The model performance for test set")
print("--------------------------------------")
print('RMSE is ', rmse)

In [0]:
# Predicted against actual prices for the held-out rows.
plt.scatter(x=y_test, y=y_pred)
plt.title("Actual Prices vs Predicted prices")
plt.xlabel("Actual House Prices ($1000)")
plt.ylabel("Predicted House Prices: ($1000)")
# Red y = x reference line from 0 to 48.
diagonal = [0, 48]
plt.plot(diagonal, diagonal, 'r')