In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics
from sklearn.model_selection import cross_val_score

In [None]:
# Loading The Dataset
def load_data(path='USA_Housing.csv'):
  """Read the housing dataset from a CSV file.

  Args:
    path: Location of the CSV file. Defaults to 'USA_Housing.csv' in the
      current working directory, so existing ``load_data()`` calls behave
      as before.

  Returns:
    pandas.DataFrame with the parsed contents of the file.

  Raises:
    FileNotFoundError: propagated from ``pd.read_csv`` when ``path`` does
      not exist.
  """
  return pd.read_csv(path)

In [None]:
# Load the housing CSV once; every later cell reads this frame.
USAhousing = load_data()

In [None]:
# Preview the first rows to sanity-check the load.
USAhousing.head()

In [None]:
# Column dtypes and non-null counts.
USAhousing.info()

In [None]:
# Summary statistics of the frame's columns.
USAhousing.describe()

In [None]:
# Column labels, for reference when selecting features and target below.
USAhousing.columns

In [None]:
# Data Visualization
def visualize_data(data):
  """Plot the distribution of the 'Price' column of ``data``.

  Both style calls change global seaborn/matplotlib settings, so plots
  drawn afterwards pick up the same look.

  Args:
    data: Mapping or DataFrame that can be indexed with 'Price'.
  """
  # Global look-and-feel first, then the plot itself.
  sns.set_style("whitegrid")
  plt.style.use("fivethirtyeight")

  price = data['Price']
  # NOTE(review): distplot is deprecated in seaborn >= 0.11; kept so the
  # rendered figure is unchanged.
  sns.distplot(price)

In [None]:
# Distribution plot of the Price column.
visualize_data(USAhousing)

In [None]:
def heatmap(data=None):
  """Draw an annotated heatmap of pairwise column correlations.

  Args:
    data: DataFrame to correlate. When omitted, the module-level
      ``USAhousing`` frame is used, so ``heatmap()`` keeps working.
  """
  if data is None:
    data = USAhousing
  # Correlate numeric/bool columns only: since pandas 2.0 DataFrame.corr()
  # no longer silently skips non-numeric columns and raises on strings.
  numeric = data.select_dtypes(include=["number", "bool"])
  sns.heatmap(numeric.corr(), annot=True)

In [None]:
# Annotated correlation heatmap of the housing frame.
heatmap()

In [None]:
# Splitting Data Into Training Set And Test Set
def split_data(USAhousing, test_size=0.3, random_state=42):
  """Split the housing frame into train/test features and targets.

  Args:
    USAhousing: DataFrame holding the five feature columns listed below
      and a 'Price' column.
    test_size: Forwarded to ``train_test_split``. Defaults to 0.3.
    random_state: Seed forwarded to ``train_test_split`` so the split is
      reproducible. Defaults to 42.

  Returns:
    dict with keys "X_train", "X_test", "y_train" and "y_test".

  Raises:
    KeyError: from pandas indexing when a required column is missing.
  """
  feature_columns = ['Avg. Area Income', 'Avg. Area House Age', 'Avg. Area Number of Rooms',
                     'Avg. Area Number of Bedrooms', 'Area Population']
  X = USAhousing[feature_columns]
  y = USAhousing['Price']
  X_train, X_test, y_train, y_test = train_test_split(
      X, y, test_size=test_size, random_state=random_state)
  return {
      "X_train": X_train,
      "X_test": X_test,
      "y_train": y_train,
      "y_test": y_test
  }

In [None]:
# Hold-out split; `data` is a dict keyed by X_train / X_test / y_train / y_test.
data = split_data(USAhousing)

In [None]:
# Full-dataset feature matrix and target. `X` and `y` are read as globals by
# cross_val(), and `X.columns` labels the coefficient table below.
X = USAhousing[['Avg. Area Income', 'Avg. Area House Age', 'Avg. Area Number of Rooms',
               'Avg. Area Number of Bedrooms', 'Area Population']]
y = USAhousing['Price']

# No `normalize=True`: the keyword was deprecated in scikit-learn 1.0 and
# removed in 1.2, where passing it raises TypeError. Ordinary least squares
# is invariant to feature rescaling, so predictions are unaffected.
lin_reg = LinearRegression()
lin_reg.fit(data["X_train"], data["y_train"])

# One fitted coefficient per feature, indexed by feature name.
coeff_df = pd.DataFrame(lin_reg.coef_, X.columns, columns=['Coefficient'])
# Test-set predictions from the fitted model.
pred = lin_reg.predict(data["X_test"])

In [None]:
def plot_linear_regression():
  """Scatter the test-set targets against the fitted model's predictions.

  Reads the module-level ``lin_reg`` (fitted model) and ``data`` (the
  train/test split dict); takes no arguments and returns nothing.
  """
  # Predict with the already-fitted module-level model, the same source
  # evaluate_model() uses. No `apply_linear_regression` helper is defined
  # in the notebook, so calling one fails on a fresh kernel.
  pred = lin_reg.predict(data["X_test"])
  plt.scatter(data["y_test"], pred)
  plt.xlabel("Actual price")
  plt.ylabel("Predicted price")

In [None]:
# Scatter of data["y_test"] against the predictions.
plot_linear_regression()

In [None]:
def plot_linear_regression2():
  """Plot the distribution of test-set residuals (target minus prediction).

  Reads the module-level ``lin_reg`` (fitted model) and ``data`` (the
  train/test split dict); takes no arguments and returns nothing.
  """
  # Predict with the already-fitted module-level model, the same source
  # evaluate_model() uses. No `apply_linear_regression` helper is defined
  # in the notebook, so calling one fails on a fresh kernel.
  pred = lin_reg.predict(data["X_test"])
  residuals = data["y_test"] - pred
  # NOTE(review): distplot is deprecated in seaborn >= 0.11; kept so the
  # rendered figure is unchanged.
  sns.distplot(residuals, bins=50)

In [None]:
# Distribution of data["y_test"] minus the predictions.
plot_linear_regression2()

In [None]:
def print_evaluate(true, predicted):
    """Print MAE, MSE, RMSE and R2 for a set of predictions.

    Args:
        true: Ground-truth target values.
        predicted: Model predictions aligned with ``true``.

    Returns:
        None. The four metrics are written to stdout, one per line.
    """
    mae = metrics.mean_absolute_error(true, predicted)
    mse = metrics.mean_squared_error(true, predicted)
    # RMSE is the square root of the MSE already computed; no second pass.
    rmse = np.sqrt(mse)
    r2_square = metrics.r2_score(true, predicted)
    print('MAE:', mae)
    print('MSE:', mse)
    print('RMSE:', rmse)
    print('R2 Square', r2_square)

In [None]:
def evaluate_model():
  """Print evaluation metrics for the test split, then the train split.

  Reads the module-level ``lin_reg`` (fitted model) and ``data`` (the
  train/test split dict) and delegates the printing of each metric block
  to ``print_evaluate``.
  """
  # Predict both splits up front, test first, then report in that order.
  predictions = {
      split: lin_reg.predict(data["X_" + split]) for split in ("test", "train")
  }

  print('Test set evaluation:\n_____________________________________')
  print_evaluate(data["y_test"], predictions["test"])
  print('====================================')
  print('Train set evaluation:\n_____________________________________')
  print_evaluate(data["y_train"], predictions["train"])

In [None]:
# Print MAE / MSE / RMSE / R2 for the test split and then the train split.
evaluate_model()

In [None]:
def cross_val(model):
    """Return the mean 10-fold cross-validation score of ``model``.

    Scoring runs on the module-level ``X`` and ``y``, not on the
    train/test split.

    Args:
        model: Estimator passed straight to ``cross_val_score``.

    Returns:
        Mean of the per-fold scores.
    """
    fold_scores = cross_val_score(model, X, y, cv=10)
    return fold_scores.mean()

In [None]:
def evaluate(true, predicted):
    """Compute MAE, MSE, RMSE and R2 for a set of predictions.

    Args:
        true: Ground-truth target values.
        predicted: Model predictions aligned with ``true``.

    Returns:
        Tuple ``(mae, mse, rmse, r2_square)``.
    """
    mae = metrics.mean_absolute_error(true, predicted)
    mse = metrics.mean_squared_error(true, predicted)
    # RMSE is the square root of the MSE already computed; no second pass.
    rmse = np.sqrt(mse)
    r2_square = metrics.r2_score(true, predicted)
    return mae, mse, rmse, r2_square

In [None]:
def evaluation_dataframe():
  """Build a one-row summary table for the linear regression model.

  Reads the module-level ``lin_reg`` and ``data``. The test-split metrics
  come from ``evaluate``; the last column comes from ``cross_val`` on a
  fresh ``LinearRegression()``.

  Returns:
    pandas.DataFrame with columns 'Model', 'MAE', 'MSE', 'RMSE',
    'R2 Square' and 'Cross Validation'.
  """
  test_pred = lin_reg.predict(data["X_test"])
  mae, mse, rmse, r2_square = evaluate(data["y_test"], test_pred)
  cv_score = cross_val(LinearRegression())

  header = ['Model', 'MAE', 'MSE', 'RMSE', 'R2 Square', "Cross Validation"]
  row = ["Linear Regression", mae, mse, rmse, r2_square, cv_score]
  return pd.DataFrame(data=[row], columns=header)

In [None]:
# One-row summary: test-split metrics plus the mean cross-validation score.
evaluation_dataframe()