# Pre-setting

In [None]:
!pip install sweetviz

In [None]:
# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# import sweetviz as sv

# House Prediction Model

## Import dataset as dataframe

In [None]:
def import_data(path, column_id):
  """Load a CSV file into a DataFrame and discard its identifier column.

  Args:
    path: Location of the CSV file (anything `pd.read_csv` accepts). The
      first row of the file is used as the header.
    column_id: Name of the identifier column to remove.

  Returns:
    A DataFrame holding every column of the file except `column_id`.
  """
  frame = pd.read_csv(path, header=0)
  return frame.drop(columns=[column_id])

## Report dataset with sweetviz

In [None]:
def report_data(df, output_path='./report.html'):
  """Build a sweetviz analysis report for `df` and write it out as HTML.

  Args:
    df: DataFrame to analyze.
    output_path: Destination of the generated HTML report. Defaults to
      './report.html', the location this function has always used.
  """
  # Imported locally: the module-level `import sweetviz as sv` is commented
  # out, so without this the name `sv` would be undefined (NameError).
  import sweetviz as sv

  report = sv.analyze(df)
  report.show_html(output_path)

## Handle missing and NaN values



In [None]:
def _drop_rows_if_nan_is_rare(df, column, total_rows, max_percent=1):
  """Drop, in place, the rows where `column` is NaN if they are rare enough.

  Rows are removed only when the NaN count is at most `max_percent` percent
  of `total_rows`; otherwise `df` is left untouched.
  """
  nan_count = df[column].isna().sum()
  # Cross-multiplied integer comparison: exact (no floor division that would
  # let e.g. 1.9% pass as "1%") and safe when total_rows is 0.
  if nan_count * 100 <= max_percent * total_rows:
    df.dropna(subset=[column], inplace=True)


def handle_with_nan_values(df):
  """Remove rows with NaN in 'MasVnrType' / 'Electrical' when they are rare.

  For each of the two columns, the rows holding NaN are deleted only if they
  make up at most 1 percent of the original number of rows. The index is
  renumbered from 0 afterwards.

  Args:
    df: DataFrame containing the 'MasVnrType' and 'Electrical' columns.
      It is modified in place.

  Returns:
    The same DataFrame object, after the rows were dropped and the index
    was reset.
  """
  m = df.shape[0]  # number of training examples before any row is dropped

  # Both percentages are measured against the original row count `m`.
  for column in ("MasVnrType", "Electrical"):
    _drop_rows_if_nan_is_rare(df, column, m)

  # Renumber the rows so the index has no gaps after the deletions.
  df.reset_index(drop=True, inplace=True)

  return df

In [None]:
def handle_with_missing_value(df, object_to_fillna):
  """Fill NaN cells of `df` in place using per-column replacement values.

  Args:
    df: DataFrame to patch; it is modified in place.
    object_to_fillna: Mapping of column name -> value written into that
      column's NaN cells (chosen from the data description).

  Returns:
    The same DataFrame object, with the replacements applied.
  """
  df.fillna(object_to_fillna, inplace=True)
  return df

## Handle non-numerical features and normalize numerical features

In [None]:
def non_numerical_feature_scaling(df):
  """Label-encode non-numeric columns and standardize numeric ones in place.

  Non-numeric (and boolean) columns are replaced by integer codes 0..k-1,
  assigned to the distinct values in sorted order so the encoding is the
  same on every run. Numeric columns are replaced by
  (value - mean) / standard deviation; a column whose standard deviation is
  zero or undefined is only mean-centered, which avoids turning it into
  NaN/inf through a division by zero.

  The codes and the mean/std are derived from `df` itself, so two frames
  processed separately are only encoded alike if they hold the same values.

  Args:
    df: DataFrame to transform; it is modified in place.

  Returns:
    The same DataFrame object, with every column converted.
  """
  for column in df.columns:
    series = df[column]
    is_numeric = (pd.api.types.is_numeric_dtype(series)
                  and not pd.api.types.is_bool_dtype(series))
    if is_numeric:
      centered = series - series.mean()
      std_value = series.std()
      if pd.notna(std_value) and std_value > 0:
        df[column] = centered / std_value
      else:
        # Constant or single-row column: nothing to scale by.
        df[column] = centered
    else:
      values = series.tolist()
      # Sort by (type name, text) so mixed-type columns (e.g. str plus NaN)
      # can be ordered and the codes do not depend on set/hash ordering.
      ordered = sorted(set(values), key=lambda v: (type(v).__name__, str(v)))
      codes = {value: code for code, value in enumerate(ordered)}
      df[column] = [codes[value] for value in values]
  return df

## Main concept of House Prediction Model

In [None]:
# Replacement values for NaN cells, keyed by column name; this mapping is
# what gets passed to handle_with_missing_value. Text columns receive the
# placeholder category "#None", the two numeric columns receive 0.
# NOTE(review): presumably NaN in these columns means the house lacks the
# feature (per the data description) -- confirm before adding more columns.
object_to_fillna = {
    "LotFrontage": 0,
    "Alley": "#None",
    "BsmtQual": "#None",
    "BsmtCond": "#None",
    "BsmtExposure": "#None",
    "BsmtFinType1": "#None",
    "BsmtFinType2": "#None",
    "FireplaceQu": "#None",
    "GarageType": "#None",
    "GarageYrBlt": 0,
    "GarageFinish": "#None",
    "GarageQual": "#None",
    "GarageCond": "#None",
    "PoolQC": "#None",
    "Fence": "#None",
    "MiscFeature": "#None",
}

In [None]:
# Load the training set and drop its 'Id' column.
df = import_data('/content/drive/Shareddrives/AI-Project/train.csv', 'Id')

In [None]:
# Drop the rare NaN rows before splitting, so features and target stay
# row-aligned.
df = handle_with_nan_values(df)

# Separate the target column from the feature columns.
y = df['SalePrice']
X = df.drop(columns=['SalePrice'])

# Fill the remaining NaN cells, then encode / standardize every feature.
X = non_numerical_feature_scaling(handle_with_missing_value(X, object_to_fillna))

# m: number of training examples left after cleaning, n: number of features.
m, n = X.shape

# Prepend the constant-1 column that multiplies the first entry of theta.
X.insert(0, "Theta Zero", np.ones((m, 1), dtype=int), allow_duplicates=True)

X, y = X.to_numpy(), y.to_numpy()

In [None]:
def computeCost(error_value, m):
  """Return the halved mean squared error for a row of residuals.

  Args:
    error_value: Residuals (prediction - target); the caller in this file
      passes an array of shape (1, m).
    m: Number of training examples used to average the squared error.

  Returns:
    dot(error_value, error_value^T) / (2 * m); for a (1, m) input this is a
    (1, 1) array.
  """
  squared_error_sum = np.dot(error_value, np.transpose(error_value))
  return squared_error_sum / (2 * m)

def gradientDescent(X, y, theta, m, alpha, num_iters):
  """Fit linear-regression weights with batch gradient descent.

  Args:
    X: Design matrix with one row per example and one column per weight.
    y: Target values, one per example; a flat vector or a column vector.
    theta: Initial weights; a flat vector or a column vector with one entry
      per column of `X`.
    m: Number of training examples; must equal the number of rows of `X`.
    alpha: Learning rate.
    num_iters: Number of update steps to perform.

  Returns:
    A two-element list `[J_history, theta]`: `J_history` holds the cost
    returned by `computeCost` before each update (one entry per iteration)
    and `theta` is the final weight column vector.

  Raises:
    ValueError: If `m` does not match the number of rows of `X`.
  """
  X = np.asarray(X, dtype=float)
  if X.shape[0] != m:
    raise ValueError(f"m is {m} but X has {X.shape[0]} rows")

  # Normalize shapes once so the loop is pure matrix arithmetic: targets as
  # a (1, m) row and theta as a column, whatever layout the caller used.
  targets = np.asarray(y, dtype=float).reshape(1, -1)
  theta = np.asarray(theta, dtype=float).reshape(-1, 1)

  J_history = []
  for _ in range(num_iters):
    # Residuals as a (1, m) row: prediction minus target for every example.
    error_value = np.dot(X, theta).reshape(1, -1) - targets
    J_history.append(computeCost(error_value, m))
    theta = theta - (alpha / m) * np.transpose(np.dot(error_value, X))

  return [J_history, theta]

In [None]:
# Gradient-descent setup: one weight per feature plus one for the constant
# column, all starting at zero.
theta = np.zeros((n + 1, 1), dtype=int)
alpha = 0.001    # learning rate
num_iters = 500  # number of gradient-descent iterations

In [None]:
# Train the model, then join the per-iteration cost arrays into one array
# for plotting.
J_history, theta = gradientDescent(X, y, theta, m, alpha, num_iters)
J_history = np.concatenate(J_history)

In [None]:
# Cost recorded at each gradient-descent iteration (x axis: 1..num_iters).
plt.plot(np.arange(num_iters) + 1, J_history)

# Predict House Price

In [None]:
# Load the test set and drop its 'Id' column.
df_test = import_data('/content/drive/Shareddrives/AI-Project/test.csv', 'Id')

In [None]:
# Remove every test row that has NaN in either column, then renumber rows.
df_test.dropna(subset=["MasVnrType", "Electrical"], inplace=True)
df_test.reset_index(drop=True, inplace=True)

# NOTE(review): the encoding / standardization below is computed from
# df_test itself rather than reused from the training data -- confirm this
# is intended.
df_test = non_numerical_feature_scaling(
    handle_with_missing_value(df_test, object_to_fillna))

# Prepend the constant-1 column that multiplies the first entry of theta.
number_of_test_example = df_test.shape[0]
df_test.insert(0, "Theta Zero",
               np.ones((number_of_test_example, 1), dtype=int),
               allow_duplicates=True)

df_test = df_test.to_numpy()

In [None]:
def predict_house_price(test_example, theta):
  """Return the linear-model prediction for `test_example`.

  Args:
    test_example: Feature values of one example (or a matrix with one
      example per row), including the leading constant-1 entry.
    theta: Learned weights, one per feature value.

  Returns:
    The dot product of `test_example` and `theta`.
  """
  predicted_price = np.dot(test_example, theta)
  return predicted_price

In [None]:
# Predicted price for row 5 of the training matrix X.
# NOTE(review): df_test is prepared earlier in this notebook but is not used
# here -- confirm whether a prediction on df_test was intended.
print(predict_house_price(X[5], theta))

[138025.89212924]
