# Housing Prices Competition for Kaggle Learn Users

Ask a home buyer to describe their dream house, and they probably won't begin with the height of the basement ceiling or the proximity to an east-west railroad. But this playground competition's dataset proves that much more influences price negotiations than the number of bedrooms or a white-picket fence.

With 79 explanatory variables describing (almost) every aspect of residential homes in Ames, Iowa, this competition challenges you to predict the final price of each home.

The Ames Housing dataset was compiled by Dean De Cock for use in data science education. It's an incredible alternative for data scientists looking for a modernized and expanded version of the often cited Boston Housing dataset. 

### Import all necessary libraries

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder, OrdinalEncoder, OneHotEncoder
import numpy as np

# Data Monitoring and Pre-processing

### Loading the Data

In [None]:
# Read the competition's training and test CSV files into DataFrames.
train_csv_path = r"home-data-for-ml-course/train.csv"
test_csv_path = r"home-data-for-ml-course/test.csv"
df_train = pd.read_csv(train_csv_path)
df_test = pd.read_csv(test_csv_path)

In [None]:
# # df_train = df_train.replace("NaN", " ", regex=True)
# df_train = df_train.fillna((0))
# # df_test = df_test.replace("NaN", " ", regex=True)
# df_test = df_test.fillna(float(0))

### Splitting the Data into input and output

In [None]:
# Separate the target ("SalePrice") from the training features; the "Id"
# column is removed from both feature frames.
y_train = df_train["SalePrice"]
X_train = df_train.drop(columns=["Id", "SalePrice"])

X_test = df_test.drop(columns=["Id"])  # test features

print("Train: ", X_train.shape)
print("Test: ", X_test.shape)

### Identifying object items

In [None]:
# Collect, in column order, the names of the columns whose dtype compares
# equal to "O" (object) in each feature frame.
train_object_list = [
    column for column, dtype in X_train.dtypes.items() if dtype == "O"
]
test_object_list = [
    column for column, dtype in X_test.dtypes.items() if dtype == "O"
]

### Object item Dataframe

In [None]:
# Pull the object-typed columns out into their own frames.
train_obj_df = X_train[train_object_list].copy()
test_obj_df = X_test[test_object_list].copy()

### Converting NaN values to the string "0" in the object DataFrame

In [None]:
# Missing entries in the object columns become the string "0".
missing_marker = "0"
train_obj_df = train_obj_df.fillna(missing_marker)
test_obj_df = test_obj_df.fillna(missing_marker)

### Identifying the mixed-type items among the object items

In [None]:
# An object column counts as "mixed" when at least one of its cells in the
# original feature frame is a plain Python float. Each such column is listed
# once, in the order of the object-column lists.
train_mixed_val = [
    column
    for column in train_object_list
    if any(type(cell) is float for cell in X_train[column])
]

test_mixed_val = [
    column
    for column in test_object_list
    if any(type(cell) is float for cell in X_test[column])
]

In [None]:
# Display the (rows, columns) shape of the training object-column frame.
train_obj_df.shape

In [None]:
# Display how many training object columns were flagged as mixed-type.
len(train_mixed_val)

### Converting float and int values to str in the object DataFrame

In [None]:
def _stringify_number(value):
    """Return ``str(value)`` for plain ``float``/``int`` cells; pass anything else through."""
    if type(value) in (float, int):
        return str(value)
    return value


# Convert every float/int cell of the mixed-type columns to its string form.
# Whole columns are reassigned instead of writing through chained indexing
# (``frame[col][row] = ...``): chained assignment may operate on a temporary
# copy, triggers SettingWithCopyWarning, and never updates the frame when
# pandas Copy-on-Write is enabled.
for column in train_mixed_val:
    train_obj_df[column] = train_obj_df[column].map(_stringify_number)

for column in test_mixed_val:
    test_obj_df[column] = test_obj_df[column].map(_stringify_number)

### Dropping the object items

In [None]:
# Remove the object-typed columns from the feature frames; only the
# non-object columns remain afterwards.
X_train = X_train.drop(columns=train_object_list)
X_test = X_test.drop(columns=test_object_list)

In [None]:
# Report the current shapes of the train and test feature frames.
for label, frame in (("Train: ", X_train), ("Test: ", X_test)):
    print(label, frame.shape)

### Converting NaN values of X DataFrame

In [None]:
# Missing entries in the remaining feature columns are replaced with 0.0.
numeric_fill_value = 0.0
X_train = X_train.fillna(numeric_fill_value)
X_test = X_test.fillna(numeric_fill_value)

### Data Encoding

In [None]:
class Encoding:
    """Thin wrappers around scikit-learn encoders for train/test data.

    The feature encoders are fitted on the training data only and then applied
    to the test data, so both splits share one category-to-code mapping.
    Re-fitting on the test split would assign codes independently and make the
    two encodings incomparable.
    """

    def ordinal_encoder(self, train, test):
        """Ordinal-encode ``train`` and ``test`` with a mapping learned from ``train``.

        Intended for categorical columns. A value that appears in ``test`` but
        was not seen in ``train`` is encoded as ``-1``.

        Args:
            train: Training features (2-D, e.g. a DataFrame); used for fitting.
            test: Test features with the same columns as ``train``.

        Returns:
            Tuple ``(X_train_enc, X_test_enc)`` of encoded features.
        """
        oe = OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1)
        X_train_enc = oe.fit_transform(train)
        # transform (not fit_transform): reuse the mapping learned from train.
        X_test_enc = oe.transform(test)

        return X_train_enc, X_test_enc

    def one_hot_encoder(self, train, test):
        """One-hot encode ``train`` and ``test`` with categories learned from ``train``.

        A category that appears only in ``test`` is ignored, i.e. encoded as
        all zeros for that feature, so both outputs have the same columns.

        Args:
            train: Training features (2-D); used for fitting.
            test: Test features with the same columns as ``train``.

        Returns:
            Tuple ``(X_train_enc, X_test_enc)`` of encoded features.
        """
        ohe = OneHotEncoder(handle_unknown="ignore")

        X_train_enc = ohe.fit_transform(train)
        # transform (not fit_transform): keep the train-derived column layout.
        X_test_enc = ohe.transform(test)

        return X_train_enc, X_test_enc

    def label_encoder(self, train):
        """Label-encode a 1-D target.

        Args:
            train: Target values to fit on and encode.

        Returns:
            The encoded target values.
        """
        le = LabelEncoder()
        y_train_enc = le.fit_transform(train)

        return y_train_enc

In [None]:
# Shared Encoding instance used by the encoding cells below.
encode = Encoding()

### Encoding the remaining X DataFrame

In [None]:
# The object columns were dropped above, so these frames hold the non-object
# (numeric) features. They are passed through as float arrays instead of being
# ordinal-encoded: ordinal encoding would replace each numeric value with a
# category index and discard its magnitude.
X_train_enc = X_train.to_numpy(dtype=float)
X_test_enc = X_test.to_numpy(dtype=float)

### Encoding the object DataFrame

In [None]:
# Ordinal-encode the object-column frames; the call returns the encoded
# training values followed by the encoded test values.
tr_ob_df, te_ob_df = encode.ordinal_encoder(train_obj_df, test_obj_df)

### Convert back to DataFrames after encoding

In [None]:
# Wrap the encoded object-column values back into DataFrames, reusing the
# original column labels.
train_obj_columns = train_obj_df.columns
test_obj_columns = test_obj_df.columns
train_obj_df = pd.DataFrame(tr_ob_df, columns=train_obj_columns)
test_obj_df = pd.DataFrame(te_ob_df, columns=test_obj_columns)

In [None]:
# Wrap the encoded values of the remaining columns back into DataFrames,
# reusing the original column labels.
train_columns = X_train.columns
test_columns = X_test.columns
X_train = pd.DataFrame(X_train_enc, columns=train_columns)
X_test = pd.DataFrame(X_test_enc, columns=test_columns)

### Concatenate these two DataFrames

In [None]:
# Join the two feature groups side by side (column-wise) for each split.
final_train = pd.concat((X_train, train_obj_df), axis="columns")
final_test = pd.concat((X_test, test_obj_df), axis="columns")

In [None]:
# Display the shapes of the assembled train and test feature frames.
final_train.shape, final_test.shape

# Train the Neural Network Model

### Model Specification

In [None]:
class myCallback(tf.keras.callbacks.Callback):
    """Keras callback that stops training once the epoch loss is low enough.

    Args:
        loss_threshold: Training is stopped after the first epoch whose
            reported ``loss`` is strictly below this value. Defaults to 5.0.
    """

    def __init__(self, loss_threshold=5.0):
        super().__init__()
        self.loss_threshold = loss_threshold

    def on_epoch_end(self, epoch, logs=None):
        """Request a stop when the epoch's ``loss`` is below the threshold.

        Keras invokes the hook named ``on_epoch_end``; a method with any other
        name is never called during ``fit``.

        Args:
            epoch: Index of the epoch that just finished (unused).
            logs: Metrics dict for the epoch; may be ``None`` or lack ``loss``.
        """
        loss = (logs or {}).get("loss")
        if loss is not None and loss < self.loss_threshold:
            print("\nLoss fell below {} so cancelling training!".format(self.loss_threshold))
            # ``stop_training`` is the flag the Keras training loop checks.
            self.model.stop_training = True

    # Backward-compatible alias for the previous (never auto-invoked) method name.
    epoch_end = on_epoch_end

callback = myCallback()

# Fully connected network: two ReLU hidden layers (81 and 64 units) followed
# by a single output unit with no activation. The input width is the number
# of columns in the assembled training frame.
n_features = final_train.shape[1]
layers = [
    tf.keras.layers.Dense(units=81, activation="relu", input_shape=[n_features]),
    tf.keras.layers.Dense(units=64, activation="relu"),
    tf.keras.layers.Dense(units=1),
]
model = tf.keras.models.Sequential(layers)

# NOTE(review): "accuracy" is normally a classification metric; for this
# continuous target it is probably uninformative - confirm before relying on it.
model.compile(loss="mae", optimizer="adam", metrics=["accuracy"])

# The last 20% of the training rows are held out for validation.
fit_options = dict(
    epochs=500,
    batch_size=1,
    validation_split=0.2,
    callbacks=[callback],
)
history = model.fit(final_train, y_train, **fit_options)

### Make Prediction on test data

In [None]:
# Predict on the test features and flatten the output to one value per row.
raw_predictions = model.predict(final_test)
y_prediction = raw_predictions.flatten()

# Pair each prediction with its test-set Id and write the submission file.
data = {"Id": df_test["Id"], "SalePrice": y_prediction}
df_final = pd.DataFrame(data)
df_final.to_csv("final_sub.csv", index=False)

# Display the first rows of the submission frame.
df_final.head()