# house price prediction

In [2]:
import sklearn
sklearn.__version__


'0.20.3'

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

import joblib
from tqdm import tqdm

import warnings
warnings.filterwarnings("ignore")


from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import Normalizer

from scipy.stats import chi2_contingency
import scipy.stats as stats
from scipy.stats import norm ,rankdata

from scipy.special import boxcox1p
from scipy.stats import normaltest
import statsmodels
import statsmodels.api as sm
from scipy.optimize import curve_fit

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from tensorflow.keras.layers import LSTM


from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import PowerTransformer

from sklearn.cluster import KMeans
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVR
from sklearn.multioutput import MultiOutputClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold, cross_val_score, train_test_split
from sklearn.metrics import classification_report,confusion_matrix
import xgboost as xgb

In [None]:
df = pd.read_csv("housing_train.csv")
df.T

In [None]:
print("Data Shape: "+str(df.shape))
print()
print("Columns: "+str(df.columns))
print()
print(df.info())
print()
df.describe()

## 1) Data Cleaning

My plan for data preparation: 
 - drop duplicated rows if any.
 - remove outliers
 - filling missing values

### a. Drop duplicates

In [4]:
print(df.duplicated().sum())
df.drop_duplicates(inplace=True)

0


### b. Removing Outliers

#### price and sqfeet outlier removal

In [5]:
def feature_outlier_removal(data, feature, min_q, max_q):
    feature_min_outlier_mask = data[feature] > data[feature].quantile(min_q)
    feature_max_outlier_mask = data[feature] < data[feature].quantile(max_q)
    data = data[(feature_min_outlier_mask) & (feature_max_outlier_mask)]
    print(feature, "min: ", min(data[feature]))
    print(feature, "max: ", max(data[feature]))
    return data

def numerical_outlier_removal(data):
    data = feature_outlier_removal(data, "price", 0.01, 0.999)
    data = feature_outlier_removal(data, "sqfeet", 0.002, 0.999)
    return data

In [None]:
raw_df = numerical_outlier_removal(df)
print("\nOutliers Removed :", df.shape[0] - raw_df.shape[0])
print("Data Shape: ", raw_df.shape[0])

#### beds and baths outlier removal

In [None]:
raw_df = raw_df[raw_df['beds'] <= 6] 
raw_df = raw_df[raw_df['baths'] <= 3.5] 

print("Data Shape: ", raw_df.shape[0])

#### lat and long outlier removal

The valid range of latitude in degrees is -90 and +90 for the southern and northern hemisphere respectively. Longitude is in the range -180 and +180 specifying coordinates west and east of the Prime Meridian, respectively. But here we are dealing wiht US data, so, Lat-long coorditates for cities in United States are in range: Latitude from 19.50139 to 64.85694 and longitude from -161.75583 to -68.01197.

In [None]:
lat_min_mask = raw_df['lat'] >= 19.50139
lat_max_mask = raw_df['lat'] <= 64.85694
raw_df = raw_df[(lat_min_mask) & (lat_max_mask)]

long_min_mask = raw_df['long'] >= -161.75583
long_max_mask = raw_df['long'] <= -68.01197
raw_df = raw_df[(long_min_mask) & (long_max_mask)]

print("lat min: ", min(raw_df.lat))
print("lat max: ", max(raw_df.lat))
print("long min: ", min(raw_df.long))
print("long max: ", max(raw_df.long))
print("Data Shape: ", raw_df.shape[0])

In [9]:
def Lat_long_outlier_removal(data):
    data = feature_outlier_removal(data, "lat", 0.01, 0.999)
    data = feature_outlier_removal(data, "long", 0.01, 0.999)
    return data

In [None]:
lat_long_df = Lat_long_outlier_removal(raw_df)
print("\nOutliers Removed :", raw_df.shape[0] - lat_long_df.shape[0])
print("Data Shape: ", lat_long_df.shape[0])

### c. Filling missing values

In [None]:
missing = raw_df.isnull().sum()
missing = missing[missing > 0]
print(missing)
missing.sort_values(inplace=True)
try:
    missing.plot.bar()
except:
    pass

#### work on Laundry Options: Model based imputation (filling missing values)

In [None]:
#before imputation
print(raw_df["laundry_options"].value_counts())
print(raw_df["laundry_options"].isna().sum())

In [13]:
raw_df["laundry_options_na"] = 0
raw_df["laundry_options_na"][raw_df["laundry_options"][raw_df["laundry_options"].isna()==True].index] = 1

In [None]:
from sklearn.neighbors import KNeighborsClassifier
decide_cols = ["beds", "baths", "cats_allowed", "dogs_allowed", 
               "smoking_allowed", "wheelchair_access", "electric_vehicle_charge",
               "comes_furnished", "price" ]

X_train = raw_df[decide_cols][raw_df["laundry_options"].isna()==False]
y_train = raw_df["laundry_options"][raw_df["laundry_options"].isna()==False]
X_test = raw_df[decide_cols][raw_df["laundry_options"].isna()==True]
 
neigh = KNeighborsClassifier(n_neighbors=5)
neigh.fit(X_train, y_train)
laundry_pred = neigh.predict(X_test)
print(laundry_pred)
print(laundry_pred.size)

# filling missing values
raw_df["laundry_options"][raw_df["laundry_options"].isna()==True] = laundry_pred

In [None]:
#after imputation
print(raw_df["laundry_options"].value_counts())
print(raw_df["laundry_options"].isna().sum())

#### work on Parking Options: Model based imputation (filling missing values)

In [None]:
#before imputation
print(raw_df["parking_options"].value_counts())
print(raw_df["parking_options"].isna().sum())

In [17]:
raw_df["parking_options_na"] = 0
raw_df["parking_options_na"][raw_df["parking_options"][raw_df["parking_options"].isna()==True].index] = 1

In [None]:
from sklearn.neighbors import KNeighborsClassifier
decide_cols = ["beds", "baths", "cats_allowed", "dogs_allowed", 
               "smoking_allowed", "wheelchair_access", "electric_vehicle_charge",
               "comes_furnished", "price" ]

X_train = raw_df[decide_cols][raw_df["parking_options"].isna()==False]
y_train = raw_df["parking_options"][raw_df["parking_options"].isna()==False]
X_test = raw_df[decide_cols][raw_df["parking_options"].isna()==True]
 
neigh = KNeighborsClassifier(n_neighbors=7)
neigh.fit(X_train, y_train)
laundry_pred = neigh.predict(X_test)
print(laundry_pred)
print(laundry_pred.size)

# filling missing values

raw_df["parking_options"][raw_df["parking_options"].isna()==True] = laundry_pred


In [None]:
#after imputation
print(raw_df["parking_options"].value_counts())
print(raw_df["parking_options"].isna().sum())

#### removing remaining empty features

In [None]:
print(raw_df.isnull().sum())
raw_df.dropna(inplace=True)

In [None]:
clean_df = raw_df.copy()
print(clean_df.columns)
print(clean_df.info())
clean_df.describe()

## 1) Feature Engineering

My plan for data preparation:
 - firstly remove unnecessary cols
 - create sqfeet range column
 - work on url
 - work on Latitude and Longitude
 - work on description



In [22]:
try:
    clean_df = clean_df.drop(['url', 'region_url', 'image_url'], axis=1)
except:
    pass

### a. Create sqfeet range column

![img](https://www.point2homes.com/news/wp-content/uploads/2017/01/Home-Size-Table-by-Province-CA.png)

In [23]:
# def sqfeet_range_column(data, feature='sqfeet'):
#     if data[feature] < 300:
#         return 'single room'
#     if data[feature] >= 300 and data[feature] < 500:
#         return 'mini'
#     if data[feature] >= 500 and data[feature] < 1000:
#         return 'small'
#     if data[feature] >= 1000 and data[feature] < 1500:
#         return 'medium'
#     if data[feature] >= 1500 and data[feature] < 2000:
#         return 'large'
#     if data[feature] >= 2000 and data[feature] < 2500:
#         return 'extra large'
#     if data[feature] >=2500:
#         return 'mansion'
    

# clean_df['sqfeet_range'] = clean_df.apply(sqfeet_range_column, axis=1)
# clean_df.sqfeet_range.value_counts()

### b. work on url

In [24]:
# #TODO: open this
# from urllib.parse import urlparse, parse_qs
# from tqdm import tqdm
# disc = {"url_parsed_loc": [], "url_params":[], "url_path_components": []}

# for i in tqdm(range(df.shape[0]), position=0, leave=True):
#     parsed = urlparse(df.url[i])
#     disc["url_parsed_loc"].append(parsed.netloc)
    
#     params = parse_qs(parsed.query)
#     disc["url_params"].append(params)
    
#     path_components = list(filter(bool, parsed.path.split('/')))
#     disc["url_path_components"].append(path_components)


# #TODO: open this

# url_df = pd.DataFrame(disc)
# print(url_df.url_params.value_counts())
# url_df.head()

# #TODO: open this
# url_df.drop("url_params", axis=1, inplace=True)
# url_df.head()



# #TODO: open this
# cnt=0
# for i in tqdm(range(df.shape[0]), position=0, leave=True):
#     if df["region_url"][i].find(url_df["url_parsed_loc"][i]) >= 0: 
#         cnt+=1

# print("Count: "+str(cnt))
# print("Error rate: "+str(1-(cnt/df.shape[0])))

### c. work on Latitude and Longitude

In [25]:
# # TODO: open this
# from sklearn.cluster import KMeans

# sse={}
# lat_long_df = clean_df[['lat', 'long']]

# for k in tqdm(range(1, 12), position=0, leave=True):
#     kmeans = KMeans(n_clusters=k, max_iter=1000).fit(lat_long_df)
#     lat_long_df["clusters"] = kmeans.labels_
#     sse[k] = kmeans.inertia_ 
# plt.figure()
# plt.plot(list(sse.keys()), list(sse.values()))
# plt.xlabel("Number of cluster")
# plt.show()

In [26]:
kmeans = KMeans(n_clusters=8, random_state=0)
lat_long_pred = kmeans.fit_predict(clean_df[["lat", "long"]])
print(lat_long_pred.size)
clean_df['lat_long_cluster'] = lat_long_pred

258418


In [27]:
clean_df = clean_df.reset_index(drop=True)

In [None]:
import matplotlib.pyplot as plt
fig, axes = plt.subplots(figsize=(10,10))
plt.scatter(x=clean_df['lat'], y=clean_df['long'], c=lat_long_pred)
plt.show()

### d.work on description

In [29]:
# stopwords= set(['br', 'the', 'i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've",\
#             "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', \
#             'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their',\
#             'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', \
#             'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', \
#             'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', \
#             'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after',\
#             'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further',\
#             'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more',\
#             'most', 'other', 'some', 'such', 'only', 'own', 'same', 'so', 'than', 'too', 'very', \
#             's', 't', 'can', 'will', 'just', 'don', "don't", 'should', "should've", 'now', 'd', 'll', 'm', 'o', 're', \
#             've', 'y', 'ain', 'aren', "aren't", 'couldn', "couldn't", 'didn', "didn't", 'doesn', "doesn't", 'hadn',\
#             "hadn't", 'hasn', "hasn't", 'haven', "haven't", 'isn', "isn't", 'ma', 'mightn', "mightn't", 'mustn',\
#             "mustn't", 'needn', "needn't", 'shan', "shan't", 'shouldn', "shouldn't", 'wasn', "wasn't", 'weren', "weren't", \
#             'won', "won't", 'wouldn', "wouldn't"])

In [30]:
# def decontracted(phrase):
#     # specific
#     phrase = re.sub(r"won't", "will not", phrase)
#     phrase = re.sub(r"can\'t", "can not", phrase)

#     # general
#     phrase = re.sub(r"n\'t", " not", phrase)
#     phrase = re.sub(r"\'re", " are", phrase)
#     phrase = re.sub(r"\'s", " is", phrase)
#     phrase = re.sub(r"\'d", " would", phrase)
#     phrase = re.sub(r"\'ll", " will", phrase)
#     phrase = re.sub(r"\'t", " not", phrase)
#     phrase = re.sub(r"\'ve", " have", phrase)
#     phrase = re.sub(r"\'m", " am", phrase)
#     return phrase

In [31]:
# def clean_text(sentance):
#     sentance = re.sub(r"http\S+", "", sentance)
#     sentance = BeautifulSoup(sentance, 'lxml').get_text()
#     sentance = decontracted(sentance)
#     sentance = re.sub("\S*\d\S*", "", sentance).strip()
#     sentance = re.sub('[^A-Za-z]+', ' ', sentance)
#     # https://gist.github.com/sebleier/554280
#     sentance = ' '.join(e.lower() for e in sentance.split() if e.lower() not in stopwords)
#     return sentance.strip()

In [32]:
# for i in tqdm(range(loop), position=0, leave=True):
#     df.description[i] = clean_text(df.description[i])

In [33]:
# df.head()
# df = df.drop(df.index[4]).reset_index()
# print(df.shape)

In [34]:
# for i in tqdm(range(loop), position=0, leave=True):
#     try:
#         df.description[i] = clean_text(df.description[i])
#     except:
#         df.drop(df.index[i])

In [35]:
# from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer 
# sid_obj = SentimentIntensityAnalyzer()

# description_dict = {"description_negative":[], "description_neutral": [], "description_positive":[]}


# loop = clean_df.shape[0]
# for i in tqdm(range(loop), position=0, leave=True):
#     desc = str(clean_df.description[i])
#     sentiment_dict = sid_obj.polarity_scores(desc) 
#     description_dict["description_negative"].append(sentiment_dict["neg"])
#     description_dict["description_neutral"].append(sentiment_dict["neu"])
#     description_dict["description_positive"].append(sentiment_dict["pos"]) 

In [36]:
# joblib.dump(description_dict, './pickles/description_dict.pkl') 

In [37]:
# description_dict = joblib.load('./pickles/description_dict.pkl') 

# desc_df = pd.DataFrame(description_dict)
# print(desc_df.shape)
# desc_df.head()

# clean_df = pd.concat([clean_df, desc_df], axis=1)

In [38]:
clean_df = clean_df.drop(["description"], axis=1)

In [None]:
clean_df.corr()
f, ax = plt.subplots(figsize=(16, 16))
sns.heatmap(clean_df.corr(), annot=True, linewidths=0.5, square=True, vmax=0.3, center=0, cmap=sns.cubehelix_palette())

In [40]:
# kernel stop here

In [None]:
clean_df.to_csv('clean_df.csv', index = False)
clean_df.head()

## 3) Model Building

In [None]:
df = pd.read_csv("clean_df.csv")
# df.drop(['state'], axis=1, inplace=True)
df = df.reindex(sorted(df.columns), axis=1)

df.T

In [43]:
# df.state.value_counts()

In [44]:
df.dropna(inplace=True)
df.shape

(258418, 21)

In [None]:
df = pd.get_dummies(df,drop_first=False)
df.head()

In [46]:
# df = df.iloc[:1000, :]

In [None]:
df_X = df.drop(["id", "price"], axis=1)
df_y = df.loc[:, "price"]
print(df_X.info())
df_X.shape

In [None]:
scaler = MinMaxScaler()
df_X = scaler.fit_transform(df_X)
# X_test = scaler.transform(X_test)
print(df_X)


In [49]:
X_train, X_test, y_train, y_test = train_test_split(df_X, df_y, test_size=0.2, random_state=42)

In [50]:
# X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))

# regressor = Sequential()
# # Adding the first LSTM layer and some Dropout regularisation
# regressor.add(LSTM(units = 50, return_sequences = True, input_shape = (X_train.shape[1], 1)))
# regressor.add(Dropout(0.2))
# regressor.add(LSTM(units = 50, return_sequences = True))
# regressor.add(Dropout(0.2))
# regressor.add(LSTM(units = 50, return_sequences = True))
# regressor.add(Dropout(0.2))
# regressor.add(LSTM(units = 50))
# regressor.add(Dropout(0.2))

# regressor.add(Dense(units = 1))

# regressor.compile(optimizer = 'adam', loss = 'mean_squared_error')
# regressor.fit(X_train, y_train, epochs = 1, batch_size = 5)

In [51]:
from sklearn import metrics as skmetrics

def calculate_regression_metrics(y_test, predictions):
    mean_squared_error = skmetrics.mean_squared_error(y_test, predictions)
    mean_absolute_error = skmetrics.mean_absolute_error(y_test, predictions)
    r2_error = skmetrics.r2_score(y_test, predictions)
    result = {'mean_squared_error': mean_squared_error, 'mean_absolute_error': mean_absolute_error, 'r2_score': r2_error}
    return result 

In [52]:
# from sklearn.preprocessing import PolynomialFeatures
# from sklearn import linear_model

# poly = PolynomialFeatures(degree=2)
# X_train_ = poly.fit_transform(X_train)
# X_test_ = poly.fit_transform(X_test)

# clf = linear_model.LinearRegression()
# clf.fit(X_, y_train)
# ppp = clf.predict(X_test_)
# print(skmetrics.r2_score(y_test, ppp))

In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

model = RandomForestRegressor(n_jobs=-1)
model.fit(X_train, y_train)
pred = model.predict(X_test)
pred = pred.reshape(-1, 1)

print(pred)
print("//////////////////////////////////////")
print(y_test)
print("//////////////////////////////////////")

calculate_regression_metrics(y_test, pred)

In [54]:
# from sklearn.model_selection import GridSearchCV

# param_test = {
#     'bootstrap': [True],
#     'max_depth': range(20, 100, 20),
#     'max_features': [2, 3],
#     'min_samples_leaf': [2, 4, 6],
#     'min_samples_split': [6, 10, 14],
#     'n_estimators': range(50, 500, 100)
# }

# gsearch = GridSearchCV(estimator = RandomForestRegressor(n_jobs=-1), 
#                        param_grid = param_test, 
#                           cv = 2, 
#                        n_jobs = -1, 
#                        verbose = 2)


# gsearch.fit(X_train,y_train)
# print(gsearch.best_params_, gsearch.best_score_)

# tuned_pred = gsearch.predict(X_test)
# model_evaluation(y_test, tuned_pred)

In [55]:
# fig, ax = plt.subplots(1, 1, figsize=(21,21))
# ax.scatter(range(y_test.size), y_test)
# ax.scatter(range(y_test.size), pred)

# # ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'k--', lw=4)
# ax.set_xlabel('Measured')
# ax.set_ylabel('Predicted')
# plt.show()

In [56]:
# fig, ax = plt.subplots(1, 1, figsize=(21,21))
# ax.plot(y_test)
# ax.plot(pred)

In [57]:
# xg_reg = xgb.XGBRegressor(n_jobs=6)
# xg_reg.fit(X_train, y_train)
# xg_reg = xg_reg.predict(X_test)


# pred = xg_reg.reshape(-1, 1)

# # pred = scalery.inverse_transform(pred)
# # y_test = scalery.inverse_transform(y_test)

# print(pred)
# print("//////////////////////////////////////")
# print(y_test)
# print("//////////////////////////////////////")



# calculate_regression_metrics(y_test, pred)

In [58]:
testing below

SyntaxError: invalid syntax (<ipython-input-58-e58ba6b6f4c4>, line 1)

In [None]:
# -*- coding: utf-8 -*-
"""
Created on Mon May 25 18:06:28 2020

@author: paras
"""

from flask import Flask, request
import flask
import joblib

import pandas as pd
import numpy as np

app = Flask(__name__)

@app.route('/')
def index():
    return flask.render_template('index.html')


def clean_data(raw_df):     
    lat_min_mask = raw_df['lat'] >= 19.50139
    lat_max_mask = raw_df['lat'] <= 64.85694
    raw_df = raw_df[(lat_min_mask) & (lat_max_mask)]
    long_min_mask = raw_df['long'] >= -161.75583
    long_max_mask = raw_df['long'] <= -68.01197
    raw_df = raw_df[(long_min_mask) & (long_max_mask)]    
    raw_df.dropna(inplace=True)
    return raw_df


def feature_engineer_data(clean_df):
    print("test0: ###########################")
    print(clean_df.shape)
    try:
        clean_df = clean_df.drop(['id', 'url', 'region_url', 'image_url', 'description'], axis=1)
    except:
        print("Custom Error: drop columns did not execute!!")
    
    lat_long_pred = lat_long_classifier.predict(clean_df[["lat", "long"]])
    print("test1: ###########################")
    print(clean_df.shape)
    clean_df['lat_long_cluster'] = lat_long_pred
    clean_df = clean_df.reset_index(drop=True)
    clean_df = clean_df.reindex(sorted(clean_df.columns), axis=1)
    clean_df.fillna(-1)
    
    print("test2: ###########################")
    print(clean_df.shape)
    
    clean_df = pd.get_dummies(clean_df,drop_first=True)
    print("test3: ###########################")
    print(clean_df.shape)
    return clean_df

def scale_data(df):
    new_df = min_max_scaler.transform(df)
    return new_df

def prdict_results(df):
    random_regressor_pred = random_regressor.predict(df)
    return random_regressor_pred



def process_input_data(df_input):
    int_cols = ['id', 'sqfeet', 'beds', 'cats_allowed', 
                'dogs_allowed', 'smoking_allowed', 
                'wheelchair_access', 'electric_vehicle_charge', 
                'comes_furnished']
    float_cols = ['baths', 'lat', 'long']
    
    df_input[int_cols] = df_input[int_cols].astype('int64')
    df_input[float_cols] = df_input[float_cols].astype('float64')
    print(df_input.info())
    
    print('CLEANING DATA..............')
    clean_df = clean_data(df_input)
    print('FEATURING DATA.............')
    df_featured = feature_engineer_data(clean_df)  
    print("test4: ###########################")
    print(df_featured.shape)
    print("DATA COLUMNS: //////////////")
    print(data_columns)
    sample_df = pd.DataFrame(columns = data_columns)
    main_df = sample_df.append(df_featured)
    main_df = main_df.fillna(0)
    print("MAIN DATAFRAME: //////////////")
    print(main_df)
    print(main_df.info())
    print(main_df.columns)
    
    for i in main_df.columns:
        if main_df[i].dtypes == 'float64':
            print(i, end="\n\n")
    
    print('SCALING DATA.............')
    df_scaled = scale_data(main_df)
    return df_scaled


@app.route('/predict', methods=['POST'])
def predict():
    form_data = request.form.to_dict()
    print("FORM DATA: //////////////")
    print(form_data)
    
    df_input = pd.DataFrame.from_records([form_data])
    df_input = pd.DataFrame(df_input)
    print("INPUT DATAFRAME: //////////////")
    print(df_input)       
    
    df_scaled = process_input_data(df_input)
    
    pred_val = ""
    pred_val = np.round(prdict_results(df_scaled), 2)
    print("PREDICTION: ////////////////")
    print(pred_val)
    msg = f"Wohoo! AI predicts the price of this property to be around {pred_val[0]} $"
    return flask.render_template('index.html', 
                                 predicted_value="{}".format("Prediction: "+str(pred_val[0])+" $"), 
                                 any_message=msg)





@app.route('/predict_multiple', methods=['POST'])
def predict_multiple():
    form_data = request.form.to_dict()
    print("FORM DATA")
    form_data_array = np.array(form_data["myarray"])
    print(form_data_array)

    js_df = pd.read_json(form_data["myarray"])
    
    df_input = pd.DataFrame.from_records(js_df)
    
    df_input.columns = df_input.iloc[0]
    df_input = df_input.iloc[1:, 1:]
    print("INPUT DATAFRAME")
    print(df_input.head())
    print(df_input.info())
    
    df_scaled = process_input_data(df_input)
    

    pred_val = ""
    msg = "Wohoo! AI predicts the price of this property."

    pred_val = prdict_results(df_scaled)
    print("PREDICTION: ////////////////")
    print(pred_val)
    
    res = pd.DataFrame({"id": df_input["id"], "prediction": pred_val})
    print("RESULT: //////////////")
    print(res) 
    res_json = res.to_json(orient='records')
    return flask.render_template('index.html', 
                                 predicted_value_multi=str(res_json), 
                                 any_message_multi=msg)
    
    
    


if __name__ == '__main__':
    random_regressor = joblib.load("./pickles/random_regressor.pkl")
    min_max_scaler = joblib.load("./pickles/min_max_scaler.pkl")
    data_columns = joblib.load("./pickles/data_columns.pkl")
    lat_long_classifier = joblib.load("./pickles/lat_long_classifier.pkl")
    
    app.run(host='0.0.0.0', port=8088)