In [1]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
import pickle
import joblib

import pandas as pd
import numpy as np
import seaborn as sns
import datetime as dt
import matplotlib.pyplot as plt

In [3]:
housing = pd.read_csv('USA_Housing.csv')

housing.head()

Unnamed: 0,Avg. Area Income,Avg. Area House Age,Avg. Area Number of Rooms,Avg. Area Number of Bedrooms,Area Population,Price,Address
0,79545.458574,5.682861,7.009188,4.09,23086.800503,1059034.0,"208 Michael Ferry Apt. 674\nLaurabury, NE 3701..."
1,79248.642455,6.0029,6.730821,3.09,40173.072174,1505891.0,"188 Johnson Views Suite 079\nLake Kathleen, CA..."
2,61287.067179,5.86589,8.512727,5.13,36882.1594,1058988.0,"9127 Elizabeth Stravenue\nDanieltown, WI 06482..."
3,63345.240046,7.188236,5.586729,3.26,34310.242831,1260617.0,USS Barnett\nFPO AP 44820
4,59982.197226,5.040555,7.839388,4.23,26354.109472,630943.5,USNS Raymond\nFPO AE 09386


In [4]:
housing.rename(columns = {'Avg. Area Income':'Area_Income'}, inplace = True)
housing.rename(columns = {'Avg. Area House Age':'Area_House_Age'}, inplace = True)
housing.rename(columns = {'Avg. Area Number of Rooms':'Area_Number_Rooms'}, inplace = True)
housing.rename(columns = {'Avg. Area Number of Bedrooms':'Area_Number_Bedrooms'}, inplace = True)
housing.rename(columns = {'Area Population':'Area_Populations'}, inplace = True)

In [5]:
housing.isnull().sum()

Area_Income             0
Area_House_Age          0
Area_Number_Rooms       0
Area_Number_Bedrooms    0
Area_Populations        0
Price                   0
Address                 0
dtype: int64

In [6]:
housing = housing.drop(['Address'], axis=1)
housing.head()

Unnamed: 0,Area_Income,Area_House_Age,Area_Number_Rooms,Area_Number_Bedrooms,Area_Populations,Price
0,79545.458574,5.682861,7.009188,4.09,23086.800503,1059034.0
1,79248.642455,6.0029,6.730821,3.09,40173.072174,1505891.0
2,61287.067179,5.86589,8.512727,5.13,36882.1594,1058988.0
3,63345.240046,7.188236,5.586729,3.26,34310.242831,1260617.0
4,59982.197226,5.040555,7.839388,4.23,26354.109472,630943.5


In [7]:
# Splitting data into train and test sets
x = housing.drop(['Price'],axis=1)
y = housing['Price']

X_train, X_test, y_train, y_test = train_test_split(
    x, y, train_size=0.70,test_size=0.30, random_state=0)
print(X_train.shape, X_test.shape)

(3500, 5) (1500, 5)


In [8]:
from sklearn.metrics import mean_absolute_percentage_error
from sklearn import metrics

model_RFR=RandomForestRegressor(n_estimators = 1000, random_state = 42)
model_RFR.fit(X_train,y_train)
y_pred=model_RFR.predict(X_test)

print('R2 Value:',metrics.r2_score(y_test, model_RFR.predict(X_test)))
print('Accuracy',100- (np.mean(np.abs((y_test - y_pred ) / y_test)) * 100))
pd.Series(model_RFR.feature_importances_, index=x.columns).sort_values(ascending=False)

R2 Value: 0.888424780124613
Accuracy 90.40888727311193


Area_Income             0.428232
Area_House_Age          0.237280
Area_Populations        0.188185
Area_Number_Rooms       0.128525
Area_Number_Bedrooms    0.017778
dtype: float64

In [9]:
# Import library for metrics
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

# Mean absolute error (MAE)
mae = mean_absolute_error(y_test, y_pred)
# Mean squared error (MSE)
mse = mean_squared_error(y_test, y_pred)
# R-squared scores
r2 = r2_score(y_test.values,y_pred)
# Print metrics
print('Mean Absolute Error:', round(mae, 2))
print('Mean Squared Error:', round(mse, 2))
print('R-squared scores:', round(r2, 2))


Mean Absolute Error: 95982.06
Mean Squared Error: 14431051360.91
R-squared scores: 0.89


In [10]:
from sklearn.linear_model import LinearRegression
model = LinearRegression()
# Train the model
model.fit(X_train, y_train)
# Use model to make predictions
y_pred = model.predict(X_test)

from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
# Printout relevant metrics
print("Model Coefficients:", model.coef_)
print("Mean Absolute Error:", mean_absolute_error(y_test, y_pred))
print("Coefficient of Determination:", r2_score(y_test, y_pred))

pd.Series(model.coef_, index=x.columns).sort_values(ascending=False)

#Save model
pickle.dump(model, open('model.pkl', 'wb'))

Model Coefficients: [2.16187374e+01 1.66145180e+05 1.21010577e+05 1.76003780e+03
 1.51647974e+01]
Mean Absolute Error: 81563.14733994487
Coefficient of Determination: 0.9200757649412041


In [11]:
# Make prediction
import warnings
warnings.filterwarnings('ignore')

predict = model.predict(X_test)
result = X_test
result['Price'] = y_test
result['Predic_Price'] = predict.tolist()
result.head()

Unnamed: 0,Area_Income,Area_House_Age,Area_Number_Rooms,Area_Number_Bedrooms,Area_Populations,Price,Predic_Price
398,61200.726175,5.299694,6.234615,4.23,42789.692217,894251.068636,969608.346806
3833,63380.81467,5.344664,6.001574,2.45,40217.333577,932979.360621,953868.155486
4836,71208.269301,5.300326,6.077989,4.01,25696.361741,920747.911288,907506.328361
4572,50343.763518,6.027468,5.16024,4.35,27445.876739,691854.921027,493325.260323
636,54535.453719,5.278065,6.871038,4.41,30852.207006,732733.236293,718221.210115


In [12]:
!pip install flask --quiet
!pip install flask-ngrok --quiet

In [13]:
!wget https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.tgz

--2024-07-08 09:27:34--  https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.tgz
Resolving bin.equinox.io (bin.equinox.io)... 52.202.168.65, 54.237.133.81, 54.161.241.46, ...
Connecting to bin.equinox.io (bin.equinox.io)|52.202.168.65|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 13856790 (13M) [application/octet-stream]
Saving to: ‘ngrok-stable-linux-amd64.tgz’


2024-07-08 09:27:36 (18.7 MB/s) - ‘ngrok-stable-linux-amd64.tgz’ saved [13856790/13856790]



In [14]:
!tar -xvf /content/ngrok-stable-linux-amd64.tgz

ngrok


In [26]:
!./ngrok http http://localhost:8080

Your ngrok-agent version "2.3.41" is too old. The minimum supported agent version for your account is "3.2.0". Please update to a newer version with `ngrok update`, by downloading from https://ngrok.com/download, or by updating your SDK version. Paid accounts are currently excluded from minimum agent version requirements. To begin handling traffic immediately without updating your agent, upgrade to a paid plan: https://dashboard.ngrok.com/billing/subscription.

ERR_NGROK_121



In [21]:
!./ngrok authtoken 2ixU2HI2cWOO7VkHXw64agxgwVX_5ggeVsbN1Ddz8WGzndyC1

Authtoken saved to configuration file: /root/.ngrok2/ngrok.yml


In [22]:
# import Flask from flask module
from flask import Flask

# import run_with_ngrok from flask_ngrok to run the app using ngrok
from flask_ngrok import run_with_ngrok
from flask import Flask, request, render_template
app = Flask(__name__) #app name
run_with_ngrok(app)

model = pickle.load(open('model.pkl','rb'))

@app.route('/')
def home():
    return render_template('index.html')

@app.route('/', methods = ['POST'])
def predict():
    int_features = [int(x) for x in request.form.values()]
    features = [np.array(int_features)]
    prediction = model.predict(features)

    output = round(prediction[0], 2)

    if output < 0:
        return render_template('index.html', prediction_text = " Values entered not reasonable")
    elif output >= 0:
        return render_template('index.html', prediction_text = 'Predicted Price of the house is: ${}'.format(output))

#Run app
if __name__ == "__main__":
    app.run()

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
Exception in thread Thread-11:
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/urllib3/connection.py", line 203, in _new_conn
    sock = connection.create_connection(
  File "/usr/local/lib/python3.10/dist-packages/urllib3/util/connection.py", line 85, in create_connection
    raise err
  File "/usr/local/lib/python3.10/dist-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [Errno 111] Connection refused

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/urllib3/connectionpool.py", line 791, in urlopen
    response = self._make_request(
  File "/usr/local/lib/python3.10/dist-packages/urllib3/connectionpool.py", line 497, in _make_request
    conn.request(
  File "/usr/local/lib/python3.10/dist-packages/urllib3