In [59]:
import pandas as pd
from sqlalchemy import create_engine, text
from sqlalchemy.orm import Session
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import Dense
from sklearn.metrics import mean_absolute_error, mean_squared_error, accuracy_score, r2_score
from sklearn.linear_model import LinearRegression

In [2]:
# Create an engine that connects to the SQLite database
engine = create_engine('sqlite:///housing_model.db')

In [3]:
# Define the SQL query for the training data using text()
query = text('SELECT * FROM train_data')

In [4]:
# Use Pandas to read the query results into a DataFrame
train_data = pd.read_sql_query(query, engine)
train_data

Unnamed: 0,id,beds,baths,sqft,latitude,longitude,price
0,7463218,3.0,2.0,1682.0,33.534412,-112.221540,385000
1,7464552,4.0,3.0,1868.0,33.522633,-112.231800,400000
2,7464673,3.0,2.0,1819.0,33.509426,-112.245700,435000
3,7468885,3.0,1.0,1006.0,33.492165,-112.212120,330000
4,7470268,3.0,2.0,1390.0,33.486015,-112.252590,370000
...,...,...,...,...,...,...,...
815,2078977316,2.0,3.0,1272.0,33.643810,-112.045210,325000
816,2084045887,1.0,1.0,728.0,33.586388,-112.204860,30000
817,2088234198,4.0,3.0,2303.0,33.484330,-112.000626,910000
818,2098280181,,,,33.469505,-112.024230,260000


In [5]:
train_data.drop(columns=['id'], inplace=True)

In [6]:
train_data_df = train_data.dropna()
train_data_df

Unnamed: 0,beds,baths,sqft,latitude,longitude,price
0,3.0,2.0,1682.0,33.534412,-112.221540,385000
1,4.0,3.0,1868.0,33.522633,-112.231800,400000
2,3.0,2.0,1819.0,33.509426,-112.245700,435000
3,3.0,1.0,1006.0,33.492165,-112.212120,330000
4,3.0,2.0,1390.0,33.486015,-112.252590,370000
...,...,...,...,...,...,...
814,5.0,3.0,2900.0,33.506016,-112.090930,720000
815,2.0,3.0,1272.0,33.643810,-112.045210,325000
816,1.0,1.0,728.0,33.586388,-112.204860,30000
817,4.0,3.0,2303.0,33.484330,-112.000626,910000


In [7]:
# Split our preprocessed data into our features and target arrays
X = train_data_df.drop(columns='price').values
y = train_data_df['price'].values

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [8]:
# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [101]:
# Creating a model for housing price prediction
housing_price_model = Sequential([
    Dense(32, activation='linear', input_dim=len(X_train[0])),
    Dense(32, activation='linear'),
    Dense(1)
])

In [102]:
# Compiling the model for regression
housing_price_model.compile(optimizer='adam', loss='mean_absolute_error')

In [103]:
housing_price_model.fit(X_train, y_train, batch_size=64, epochs=100)

Epoch 1/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 732354.1250
Epoch 2/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 714276.5625 
Epoch 3/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 728308.0000 
Epoch 4/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 695529.3750 
Epoch 5/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 701127.6250 
Epoch 6/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 699419.5000 
Epoch 7/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 659045.9375 
Epoch 8/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 685523.0625 
Epoch 9/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 663197.8125 
Epoch 10/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 224934.6562 
Epoch 78/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 215574.3906 
Epoch 79/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 238285.2656 
Epoch 80/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 232453.3438 
Epoch 81/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 213642.9375 
Epoch 82/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 207562.5000 
Epoch 83/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 226469.6094 
Epoch 84/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 207581.9531 
Epoch 85/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 238086.4844 
Epoch 86/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x18e6518f3d0>

In [104]:
# Evaluate the model on the training data
loss = housing_price_model.evaluate(X_test, y_test)

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 161187.1875  


In [14]:
train_data['price'].mean()

690797.3914634146

In [38]:
LR = LinearRegression()

In [40]:
LR.fit(X_train, y_train)

In [45]:
predict = LR.predict(X_test)
predict

array([ 5.81042786e+05,  5.72473980e+05,  4.21643264e+05,  4.24269622e+05,
        2.91526784e+05,  2.68855386e+05,  2.87612268e+05,  3.23082371e+05,
        3.79241621e+05,  8.27730412e+05,  7.60207089e+05,  1.32747389e+06,
        7.37642749e+05,  5.15659186e+05,  1.22476146e+05,  4.40984216e+05,
        5.66398570e+05,  5.70487708e+05,  4.09147161e+05,  1.11567207e+05,
        1.57938844e+05,  5.69234882e+05,  4.83159902e+05,  4.96722167e+05,
        1.03599491e+06,  8.53202019e+05,  8.40989924e+04,  3.77001756e+05,
        1.28123626e+06,  5.14108729e+05,  2.36207001e+06,  7.37139709e+05,
        5.28222794e+05,  1.45004358e+06,  2.06701727e+06,  5.94622287e+05,
        7.07360054e+05,  4.96416066e+05,  6.23841909e+05,  4.63671089e+05,
        7.80909764e+05,  1.36067145e+06,  3.07409197e+05,  1.21579718e+06,
        1.73685489e+06,  8.79147385e+05,  4.04889449e+05,  5.50647563e+05,
        3.98901688e+05,  5.44702601e+05,  5.90410219e+04,  1.06296726e+06,
        5.22357718e+05,  

In [50]:
mse = mean_squared_error(y_test, predict)
mse

128397146570.26184

In [57]:
mae = mean_absolute_error(y_test, predict)
mae

232308.20090095612

In [60]:
r2 = r2_score(y_test, predict)
r2

0.5043070472739098