In [1]:
import pandas as pd
import numpy as np
from sklearn import linear_model
import math

In [2]:
# Load the Crestwood sales data; the first CSV column is used as the row index.
df = pd.read_csv(
    "datasets/updated_crestwood.csv",
    index_col=0,
)

In [3]:
# Preview the first five rows to sanity-check the columns that were loaded.
df.head()

Unnamed: 0,Date_Sold,Price,Beds,Baths,Square_Feet
0,2022-02-03,569900,5,4.0,3557
1,2022-02-02,242500,2,1.0,1200
2,2022-01-28,565000,3,2.0,2118
3,2022-01-28,569900,4,4.0,3364
4,2022-01-28,415000,4,4.0,3185


In [4]:
# Inspect the column dtypes as read from the CSV (before any conversion).
df.dtypes

Date_Sold       object
Price            int64
Beds             int64
Baths          float64
Square_Feet      int64
dtype: object

In [5]:
# Date_Sold comes out of the CSV as object dtype; convert it to real timestamps.
date_dtypes = {'Date_Sold': 'datetime64[ns]'}
df = df.astype(date_dtypes)

In [6]:
# Re-check the dtypes to confirm Date_Sold is now datetime64[ns].
df.dtypes

Date_Sold      datetime64[ns]
Price                   int64
Beds                    int64
Baths                 float64
Square_Feet             int64
dtype: object

In [7]:
# Linear regression model with scikit-learn's default settings; fitted in the next cell.
reg = linear_model.LinearRegression()

In [8]:
# Fit Price as a linear function of square footage, bedrooms and bathrooms.
feature_columns = ['Square_Feet', 'Beds', 'Baths']
reg.fit(df[feature_columns], df['Price'])

LinearRegression()

In [9]:
# Fitted coefficients, in the order the features were passed to fit():
# Square_Feet, Beds, Baths.
reg.coef_

array([   72.11544819,  3220.78732651, 59937.95913413])

In [10]:
# Fitted intercept term of the linear model.
reg.intercept_

29550.134309156274

In [11]:
# Predict price of homes

# Test 1: 2000 sqft, 3 bed, 2 bath
# The model was fitted on a DataFrame, so the query is passed as a DataFrame with
# the same column names (in the same order). A bare nested list makes scikit-learn
# warn that X has no valid feature names and leaves the feature order implicit.
reg.predict(pd.DataFrame({'Square_Feet': [2000], 'Beds': [3], 'Baths': [2]}))

array([303319.31094118])

In [12]:
# Test 2: 1500 sqft, 2 bed, 2 bath
# Passed as a DataFrame carrying the training column names so scikit-learn can
# validate the features instead of warning about missing feature names.
reg.predict(pd.DataFrame({'Square_Feet': [1500], 'Beds': [2], 'Baths': [2]}))

array([264040.79951861])

In [13]:
# Test 3: 3300 sqft, 4 bed, 2 bath
# Passed as a DataFrame carrying the training column names so scikit-learn can
# validate the features instead of warning about missing feature names.
reg.predict(pd.DataFrame({'Square_Feet': [3300], 'Beds': [4], 'Baths': [2]}))

array([400290.18091743])

In [14]:
# Hypothetical homes to price: one entry per home in each list.
d = {
    'area': [1000, 1200, 1500, 1750, 2000, 2200, 2500, 2750, 3000, 3200, 3500, 3700, 4000, 4200, 4500, 4700, 5000],
    'beds': [1, 2, 2, 3, 3, 2, 3, 3, 3, 4, 4, 4, 5, 5, 6, 5, 6],
    'baths': [1, 1.5, 2, 2, 2, 3, 2.5, 3, 2.5, 3, 4, 4, 5, 4.5, 4, 5, 6]
}
df2 = pd.DataFrame(data=d)

# Map the local column names onto the names the model was fitted with and select
# exactly those three features, in training order. This keeps feature names
# attached (no scikit-learn feature-name warning) and does not depend on df2
# containing only feature columns.
prices = reg.predict(
    df2.rename(columns={'area': 'Square_Feet', 'beds': 'Beds', 'baths': 'Baths'})[
        ['Square_Feet', 'Beds', 'Baths']
    ]
)

In [15]:
# Add the model's predictions as a new column and display the resulting table.
df2 = df2.assign(Price_Estimate=prices)
df2

Unnamed: 0,area,beds,baths,Price_Estimate
0,1000,1,1.0,164824.328962
1,1200,2,1.5,212437.185494
2,1500,2,2.0,264040.799519
3,1750,3,2.0,285290.448893
4,2000,3,2.0,303319.310941
5,2200,2,3.0,374459.572387
6,2500,3,2.5,369346.014604
7,2750,3,3.0,417343.856219
8,3000,3,2.5,405403.7387
9,3200,4,3.0,453016.595232


In [16]:
# Write the table of estimates to CSV (the row index is written as the first column).
output_path = 'datasets/estimated.csv'
df2.to_csv(output_path)