In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

df = pd.read_csv('home_prices.csv')

In [2]:
df.head()

Unnamed: 0,Location,SquareFeet,Bedrooms,Bathrooms,Price
0,San Antonio,2833,4,3,419985
1,Houston,2743,3,1,361457
2,San Diego,2649,5,4,357192
3,Philadelphia,2867,3,2,418557
4,Phoenix,2596,2,4,446980


In [3]:
df = pd.get_dummies(df, columns=['Location'], drop_first=True)

In [4]:
df.head()

Unnamed: 0,SquareFeet,Bedrooms,Bathrooms,Price,Location_Houston,Location_Los Angeles,Location_New York,Location_Philadelphia,Location_Phoenix,Location_San Antonio,Location_San Diego
0,2833,4,3,419985,False,False,False,False,False,True,False
1,2743,3,1,361457,True,False,False,False,False,False,False
2,2649,5,4,357192,False,False,False,False,False,False,True
3,2867,3,2,418557,False,False,False,True,False,False,False
4,2596,2,4,446980,False,False,False,False,True,False,False


In [5]:
# Features (X) and target variable (y)
X = df[['SquareFeet', 'Bedrooms', 'Bathrooms'] + list(df.columns[5:])]
y = df['Price']

In [6]:
X.head()

Unnamed: 0,SquareFeet,Bedrooms,Bathrooms,Location_Los Angeles,Location_New York,Location_Philadelphia,Location_Phoenix,Location_San Antonio,Location_San Diego
0,2833,4,3,False,False,False,False,True,False
1,2743,3,1,False,False,False,False,False,False
2,2649,5,4,False,False,False,False,False,True
3,2867,3,2,False,False,True,False,False,False
4,2596,2,4,False,False,False,True,False,False


In [37]:
X.shape

(1000, 9)

In [7]:
y.head()

0    419985
1    361457
2    357192
3    418557
4    446980
Name: Price, dtype: int64

In [8]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [19]:
# Initialize and train the linear regression model
model = LinearRegression()
model.fit(X_train, y_train)

In [32]:
# Make predictions on the test set
predictions = model.predict(X_test)
predictions = predictions.astype(int)

In [35]:
# Calculate differences using list comprehension
differences = [predictions[i] - y_test.iloc[i] for i in range(len(predictions))]

# Create a DataFrame with predictions, actual values, and differences
results = pd.DataFrame({'predictions': predictions, 'actual': y_test.values, 'diff': differences})
results

Unnamed: 0,predictions,actual,diff
0,555156,312858,242298
1,546563,518331,28232
2,548823,446694,102129
3,536208,650733,-114525
4,552253,371177,181076
...,...,...,...
195,540619,331847,208772
196,544013,530914,13099
197,537265,761288,-224023
198,572626,460867,111759


In [34]:
positive_count = (results['diff'] > 0).sum()
negative_count = (results['diff'] < 0).sum()
zero_count = (results['diff'] == 0).sum()

print(f'Positive Count: {positive_count}')
print(f'Negative Count: {negative_count}')
print(f'Zero Count: {zero_count}')

Positive Count: 104
Negative Count: 96
Zero Count: 0


In [36]:
import pickle
# Save the model to a pickle file
with open('model.pkl', 'wb') as file:
    pickle.dump(model, file)

In [5]:
import pickle

# Load the model from the pickle file
with open('model.pkl', 'rb') as file:
    loaded_model = pickle.load(file)
# Make predictions on new data
predictions = loaded_model.predict([[2833,3,4,0,0,0,0,1,0]])

predictions = predictions.astype(int)

print(predictions[0])


562119




In [30]:
import pickle
import pandas as pd

# Load the model from the pickle file
with open('model.pkl', 'rb') as file:
    loaded_model = pickle.load(file)

# List of sample locations
locations = ['New York', 'Los Angeles', 'Chicago', 'Houston', 'Phoenix',
             'Philadelphia', 'San Antonio', 'San Diego']

def predict_price(square_feet, bedrooms, bathrooms, location):
    location_flags = [0] * len(locations)
    if location in locations:
        location_flags[locations.index(location)] = 1
    else:
        print(f'Error: Invalid location - {location}')
        return None
    predicted_price = loaded_model.predict([[square_feet+bedrooms+bathrooms]+location_flags])
    predictions = predicted_price.astype(int)
    return predictions[0]


In [31]:

# Example usage
square_feet = 2833
bedrooms = 4
bathrooms = 3
location = 'San Diego'

predicted_price = predict_price(square_feet, bedrooms, bathrooms, location)
if predicted_price is not None:
    print(f'Predicted Price: {predicted_price}')

Predicted Price: 525054


