In [1]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score



# Load your dataset

In [2]:
# Read the King County house-sales CSV into a DataFrame.
# NOTE(review): this is a machine-specific absolute path; it must be adjusted
# before the notebook can run anywhere else.
df = pd.read_csv(
    filepath_or_buffer="C:/Users/Acer/Downloads/archive (19)/kc_house_data.csv",
)

# Preview the first five rows to get a feel for the columns and their values
print(df.head(n=5))



           id             date     price  bedrooms  bathrooms  sqft_living  \
0  7129300520  20141013T000000  221900.0         3       1.00         1180   
1  6414100192  20141209T000000  538000.0         3       2.25         2570   
2  5631500400  20150225T000000  180000.0         2       1.00          770   
3  2487200875  20141209T000000  604000.0         4       3.00         1960   
4  1954400510  20150218T000000  510000.0         3       2.00         1680   

   sqft_lot  floors  waterfront  view  ...  grade  sqft_above  sqft_basement  \
0      5650     1.0           0     0  ...      7        1180              0   
1      7242     2.0           0     0  ...      7        2170            400   
2     10000     1.0           0     0  ...      6         770              0   
3      5000     1.0           0     0  ...      7        1050            910   
4      8080     1.0           0     0  ...      8        1680              0   

   yr_built  yr_renovated  zipcode      lat     lo

# Define features and target variable

In [3]:

# Column to predict
target = 'price'

# Predictor columns. The order matters: any row passed to the scaler/model
# positionally must list its values in exactly this order.
features = [
    # size and layout
    'bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot', 'floors',
    # quality indicators
    'waterfront', 'view', 'condition', 'grade',
    # living-area breakdown
    'sqft_above', 'sqft_basement',
    # age
    'yr_built', 'yr_renovated',
    # location
    'zipcode', 'lat', 'long',
    # the dataset's *15 columns
    'sqft_living15', 'sqft_lot15',
]


# Split the data into training and testing sets

In [4]:

# Separate the predictor columns (X) from the column being predicted (y)
X, y = df[features], df[target]

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)



# Standardize the features

In [5]:

# Learn the scaling statistics from the training split only, so nothing about
# the test split leaks into preprocessing.
scaler = StandardScaler().fit(X_train)

# Apply the same fitted transformation to both splits.
# NOTE: X_train / X_test are rebound to the scaled arrays here, so re-running
# this cell without first re-running the split cell would refit the scaler on
# already-scaled data.
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)



# Initialize and train the Linear Regression model

In [6]:

# Ordinary least-squares regression with scikit-learn's default settings,
# fitted on the standardized training features.
model = LinearRegression()
model.fit(X=X_train, y=y_train)



# Make predictions on the test set

In [7]:

# Predicted prices for the held-out rows (X_test was scaled with the same scaler as X_train)
y_pred = model.predict(X=X_test)


# Evaluate the model

In [8]:

# Compare the held-out prices with the model's predictions:
#   mse - mean squared error (in squared price units; lower is better)
#   r2  - coefficient of determination (1.0 would be a perfect fit)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

# Report both metrics, one per line
print(f'Mean Squared Error: {mse}', f'R^2 Score: {r2}', sep='\n')



Mean Squared Error: 45173046132.79015
R^2 Score: 0.7011904448878412


# Make predictions for a new house

In [9]:


# Describe the new house by feature name instead of by position, so every
# value is explicitly tied to the column it belongs to.
new_house = {
    'bedrooms': 3,
    'bathrooms': 2,
    'sqft_living': 2000,
    'sqft_lot': 5000,
    'floors': 1,
    'waterfront': 0,
    'view': 0,
    'condition': 3,
    'grade': 7,
    'sqft_above': 1800,
    'sqft_basement': 200,
    'yr_built': 1980,
    'yr_renovated': 2000,
    'zipcode': 98001,
    'lat': 47.5112,
    'long': -122.257,
    'sqft_living15': 2000,
    'sqft_lot15': 5000,
}

# Fail fast if the description and the training feature list ever drift
# apart; otherwise a missing key would silently become a NaN column and an
# extra key would silently be dropped.
if set(new_house) != set(features):
    raise ValueError(
        'new_house keys do not match the training features: '
        f'{sorted(set(new_house) ^ set(features))}'
    )

# The scaler was fitted on a DataFrame, so give it a one-row DataFrame whose
# columns follow the training order. A bare NumPy array makes recent
# scikit-learn versions warn about missing feature names and relies entirely
# on positional order being right.
new_house_features = pd.DataFrame([new_house], columns=features)
new_house_features = scaler.transform(new_house_features)

# The model was trained on scaled arrays, so it takes the scaled row directly
predicted_price = model.predict(new_house_features)

print(f'Predicted Price for the new house: {predicted_price[0]}')


Predicted Price for the new house: 481856.1825114207


