In [197]:
import warnings
# Silence every warning category for the rest of the session. Note that this
# also hides deprecation notices emitted by the libraries imported below.
warnings.simplefilter('ignore')
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

In [198]:
# Load the dataset from 'data.csv' (path is relative to the working directory).
df = pd.read_csv(filepath_or_buffer='data.csv')

In [None]:
# Preview the first five rows. Leaving the frame as the cell's last expression
# gives Jupyter's rich table rendering instead of print()'s plain-text dump,
# matching the bare-expression cells used elsewhere in this notebook.
df.head()

In [None]:
# Summary statistics of the numeric columns. Leaving the frame as the cell's
# last expression gives Jupyter's rich table rendering instead of print()'s
# plain-text dump, matching the bare-expression cells used elsewhere.
df.describe()

In [None]:
# Show the column labels of the loaded DataFrame.
df.columns

In [None]:
# Count the missing values in each column.
print(df.isna().sum())

In [None]:
# Find every column whose dtype is not numeric, report those labels, and then
# rebind `df` to a frame that no longer contains them.
non_numeric_columns = df.select_dtypes(exclude=np.number).columns
print("Non-numeric columns:", non_numeric_columns)
df = df.drop(non_numeric_columns, axis=1)

In [None]:
# Grid of pairwise relationship plots for the columns of df.
sns.pairplot(data=df)

In [None]:
# Pairwise correlations between the columns of df, drawn as an annotated heatmap.
correlation_matrix = df.corr()
sns.heatmap(
    data=correlation_matrix,
    cmap='coolwarm',
    annot=True,
)
plt.title("Correlation Matrix")
plt.show()

In [144]:
# Columns used as model inputs; 'price' is the regression target.
feature_columns = [
    'bedrooms',
    'bathrooms',
    'sqft_living',
    'sqft_lot',
    'floors',
    'waterfront',
    'view',
    'condition',
]
X = df[feature_columns]
y = df['price']

In [178]:
# Hold out 20% of the rows for testing; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Report the training split's dimensions and show a short preview instead of
# echoing the entire training frame as the cell output.
print("Training set shape:", X_train.shape)
X_train.head()

In [180]:
# Ordinary least-squares regressor; fit_intercept=True is the library default,
# spelled out here for readability.
model = LinearRegression(fit_intercept=True)

In [None]:
# Fit the regressor on the training split.
model.fit(X=X_train, y=y_train)

In [182]:
# One-column table of the fitted coefficients, indexed by the column labels of X.
coeff_df = pd.DataFrame({'coefficient': model.coef_}, index=X.columns)

In [None]:
# Display the coefficient table built in the previous cell (one row per column of X).
coeff_df

In [191]:
# Model evaluation: predict the target for the held-out test features.
y_pred = model.predict(X=X_test)

In [None]:
# Quick look: actual target values (x) against the model's predictions (y).
plt.scatter(x=y_test, y=y_pred)

In [None]:
# Distribution of the prediction errors (actual - predicted) on the test set.
# sns.distplot is deprecated; histplot with a KDE overlay on a density scale
# is the documented replacement for distplot's default histogram + KDE output.
sns.histplot(y_test - y_pred, bins=50, kde=True, stat="density")

In [None]:
# Score the test-set predictions: mean squared error and the coefficient of
# determination (R^2).
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

print("Mean Squared Error:", mse)
print("R-squared:", r2)

In [None]:
# Predictions and visualization

In [None]:
# Labelled scatter of actual test-set prices against the model's predictions.
plt.scatter(x=y_test, y=y_pred)
plt.title("Actual Prices vs. Predicted Prices")
plt.xlabel("Actual Prices")
plt.ylabel("Predicted Prices")
plt.show()

# Residual = actual minus predicted. The dashed red line marks zero error,
# so points above it were under-predicted and points below were over-predicted.
residuals = y_test - y_pred
plt.scatter(x=y_test, y=residuals)
plt.axhline(0, linestyle='--', color='red')
plt.title("Residual Plot")
plt.xlabel("Actual Prices")
plt.ylabel("Residuals")
plt.show()

# Predict the price of a single new house.
# The model was fitted on the eight columns of X, so the input row must supply
# all eight; the previous six-value row made predict() fail with a
# feature-count ValueError. Naming each value and ordering the frame by
# X.columns keeps the row aligned with the training columns.
new_house = {
    'bedrooms': 3,
    'bathrooms': 2,
    'sqft_living': 1500,
    'sqft_lot': 4000,
    'floors': 1,
    'waterfront': 0,
    'view': 0,       # placeholder: absent from the original row, adjust as needed
    'condition': 3,  # placeholder: absent from the original row, adjust as needed
}
new_data = pd.DataFrame([new_house], columns=X.columns)
predicted_price = model.predict(new_data)

print("Predicted Price:", predicted_price[0])