In [17]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [24]:
# Load the listings and keep the untouched frame under its own name.
raw_prices = pd.read_csv("minihomeprices.csv")

# Missing bedroom counts are imputed with the column median, then exact
# duplicate rows are dropped and the index is renumbered from 0.
bedrooms_median = raw_prices['bedrooms'].median()
df = (
    raw_prices
    .assign(bedrooms=raw_prices['bedrooms'].fillna(bedrooms_median))
    .drop_duplicates()
    .reset_index(drop=True)
)

# `df` keeps bedrooms as produced by fillna; `df_cleaned` casts it to int,
# which truncates any fractional median that was filled in.
df_cleaned = df.astype({'bedrooms': 'int'})

In [30]:
# Quick sanity report on the cleaned frame: dimensions, per-column null
# counts, and the describe() table for the numeric columns.
frame_shape = df_cleaned.shape
null_counts = df_cleaned.isnull().sum()
numeric_summary = df_cleaned.describe()

print("Shape:", frame_shape)
print("\nMissing values:\n", null_counts)
print("\nStatistical summary:\n", numeric_summary)

Shape: (6, 4)

Missing values:
 area        0
bedrooms    0
age         0
price       0
dtype: int64

Statistical summary:
               area  bedrooms        age          price
count     6.000000  6.000000   6.000000       6.000000
mean   3416.666667  4.166667  16.500000  648333.333333
std     587.934237  1.169045   8.288546  109117.673484
min    2600.000000  3.000000   8.000000  550000.000000
25%    3050.000000  3.250000   9.750000  572500.000000
50%    3400.000000  4.000000  16.500000  602500.000000
75%    3900.000000  4.750000  19.500000  722500.000000
max    4100.000000  6.000000  30.000000  810000.000000


In [33]:
# Predictors and target for the price regression.
feature_cols = ['area', 'bedrooms', 'age']
X = df_cleaned[feature_cols]
y = df_cleaned['price']

# Fixed seed so the hold-out split is the same on every run.
# NOTE(review): the summary output reports only 6 rows; with test_size=0.2
# that should leave about 4 training rows and 2 test rows (confirm against
# train_test_split's rounding). Four rows for three coefficients plus an
# intercept can be fit almost exactly, so hold-out metrics from this split
# are very unstable and should not be read as model quality.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Ordinary least squares on the training rows, then predict the held-out rows.
model = LinearRegression().fit(X_train, y_train)
y_pred = model.predict(X_test)

In [34]:
# Score the held-out predictions with each metric and bind the results to
# mse / mae / r2, in that order.
mse, mae, r2 = (
    score(y_test, y_pred)
    for score in (mean_squared_error, mean_absolute_error, r2_score)
)

print("\nModel Evaluation Metrics:")
print("Mean Squared Error (MSE):", mse)
print("Mean Absolute Error (MAE):", mae)
print("R² Score:", r2)


Model Evaluation Metrics:
Mean Squared Error (MSE): 1713617314.5467486
Mean Absolute Error (MAE): 39608.20895522428
R² Score: -29.46430781416442


In [35]:
# Compact report: hold-out metrics first, then the fitted parameters.
print("Model Evaluation Metrics:")
for label, scorer in (
    ("MSE:", mean_squared_error),
    ("MAE:", mean_absolute_error),
    ("R² Score:", r2_score),
):
    print(label, scorer(y_test, y_pred))

print("\nModel Parameters:")
print("Intercept:", model.intercept_)
# Pair each feature column with its fitted coefficient for readability.
coef_by_feature = {name: weight for name, weight in zip(X.columns, model.coef_)}
print("Coefficients:", coef_by_feature)

Model Evaluation Metrics:
MSE: 1713617314.5467486
MAE: 39608.20895522428
R² Score: -29.46430781416442

Model Parameters:
Intercept: 120373.13432835019
Coefficients: {'area': 115.6716417910469, 'bedrooms': 38432.83582089555, 'age': -1902.9850746268623}


In [36]:
# Preview up to the first 10 rows of `df`, the median-filled, de-duplicated
# frame before the integer cast (so bedrooms is not the int column of df_cleaned).
df.head(n=10)

Unnamed: 0,area,bedrooms,age,price
0,2600,3.0,20,550000
1,3000,4.0,15,565000
2,3200,4.0,18,610000
3,3600,3.0,30,595000
4,4000,5.0,8,760000
5,4100,6.0,8,810000
