In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, r2_score
import numpy as np


In [None]:
# Read the training and test tables from the CSV files under /content.
train_df = pd.read_csv('/content/train.csv')
test_df = pd.read_csv('/content/test.csv')

# Show both frame shapes (one per line), then the first rows of the training data.
print(train_df.shape, test_df.shape, sep="\n")
print(train_df.head())


(1460, 81)
(1459, 80)
   Id  MSSubClass MSZoning  LotFrontage  LotArea Street Alley LotShape  \
0   1          60       RL         65.0     8450   Pave   NaN      Reg   
1   2          20       RL         80.0     9600   Pave   NaN      Reg   
2   3          60       RL         68.0    11250   Pave   NaN      IR1   
3   4          70       RL         60.0     9550   Pave   NaN      IR1   
4   5          60       RL         84.0    14260   Pave   NaN      IR1   

  LandContour Utilities  ... PoolArea PoolQC Fence MiscFeature MiscVal MoSold  \
0         Lvl    AllPub  ...        0    NaN   NaN         NaN       0      2   
1         Lvl    AllPub  ...        0    NaN   NaN         NaN       0      5   
2         Lvl    AllPub  ...        0    NaN   NaN         NaN       0      9   
3         Lvl    AllPub  ...        0    NaN   NaN         NaN       0      2   
4         Lvl    AllPub  ...        0    NaN   NaN         NaN       0     12   

  YrSold  SaleType  SaleCondition  SalePrice  

In [None]:
# Columns used as model inputs and the column to predict.
features = ['GrLivArea', 'BedroomAbvGr', 'FullBath']
target = 'SalePrice'

# Take explicit copies of the feature columns so that later modifications of
# X / X_test_final never operate on a slice of train_df / test_df (modifying
# an un-copied slice raises pandas' "set on a copy of a slice" warning).
X = train_df[features].copy()
y = train_df[target]
X_test_final = test_df[features].copy()


In [None]:
# Fill missing feature values with per-column medians computed on the
# training features only, and reuse those same medians for the test features
# so both sets are preprocessed identically.
# The filled frames are assigned back instead of calling
# `X[col].fillna(..., inplace=True)`: that chained in-place form operates on
# an intermediate object and pandas warns it will never work in pandas 3.0.
feature_medians = X[features].median()
X = X.fillna(feature_medians)
X_test_final = X_test_final.fillna(feature_medians)


None
None
None


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X[col].fillna(X[col].median(), inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[col].fillna(X[col].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) inst

In [None]:
# Hold out 20% of the training rows for validation; the fixed random_state
# makes the split reproducible across runs.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Fit a linear regression on the training split.
model = LinearRegression()
model.fit(X=X_train, y=y_train)


In [None]:

# Report the fitted weight for each feature (two decimals), then the intercept.
print("Model Coefficients:")
for name_and_weight in zip(features, model.coef_):
    feature, coef = name_and_weight
    print(f"{feature}: {coef:.2f}")
print("Intercept:", model.intercept_)


Model Coefficients:
GrLivArea: 104.03
BedroomAbvGr: -26655.17
FullBath: 30014.32
Intercept: 52261.74862694461


In [None]:
# Score the fitted model on the validation split.
y_pred_val = model.predict(X_val)
mae = mean_absolute_error(y_true=y_val, y_pred=y_pred_val)
r2 = r2_score(y_true=y_val, y_pred=y_pred_val)

# One metric per line: MAE to two decimals, R-squared to four.
print(
    f"Mean Absolute Error: {mae:.2f}",
    f"R-squared: {r2:.4f}",
    sep="\n",
)


Mean Absolute Error: 35788.06
R-squared: 0.6341


In [None]:
# Predict sale prices for the test features with the fitted model.
test_predictions = model.predict(X_test_final)

# Pair each prediction with its row Id from the test file.
submission_df = pd.DataFrame({
    'Id': test_df['Id'],
    'SalePrice': test_predictions,
})

# Floor predictions at zero so no negative price is written out. A vectorized
# clip replaces the previous per-row Python lambda `max(0, x)`.
submission_df['SalePrice'] = submission_df['SalePrice'].clip(lower=0)

print("\n predicted house prices for test.csv:")
print(submission_df)

# Write the Id/SalePrice pairs without the DataFrame index column.
submission_df.to_csv('submission.csv', index=False)
print("\nPredictions saved to 'submission.csv'")


 predicted house prices for test.csv:
        Id      SalePrice
0     1461  122173.313104
1     1462  140561.538683
2     1463  201783.754896
3     1464  199183.097221
4     1465  192133.739106
...    ...            ...
1454  2915  115907.303921
1455  2916  115907.303921
1456  2917  102983.611090
1457  2918  103216.094466
1458  2919  240377.514797

[1459 rows x 2 columns]

Predictions saved to 'submission.csv'
