In [None]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression

# Display configuration is set once, up front, so that every cell renders the
# same way on a fresh Restart & Run All regardless of which cells ran before it.
pd.set_option('display.max_columns', None)  # show every column of wide frames
# Two-decimal NumPy array output. Configured here (rather than right before the
# first prediction print) so arrays printed by earlier cells, such as the model
# coefficients, are formatted identically on every run.
np.set_printoptions(precision=2)

print("Libraries imported successfully.")

Libraries imported successfully.


In [None]:
# Read the training and test splits from CSV files in the working directory.
train_df = pd.read_csv("train.csv")
test_df = pd.read_csv("test.csv")

# Preview the first five rows of the raw training frame.
print("Original Training Data (first 5 rows):", train_df.head(), sep="\n")

Original Training Data (first 5 rows):
   Id  MSSubClass MSZoning  LotFrontage  LotArea Street Alley LotShape  \
0   1          60       RL         65.0     8450   Pave   NaN      Reg   
1   2          20       RL         80.0     9600   Pave   NaN      Reg   
2   3          60       RL         68.0    11250   Pave   NaN      IR1   
3   4          70       RL         60.0     9550   Pave   NaN      IR1   
4   5          60       RL         84.0    14260   Pave   NaN      IR1   

  LandContour Utilities LotConfig LandSlope Neighborhood Condition1  \
0         Lvl    AllPub    Inside       Gtl      CollgCr       Norm   
1         Lvl    AllPub       FR2       Gtl      Veenker      Feedr   
2         Lvl    AllPub    Inside       Gtl      CollgCr       Norm   
3         Lvl    AllPub    Corner       Gtl      Crawfor       Norm   
4         Lvl    AllPub       FR2       Gtl      NoRidge       Norm   

  Condition2 BldgType HouseStyle  OverallQual  OverallCond  YearBuilt  \
0       Norm    

In [None]:
def preprocess_data(df):
    """Build the three-column model feature frame from a raw housing frame.

    Args:
        df: DataFrame containing at least the columns ``GrLivArea``,
            ``BedroomAbvGr``, ``FullBath`` and ``HalfBath``. It is not modified.

    Returns:
        A new DataFrame with the columns ``GrLivArea``, ``BedroomAbvGr`` and
        ``TotalBath`` (in that order), where ``TotalBath`` is
        ``FullBath + 0.5 * HalfBath``.

    Raises:
        KeyError: if any of the required columns is missing from ``df``.
    """
    # A half bath contributes half as much as a full bath.
    bath_total = df['FullBath'] + 0.5 * df['HalfBath']
    # assign() returns a new frame, so the caller's frame is left untouched.
    with_bath = df.assign(TotalBath=bath_total)
    return with_bath[['GrLivArea', 'BedroomAbvGr', 'TotalBath']]

# Both splits go through the same preprocessing so their columns match.
X_train = preprocess_data(df=train_df)
X_test = preprocess_data(df=test_df)
# The regression target is taken directly from the raw training frame.
y_train = train_df['SalePrice']

# Preview the first five rows of each feature frame.
print("Processed Training Features (X_train, first 5 rows):", X_train.head(), sep="\n")
print("\nProcessed Test Features (X_test, first 5 rows):", X_test.head(), sep="\n")

Processed Training Features (X_train, first 5 rows):
   GrLivArea  BedroomAbvGr  TotalBath
0       1710             3        2.5
1       1262             3        2.0
2       1786             3        2.5
3       1717             3        1.0
4       2198             4        2.5

Processed Test Features (X_test, first 5 rows):
   GrLivArea  BedroomAbvGr  TotalBath
0        896             2        1.0
1       1329             3        1.5
2       1629             3        2.5
3       1604             3        2.5
4       1280             2        2.0


In [None]:
# Report missing values in the test features before imputation.
print("Missing values in X_test BEFORE handling:")
print(X_test.isnull().sum())
print("-" * 20)

# Fill gaps in each test column with the TRAINING mean of that column, so no
# statistic is computed from the test split itself.
# fillna() with a Series fills column-by-column by matching column labels.
# The result is rebound to X_test instead of calling
# `X_test[col].fillna(..., inplace=True)`: that chained in-place form operates
# on a temporary column object, emits a FutureWarning on recent pandas, and
# silently leaves X_test unchanged when Copy-on-Write is enabled.
X_test = X_test.fillna(X_train.mean())

# Report again to confirm nothing is left missing.
print("\nMissing values in X_test AFTER handling:")
print(X_test.isnull().sum())

Missing values in X_test BEFORE handling:
GrLivArea       0
BedroomAbvGr    0
TotalBath       0
dtype: int64
--------------------

Missing values in X_test AFTER handling:
GrLivArea       0
BedroomAbvGr    0
TotalBath       0
dtype: int64


In [None]:
# Linear regression estimator (scikit-learn), fitted on the preprocessed
# training features against the SalePrice target.
model = LinearRegression()
# fit() is left as the cell's last expression; in a notebook its return value
# is what the cell would echo as output.
model.fit(X_train, y_train)

In [None]:
# Summarize the fitted model: one coefficient per feature column of X_train
# (in column order), followed by the intercept.
print(
    "Model training is complete.",
    f"Model Coefficients: {model.coef_}",
    f"Model Intercept: {model.intercept_}",
    sep="\n",
)

Model training is complete.
Model Coefficients: [   107.04 -27861.46  26337.6 ]
Model Intercept: 52305.361941846495


In [None]:
# Two-decimal formatting for printed NumPy arrays (a process-wide setting).
np.set_printoptions(precision=2)

# Predict a sale price for every row of the prepared test features.
predictions = model.predict(X_test)

# Preview the first five predictions.
print("\nFirst 5 Predicted Prices:", predictions[:5], sep="\n")


First 5 Predicted Prices:
[118828.82 150484.93 208934.84 206258.82 186270.18]


In [None]:
# Pair each test-row Id with its predicted price; the frame keeps test_df's index.
submission = test_df[["Id"]].assign(SalePrice=predictions)
# Write without the index so the CSV holds only the Id and SalePrice columns.
submission.to_csv('house_price_predictions.csv', index=False)

print("\nFinal Submission DataFrame (first 5 rows):", submission.head(), sep="\n")
print("\nSUCCESS: Results saved to 'house_price_predictions.csv'")


Final Submission DataFrame (first 5 rows):
     Id      SalePrice
0  1461  118828.817948
1  1462  150484.930371
2  1463  208934.844141
3  1464  206258.818173
4  1465  186270.178972

SUCCESS: Results saved to 'house_price_predictions.csv'
