In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import LabelEncoder

# Load CSV files
train_df = pd.read_csv("/content/train.csv")
test_df = pd.read_csv("/content/test.csv")

# Step 1: Replace text NaN values with np.nan
nan_values = ['NaN', 'nan', 'NULL', 'null', 'None', '?', '']
train_df.replace(nan_values, np.nan, inplace=True)
test_df.replace(nan_values, np.nan, inplace=True)

# Step 2: Encode categorical (string) columns
label_encoders = {}

for col in train_df.columns:
    if train_df[col].dtype == 'object':
        le = LabelEncoder()
        combined = pd.concat([train_df[col], test_df[col]], axis=0).astype(str)
        le.fit(combined)
        train_df[col] = le.transform(train_df[col].astype(str))
        test_df[col] = le.transform(test_df[col].astype(str))
        label_encoders[col] = le

# Step 3: Drop SalePrice from test if present
if 'SalePrice' in test_df.columns:
    test_df.drop('SalePrice', axis=1, inplace=True)

# Step 4: Impute missing values (mean for numeric, mode for categorical)
for col in train_df.columns:
    if train_df[col].isnull().sum() > 0:
        if np.issubdtype(train_df[col].dtype, np.number):
            mean_val = train_df[col].mean()
            train_df[col].fillna(mean_val, inplace=True)
            if col in test_df.columns:
                test_df[col].fillna(mean_val, inplace=True)
        else:
            mode_val = train_df[col].mode()[0]
            train_df[col].fillna(mode_val, inplace=True)
            if col in test_df.columns:
                test_df[col].fillna(mode_val, inplace=True)

# Step 5: Prepare training data
X = train_df.drop('SalePrice', axis=1)
y = train_df['SalePrice']

# Step 6: Ensure column consistency between train and test
for col in X.columns:
    if col not in test_df.columns:
        test_df[col] = 0

extra_cols = set(test_df.columns) - set(X.columns)
test_df.drop(columns=extra_cols, inplace=True)

test_df = test_df[X.columns]

# Convert to numeric and final imputation
X = X.apply(pd.to_numeric, errors='coerce')
test_df = test_df.apply(pd.to_numeric, errors='coerce')

X.fillna(X.mean(), inplace=True)
test_df.fillna(X.mean(), inplace=True)

# Step 7: Train model
model = LinearRegression()
model.fit(X, y)

# Step 8: Predict
predictions = model.predict(test_df)

# Step 9: Print the predicted prices
print("\n🏠 Predicted House Prices:\n")
for i, price in enumerate(predictions, start=1):
    print(f"House {i}: ${price:,.2f}")

print("\n✅ Prediction complete! Total houses predicted:", len(predictions))


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  train_df[col].fillna(mean_val, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  test_df[col].fillna(mean_val, inplace=True)



🏠 Predicted House Prices:

House 1: $105,939.70
House 2: $153,986.33
House 3: $166,270.42
House 4: $179,821.72
House 5: $190,122.92
House 6: $171,983.86
House 7: $184,907.98
House 8: $162,901.18
House 9: $199,835.29
House 10: $109,199.26
House 11: $201,327.40
House 12: $109,076.35
House 13: $91,156.55
House 14: $146,811.78
House 15: $107,448.74
House 16: $344,445.92
House 17: $236,513.03
House 18: $306,837.43
House 19: $297,860.13
House 20: $432,299.84
House 21: $278,113.93
House 22: $208,971.18
House 23: $178,292.00
House 24: $165,115.11
House 25: $203,507.98
House 26: $211,438.46
House 27: $290,984.89
House 28: $234,556.88
House 29: $188,549.13
House 30: $243,929.95
House 31: $212,459.83
House 32: $95,740.68
House 33: $195,142.43
House 34: $290,050.37
House 35: $264,481.61
House 36: $250,982.77
House 37: $165,528.61
House 38: $168,330.04
House 39: $164,092.08
House 40: $157,232.91
House 41: $210,006.82
House 42: $147,179.85
House 43: $281,273.94
House 44: $243,549.92
House 45: $226,