In [5]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error


# Input CSV locations -- edit these two paths so they point at your own copies.
train_file = r"C:\Users\gnana\Desktop\jupyter\house_price-prediction\train.csv"
test_file = r"C:\Users\gnana\Desktop\jupyter\house_price-prediction\test.csv"

# Read both CSVs into DataFrames (test file first, then the training file).
test_df = pd.read_csv(test_file)
train_df = pd.read_csv(train_file)

# Predictor columns selected from both the training and the testing frame.
features = ["GrLivArea", "BedroomAbvGr", "FullBath"]

# Name of the column to predict; switch to "price" if your CSV uses that name.
target_col = "SalePrice"

# Training inputs and target.
X_train = train_df[features]
y_train = train_df[target_col]

# Testing inputs. The test CSV may not contain the target column at all;
# in that case y_test is None and later code skips the test-set evaluation.
X_test = test_df[features]
y_test = test_df[target_col] if target_col in test_df.columns else None

# Fit an ordinary least-squares linear regression on the training features.
model = LinearRegression()
model.fit(X_train, y_train)

# In-sample predictions, shown next to the true training targets.
y_train_pred = model.predict(X_train)
print("Training Set Predictions:")
print(pd.DataFrame({'Actual': y_train, 'Predicted': y_train_pred}))

# Predictions for the testing set. y_test is None when the test CSV carries no
# target column, in which case only the raw predictions can be shown.
y_test_pred = model.predict(X_test)
if y_test is not None:
    print("\nTesting Set Predictions:")
    print(pd.DataFrame({'Actual': y_test, 'Predicted': y_test_pred}))

    # Evaluate model performance on both training and testing data.
    train_mae = mean_absolute_error(y_train, y_train_pred)
    test_mae = mean_absolute_error(y_test, y_test_pred)
    # RMSE is computed as sqrt(MSE) rather than via the `squared=False` keyword:
    # that keyword was deprecated in scikit-learn 1.4 and removed in 1.6, where
    # passing it raises TypeError. The square-root form works on every version.
    train_rmse = np.sqrt(mean_squared_error(y_train, y_train_pred))
    test_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))

    print("\nModel Performance:")
    print(f"Training MAE: {train_mae}")
    print(f"Testing MAE: {test_mae}")
    print(f"Training RMSE: {train_rmse}")
    print(f"Testing RMSE: {test_rmse}")
else:
    print("\nTesting Set Predictions (no target column provided in test set):")
    print(pd.DataFrame({'Predicted': y_test_pred}))
    


Training Set Predictions:
      Actual      Predicted
0     208500  211526.414424
1     181500  162218.761002
2     223500  219891.105629
3     140000  182602.158118
4     250000  237377.204677
...      ...            ...
1455  175000  204592.525661
1456  210000  251478.821102
1457  266500  253005.969824
1458  142125  140132.047179
1459  147500  131863.702253

[1460 rows x 2 columns]

Testing Set Predictions (no target column provided in test set):
          Predicted
0     120100.812977
1     139898.208279
2     202611.414586
3     199859.871426
4     192059.204300
...             ...
1454  113813.579125
1455  113813.579125
1456  100482.394785
1457  100386.048506
1458  243444.315076

[1459 rows x 1 columns]


In [6]:
# Show the test-set predictions as a one-column table (rich display of the cell's last expression).
pd.DataFrame(y_test_pred, columns=['Predicted'])

Unnamed: 0,Predicted
0,120100.812977
1,139898.208279
2,202611.414586
3,199859.871426
4,192059.204300
...,...
1454,113813.579125
1455,113813.579125
1456,100482.394785
1457,100386.048506
