### Prodigy ML Task 1 ###
Implement a linear regression model to predict the prices of houses based on their square 
footage and the number of bedrooms and bathrooms.

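A lower Mean Squared Error (MSE) and a higher R² score indicate a better model fit. Both metrics are reported on the training set only, because the test set does not include sale prices.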

In [2]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt

# Locations of the training and testing CSV files on the local machine
train_path = r'C:\Study Material\Iternship\Prodigy\Task 1\house-prices-advanced-regression-techniques\train.csv'
test_path = r'C:\Study Material\Iternship\Prodigy\Task 1\house-prices-advanced-regression-techniques\test.csv'

# Read both CSVs (training first, then testing) into DataFrames
train_df, test_df = (pd.read_csv(csv_path) for csv_path in (train_path, test_path))

# Preview the head of each frame under its own heading
print(
    "Training Data:",
    train_df.head(),
    "\nTesting Data:",
    test_df.head(),
    sep="\n",
)

# Predictor columns used for both the training and the testing feature frames
feature_columns = ['GrLivArea', 'BedroomAbvGr', 'FullBath', 'HalfBath']

# Select relevant features and the target variable from the training set
features_train = train_df[feature_columns]
target_train = train_df['SalePrice']

# Select the same features from the testing set (target not available in test set)
features_test = test_df[feature_columns]

# Check for missing values and handle them (if any)
print(features_train.isnull().sum())

# Impute with the TRAINING column means for both splits: the model is fit on
# training-set statistics, so the test features must go through exactly the
# same transformation instead of being filled with the test set's own means.
train_means = features_train.mean()
features_train = features_train.fillna(train_means)
features_test = features_test.fillna(train_means)

# Create the linear regression estimator and fit it on the training data
# (fit() returns the fitted estimator itself)
model = LinearRegression().fit(features_train, target_train)

# Run the fitted model over the training features, then the testing features
train_predictions, test_predictions = (
    model.predict(feature_frame) for feature_frame in (features_train, features_test)
)

# Score the fit on the training set: MSE and R² of predictions vs. actual prices
mse_train = mean_squared_error(y_true=target_train, y_pred=train_predictions)
r2_train = r2_score(y_true=target_train, y_pred=train_predictions)
print(
    f'Mean Squared Error (Training Set): {mse_train:.2f}',
    f'R² Score (Training Set): {r2_train:.2f}',
    sep='\n',
)

# Optional export: pair each test-set Id with its predicted SalePrice
# and write the two columns to a CSV without the index
output = test_df[['Id']].assign(SalePrice=test_predictions)
output.to_csv('house_price_predictions.csv', index=False)
print("Predictions saved to 'house_price_predictions.csv'")

# Example prediction for a single hypothetical house:
# 2500 sqft of living area, 4 bedrooms, 3 full baths and 2 half baths
new_house = pd.DataFrame({
    'GrLivArea': [2500],
    'BedroomAbvGr': [4],
    'FullBath': [3],
    'HalfBath': [2],
})
predicted_price = model.predict(new_house)  # array with one predicted price
print(f'Predicted Price for the new house: ${predicted_price[0]:,.2f}')


Training Data:
   Id  MSSubClass MSZoning  LotFrontage  LotArea Street Alley LotShape  \
0   1          60       RL         65.0     8450   Pave   NaN      Reg   
1   2          20       RL         80.0     9600   Pave   NaN      Reg   
2   3          60       RL         68.0    11250   Pave   NaN      IR1   
3   4          70       RL         60.0     9550   Pave   NaN      IR1   
4   5          60       RL         84.0    14260   Pave   NaN      IR1   

  LandContour Utilities  ... PoolArea PoolQC Fence MiscFeature MiscVal MoSold  \
0         Lvl    AllPub  ...        0    NaN   NaN         NaN       0      2   
1         Lvl    AllPub  ...        0    NaN   NaN         NaN       0      5   
2         Lvl    AllPub  ...        0    NaN   NaN         NaN       0      9   
3         Lvl    AllPub  ...        0    NaN   NaN         NaN       0      2   
4         Lvl    AllPub  ...        0    NaN   NaN         NaN       0     12   

  YrSold  SaleType  SaleCondition  SalePrice  
0   20