In [7]:
# Implement linear regression to predict house prices based on square footage, number of bathrooms, and number of bedrooms.
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer

# Column the model learns to predict, and the input columns it learns from.
target = 'SalePrice'
features = [
    'GrLivArea',
    'FullBath',
    'BedroomAbvGr',
]

# Read the training and test tables from their CSV files.
train_df = pd.read_csv('/content/train.csv')
test_df = pd.read_csv('/content/test.csv')

# Training inputs and the known prices that go with them.
X_train = train_df[features]
y_train = train_df[target]

# The test table contributes inputs only; no target column is read from it.
X_test = test_df[features]

# Numeric preprocessing: fill missing values with the column median, then
# standardize the column.
numerical_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
])

# Send every selected feature column through the numeric preprocessing.
preprocessor = ColumnTransformer([('num', numerical_transformer, features)])

# End-to-end model: preprocessing followed by a linear regression.
model_pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('model', LinearRegression()),
])

# Train on the training data, then predict prices for the test data.
# Pipeline.fit returns the fitted pipeline itself, so the calls can be chained.
y_pred = model_pipeline.fit(X_train, y_train).predict(X_test)

# Build the output table once: each test row's Id next to its predicted price.
output_df = pd.DataFrame({
    'ID': test_df['Id'],
    'Predicted Price': y_pred
})

# Save the predictions to an Excel file; index=False keeps the DataFrame
# index out of the sheet.
output_df.to_excel('/content/predictions.xlsx', index=False)
print("Predictions saved to Excel file successfully.")


Predictions saved to Excel file successfully.
