# In [None]:
# Task 3: Predict Housing Prices

# Importing necessary libraries
import joblib
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the dataset
# Point DATASET_PATH at your own CSV file if it lives elsewhere
DATASET_PATH = 'housing_data.csv'
data = pd.read_csv(DATASET_PATH)

# Preview the leading rows as a quick sanity check of the load
print(data.head(5))

# Data Preprocessing
# Discard rows with missing values, then one-hot encode the categorical
# columns (e.g., location); drop_first=True omits the first level of each
# encoded column.
data = pd.get_dummies(data.dropna(), drop_first=True)

# Separate the target (y) from the features (X)
TARGET_COLUMN = 'Price'  # Replace 'Price' with the name of your target column
y = data[TARGET_COLUMN]
X = data.drop(columns=TARGET_COLUMN)

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Feature scaling: the scaler is fitted on the training rows only, and that
# same fitted transform is then applied to both splits
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Model Training
# Fit a Random Forest Regressor (seeded so runs are repeatable) on the
# scaled training data
model = RandomForestRegressor(random_state=42).fit(X_train, y_train)

# Predict on the held-out test rows
y_pred = model.predict(X_test)

# Score the predictions against the true test targets
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

for label, score in (("Mean Squared Error:", mse), ("R-squared Score:", r2)):
    print(label, score)

# Feature Importance
# Pair every feature column with the importance reported by the fitted
# forest and list the highest-importance features first
feature_importances = pd.DataFrame(
    {
        'Feature': X.columns,
        'Importance': model.feature_importances_,
    }
).sort_values(by='Importance', ascending=False)
print(feature_importances)

# Save the trained model and the fitted scaler. Both files are needed at
# prediction time: new inputs must go through the same scaler that was
# fitted on the training data before they are passed to the model.
# (joblib is imported at the top of the file so that a missing dependency is
# reported before training starts rather than after it has finished.)
joblib.dump(model, 'housing_price_model.pkl')
joblib.dump(scaler, 'scaler.pkl')

# Loading and predicting with the model (usage example, left commented out)
# NOTE: joblib.load unpickles the file, so only load files you trust.
# NOTE: 'new_data' must contain the same feature columns, in the same order,
# as X had when the scaler was fitted (i.e. after the get_dummies encoding).
# loaded_model = joblib.load('housing_price_model.pkl')
# loaded_scaler = joblib.load('scaler.pkl')
# X_new = loaded_scaler.transform(new_data)  # Replace 'new_data' with your new input
# predictions = loaded_model.predict(X_new)