# In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# Load the dataset
# Assuming the dataset is named 'house_prices.csv'
file_name = 'house_prices.csv'
data = pd.read_csv(file_name)

# Quick look at the data: a title line, the first rows, then the summary
# statistics, each printed on its own line(s).
print("Dataset Overview:", data.head(), data.describe(), sep="\n")

# Written answers to the five regression-metric questions.
# Each entry pairs a heading with the sentences printed beneath it. Headings
# keep their leading newline so a blank line separates consecutive answers.
qa_sections = (
    # Q1. Best regression metric for predicting house prices
    (
        "\nQ1. Best regression metric for predicting house prices:",
        (
            "In predicting house prices, which is a regression problem, Mean Absolute Error (MAE) is often preferred.",
            "MAE provides a clear understanding of the average prediction error in the same units as the target variable, which is helpful in practical terms.",
        ),
    ),
    # Q2. Choosing between MSE and R-squared
    (
        "\nQ2. Choosing between MSE and R-squared:",
        (
            "If the goal is to predict the actual price of a house as accurately as possible, Mean Absolute Error (MAE) or Root Mean Squared Error (RMSE) are more appropriate than R-squared.",
            "MSE measures the average squared error, which is useful for understanding the magnitude of errors, but RMSE provides a more interpretable measure in the same units as the target.",
        ),
    ),
    # Q3. Metric for datasets with outliers
    (
        "\nQ3. Metric for datasets with significant outliers:",
        (
            "For datasets with significant outliers, RMSE may not be the best metric as it can be heavily influenced by outliers.",
            "In such cases, MAE is often preferred because it is less sensitive to outliers compared to MSE.",
        ),
    ),
    # Q4. Choosing between MSE and RMSE when values are close
    (
        "\nQ4. Choosing between MSE and RMSE when values are close:",
        (
            "When MSE and RMSE values are very close, RMSE is generally preferred because it is in the same units as the target variable, making interpretation easier.",
        ),
    ),
    # Q5. Best metric to measure variance explained by the model
    (
        "\nQ5. Best metric to measure how well the model explains the variance:",
        (
            "R-squared is the most appropriate metric to measure how well the model explains the variance in the target variable.",
            "R-squared indicates the proportion of the variance in the target variable that is predictable from the features.",
        ),
    ),
)

# Emit every heading followed by its sentences, one print call per line.
for heading, sentences in qa_sections:
    print(heading)
    for sentence in sentences:
        print(sentence)

# Example code to implement an SVM model and calculate metrics
# Prepare the data: every column except 'price' is used as a feature and
# 'price' is the regression target.
X = data.drop('price', axis=1)
y = data['price']

# Split the data (70% train / 30% test, fixed seed so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Standardize the features. The scaler is fitted on the training split only
# and then re-used on the test split, so no test-set statistics leak into
# training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Standardize the target as well. SVR's C (regularization) and epsilon (width
# of the no-penalty tube) act in the units of the target, so fitting directly
# on raw prices makes the default C=1.0 / epsilon=0.1 depend on the currency
# scale and can leave the model badly under-fitted when prices are large.
# Fitting on a zero-mean / unit-variance target makes the defaults meaningful.
y_scaler = StandardScaler()
y_train_scaled = y_scaler.fit_transform(
    np.asarray(y_train, dtype=float).reshape(-1, 1)
).ravel()

# Train an SVM regression model with a linear kernel
svr = SVR(kernel='linear')
svr.fit(X_train_scaled, y_train_scaled)

# Make predictions and map them back to the original price units, so y_pred
# is directly comparable with y_test.
y_pred = y_scaler.inverse_transform(
    svr.predict(X_test_scaled).reshape(-1, 1)
).ravel()

# Score the test-set predictions: absolute error, squared error, its square
# root (same units as the target) and the coefficient of determination.
mse = metrics.mean_squared_error(y_test, y_pred)
mae = metrics.mean_absolute_error(y_test, y_pred)
r2 = metrics.r2_score(y_test, y_pred)
rmse = np.sqrt(mse)

# Report all four values, two decimals each, preceded by one blank line.
print(
    f"\nMean Absolute Error (MAE): {mae:.2f}",
    f"Mean Squared Error (MSE): {mse:.2f}",
    f"Root Mean Squared Error (RMSE): {rmse:.2f}",
    f"R-squared: {r2:.2f}",
    sep="\n",
)

# Optional: scatter the predictions against the true test values, drawn
# through the current Axes object rather than the pyplot state functions.
ax = plt.gca()
ax.scatter(y_test, y_pred, alpha=0.5)
ax.set_xlabel('Actual Prices')
ax.set_ylabel('Predicted Prices')
ax.set_title('Actual vs Predicted Prices')
plt.show()
