In [None]:
# prompt: forecasting house prices accurately using a smart regression technique in data science

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Load the dataset (replace 'house_prices.csv' with your actual file).
# A missing file is reported and leaves df as None, which skips the pipeline below.
try:
    df = pd.read_csv('house_prices.csv')
except FileNotFoundError:
    print("Error: 'house_prices.csv' not found. Please upload your dataset.")
    df = None


def impute_missing(frame):
    """Return a copy of *frame* with missing values filled column by column.

    Numeric columns are filled with the column mean; every other column is
    filled with its most frequent value. A column with no observed values has
    nothing to impute from and is returned unchanged. The input frame is not
    modified.
    """
    filled = frame.copy()
    for col in filled.columns:
        if not filled[col].isnull().any():
            continue
        if pd.api.types.is_numeric_dtype(filled[col]):
            fill_value = filled[col].mean()
        else:
            modes = filled[col].mode()
            if modes.empty:
                # All values are missing: there is no mode to fall back on.
                continue
            fill_value = modes.iloc[0]
        # Assign the result back to the frame. The chained form
        # `frame[col].fillna(..., inplace=True)` acts on a temporary object
        # and does not update the frame under pandas Copy-on-Write.
        filled[col] = filled[col].fillna(fill_value)
    return filled


if df is not None:
    # Replace 'SalePrice' with your target column name.
    target_col = 'SalePrice'
    if target_col not in df.columns:
        raise KeyError(f"Target column '{target_col}' not found in the dataset.")

    # Data Preprocessing (example, adjust based on your data)

    # 1. Handle missing values.
    # Rows without a target cannot be used for supervised training or
    # evaluation, so they are dropped instead of having a price invented.
    df = df.dropna(subset=[target_col])
    # NOTE: the fill values are computed on the full dataset; for a strictly
    # leak-free evaluation compute them on the training split only.
    df = impute_missing(df)

    # 2. Feature Encoding (for categorical variables) - One-hot encoding example
    categorical_cols = df.select_dtypes(include=['object']).columns
    # drop_first avoids perfectly collinear dummy columns.
    df = pd.get_dummies(df, columns=categorical_cols, drop_first=True)

    # Define features (X) and target variable (y).
    # The target is kept in its original units so the reported errors are
    # expressed in prices rather than in standard deviations.
    X = df.drop(target_col, axis=1)
    y = df[target_col]

    # Split data into training and testing sets (adjust test_size as needed).
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # 3. Feature Scaling.
    # Tree-based gradient boosting is insensitive to feature scale; the scaler
    # is kept so the prepared features also suit scale-sensitive models.
    # It is fitted on the training features only, so no test-set statistics
    # leak into training, and it never sees the target, so it can be applied
    # to new data that has no price column.
    scaler = StandardScaler()
    numerical_cols = X.select_dtypes(include=['number']).columns
    if len(numerical_cols) > 0:
        # Work on explicit copies so the assignments below do not write into
        # (or warn about) slices of X.
        X_train = X_train.copy()
        X_test = X_test.copy()
        X_train[numerical_cols] = scaler.fit_transform(X_train[numerical_cols])
        X_test[numerical_cols] = scaler.transform(X_test[numerical_cols])

    # Model Training (Gradient Boosting Regressor) - tune hyperparameters as needed.
    model = GradientBoostingRegressor(
        n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42
    )
    model.fit(X_train, y_train)

    # Model Evaluation on the held-out test set.
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_test, y_pred)

    print(f"Mean Squared Error: {mse}")
    print(f"Root Mean Squared Error: {rmse}")
    print(f"R-squared: {r2}")

    # Example prediction (replace with your new data)
    # new_data = pd.DataFrame(...)  # Same columns, in the same order, as X
    # new_data[numerical_cols] = scaler.transform(new_data[numerical_cols])
    # new_prediction = model.predict(new_data)
    # print(f"Predicted House Price: {new_prediction}")
