In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import mean_squared_error,r2_score,mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [None]:
# Location of the raw dataset. Set the HOUSE_CSV environment variable to point
# the notebook at the file on another machine; when it is unset, the original
# local download path is used so existing runs keep working unchanged.
HOUSE_CSV = os.environ.get(
    'HOUSE_CSV',
    r'C:\Users\Hp\Downloads\ML_Data-20240722T104249Z-001 (1)\ML_Data\2. house_price_prediction\House.csv',
)
df = pd.read_csv(HOUSE_CSV)

In [None]:
# Preview the first rows of the freshly loaded frame.
df.head()

In [None]:
# (rows, columns) of the raw data, before any cleaning.
df.shape

In [None]:
# Derive City from the first run of ASCII letters found in each Address.
# expand=False hands back the single capture group as a Series.
city_names = df['Address'].str.extract(r'([A-Za-z]+)', expand=False)
df['City'] = city_names

In [None]:
# Preview again to check the newly added City column.
df.head()

In [None]:
# City has already been extracted from Address, so the raw column is removed.
# NOTE: this rebinds df; re-running the cell alone fails because Address is gone.
df = df.drop(columns=['Address'])
df.head()

In [None]:
# Total number of missing cells across the whole frame (per-column counts, summed).
df.isna().sum().sum()

In [None]:
# Column dtypes and non-null counts.
df.info()

In [None]:
# Summary statistics for the columns pandas treats as numeric.
df.describe()

In [None]:
# Labels of the int64/float64 columns; the plotting cells below iterate over these.
numeric_dtypes = ['int64', 'float64']
num_cols = df.select_dtypes(include=numeric_dtypes).columns

In [None]:
# 1. Histograms: one panel per numeric column to eyeball each distribution.
numeric_frame = df[num_cols]
numeric_frame.hist(
    figsize=(12, 8),
    bins=20,
    alpha=0.8,
    color='red',
)
plt.suptitle("Histograms of Numeric Columns", fontsize=16)
plt.show()

In [None]:
# 2. Boxplots: all numeric columns side by side on one axes to surface outliers.
box_fig, box_ax = plt.subplots(figsize=(12, 3))
sns.boxplot(data=df[num_cols], color='red', ax=box_ax)
box_ax.set_title("Boxplots of Numeric Columns (Outliers)")
box_fig.tight_layout()
plt.show()

In [None]:
# Pairwise scatter matrix of the numeric columns.
sns.set(style="ticks")  # optional styling; applies to every later plot as well
pair_plot = sns.pairplot(df[num_cols])

pair_fig = pair_plot.fig
pair_fig.set_size_inches(10, 10)  # resize the whole grid
# y slightly above 1 keeps the title clear of the top row of panels
pair_fig.suptitle("Pairplot of Numeric Columns", y=1.02, fontsize=16)
plt.show()

In [None]:
target = 'Price'
features = [col for col in num_cols if col != target]

# Size the subplot grid from the number of features. A fixed 3x2 grid raises
# ValueError as soon as there are more than six numeric features.
n_plot_cols = 2
n_plot_rows = max(1, -(-len(features) // n_plot_cols))  # ceiling division

# 4 inches per row reproduces the original 15x12 figure when three rows are needed.
fig, axes = plt.subplots(
    n_plot_rows, n_plot_cols,
    figsize=(15, 4 * n_plot_rows),
    squeeze=False,  # always a 2-D array, even for a single row
)
flat_axes = axes.ravel()

# One scatter of feature vs. target per panel.
for ax, col in zip(flat_axes, features):
    sns.scatterplot(x=col, y=target, data=df, color='red', alpha=0.8, ax=ax)
    ax.set_title(f'{col} vs {target}')

# Hide panels left over when the feature count does not fill the grid.
for ax in flat_axes[len(features):]:
    ax.set_visible(False)

fig.tight_layout()
plt.show()


In [None]:
num_cols = df.select_dtypes(include=['int64', 'float64']).columns

# IQR fences for every numeric column, all computed on the same unfiltered
# frame. Filtering column by column inside a loop would compute each later
# column's quartiles on rows already trimmed by earlier columns, making the
# surviving rows depend on column order.
q1 = df[num_cols].quantile(0.25)
q3 = df[num_cols].quantile(0.75)
iqr = q3 - q1
lower_fence = q1 - 1.5 * iqr
upper_fence = q3 + 1.5 * iqr

# Keep only rows that sit inside the fences in every numeric column.
# Comparisons against NaN are False, so rows with a missing numeric value are
# dropped. Note that the target column is numeric and is therefore trimmed too.
inside_fences = (
    df[num_cols].ge(lower_fence, axis='columns')
    & df[num_cols].le(upper_fence, axis='columns')
)
df = df[inside_fences.all(axis='columns')]

# Check first 5 rows
df.head()


In [None]:
# (rows, columns) after outlier removal; compare with the shape shown after loading.
df.shape

In [None]:
# One-hot encode City. drop_first=True omits the first category's indicator
# column so the dummies are not perfectly collinear.
df = pd.get_dummies(
    data=df,
    columns=['City'],
    drop_first=True,
)

In [None]:
# Preview the frame with the City indicator columns in place.
df.head()

In [None]:
# Pairwise correlations between all columns, drawn as a heatmap.
corr_matrix = df.corr()
sns.heatmap(corr_matrix)

In [None]:
# Separate the target (Price) from the predictor columns.
y = df['Price']
x = df.drop(columns=['Price'])

In [None]:
# Display the feature matrix (every column except Price).
x

In [None]:
# Display the target series (Price).
y

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=12,
)

In [None]:
# Feature-matrix shapes: (train, test).
(x_train.shape, x_test.shape)

In [None]:
# Target shapes: (train, test).
(y_train.shape, y_test.shape)

In [None]:
# Feature scaler; it is fitted on the training split in the next cell.
sc = StandardScaler()

In [None]:
# Learn the scaling statistics from the training split only, then apply the
# same transform to both splits so the test data never influences the fit.
# NOTE: x_train/x_test are rebound to the scaled arrays, so this cell must not
# be re-run without re-running the split first.
sc.fit(x_train)
x_train, x_test = sc.transform(x_train), sc.transform(x_test)

In [None]:
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
# External libraries
from xgboost import XGBRegressor

In [None]:
# Seed for the estimators that use randomness (trees and ensembles), so the
# comparison metrics are identical on every Restart & Run All. The value
# matches the seed used for train_test_split.
RANDOM_STATE = 12

# (display name, unfitted estimator) pairs evaluated by the comparison loop.
model_list = [
    ("Linear Regression", LinearRegression()),
    ("Ridge", Ridge()),
    ("Lasso", Lasso()),
    ("ElasticNet", ElasticNet()),
    ("Decision Tree", DecisionTreeRegressor(random_state=RANDOM_STATE)),
    ("Random Forest", RandomForestRegressor(random_state=RANDOM_STATE)),
    ("Gradient Boosting", GradientBoostingRegressor(random_state=RANDOM_STATE)),
    ("XGBoost", XGBRegressor(verbosity=0, random_state=RANDOM_STATE)),
    ("SVR", SVR()),
    ("KNN", KNeighborsRegressor())
]

In [None]:
# Simple loop
# Fit every candidate on the training split and report its hold-out metrics.
for i, (name, model) in enumerate(model_list):
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)

    # Metrics on the held-out split: MAE, RMSE (square root of MSE) and R^2.
    mae = mean_absolute_error(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)

    print(f"{i+1}. {name}: R2 = {r2:.4f}, RMSE = {rmse:.2f}, MAE = {mae:.2f}")


In [None]:
# Final model. The seed is fixed (same value as the train/test split) so the
# reported scores and the saved artifact are reproducible across runs.
gbr = GradientBoostingRegressor(random_state=12)

In [None]:
# Train on the scaled training split.
gbr.fit(x_train,y_train)

In [None]:
# Training-set R^2 as a percentage; compare with the test score to gauge overfitting.
100 * gbr.score(x_train, y_train)

In [None]:
# Hold-out R^2 as a percentage.
100 * gbr.score(x_test, y_test)

In [None]:
# Predictions for the hold-out split from the fitted gradient-boosting model.
y_pred = gbr.predict(x_test)

In [None]:
# Display the predicted prices.
y_pred

In [None]:
# Display the actual hold-out prices for comparison with the predictions.
y_test

In [None]:
# R^2 of the gradient-boosting predictions on the hold-out split.
r2_score(y_true=y_test, y_pred=y_pred)

In [None]:
# Mean squared error on the hold-out split (in squared price units).
mean_squared_error(y_true=y_test, y_pred=y_pred)

In [None]:
from joblib import dump, load

# Save
dump(gbr, 'gbr.joblib')
# The model was trained on standardized features, so the fitted scaler is part
# of the inference path. Persist it next to the model; without it the saved
# model cannot be applied to raw feature values in a fresh session.
dump(sc, 'scaler.joblib')
print("GradientBoostingRegressor saved successfully!")

# Load
# joblib files are pickle-based: only load artifacts you created or trust.
loaded_model = load('gbr.joblib')
y_pred = loaded_model.predict(x_test)

# Round-trip check: the reloaded model must reproduce the in-memory predictions.
assert np.allclose(y_pred, gbr.predict(x_test)), \
    "reloaded model predictions differ from the in-memory model"


In [None]:
# Display the predictions produced by the model reloaded from disk.
y_pred

In [None]:
# Display the actual hold-out prices alongside the reloaded model's predictions.
y_test