In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import RobustScaler, OrdinalEncoder
from sklearn.ensemble import GradientBoostingRegressor
from sklearn import metrics
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
import pickle

In [None]:
# Load the cleaned dataset into a DataFrame.
# The previous literal "..Datasets/cleaned_dataset.csv" had no separator after "..",
# so it named a folder literally called "..Datasets" in the working directory.
# NOTE(review): assumes the CSV lives in a "Datasets" folder one level up — confirm.
df = pd.read_csv("../Datasets/cleaned_dataset.csv")

In [None]:
# Separate predictors from the regression target.
# The target column plus 'Description' and 'Society' are excluded from the predictors.
X = df.drop(columns=['Amount', 'Description', 'Society'])
y = df['Amount']

In [None]:
# Group predictor column names by dtype so each group can be preprocessed separately:
# object columns are treated as categorical, int64/float64 columns as numeric.
cat_features = X.select_dtypes(include=['object']).columns
num_features = X.select_dtypes(include=['int64', 'float64']).columns

In [None]:
# Preprocessing for numeric columns: a single RobustScaler step.
num_pipeline = Pipeline(steps=[('scaler', RobustScaler())])

# Preprocessing for categorical columns: integer-encode each category.
# A default OrdinalEncoder raises on any category it did not see during fit, which
# would make the fitted pipeline fail on new inputs (e.g. an unseen location).
# Unseen categories are mapped to the sentinel code -1 instead; inputs made only of
# known categories are encoded exactly as before.
cat_pipeline = Pipeline(steps=[
    ('encoder', OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1)),
])

In [None]:
# Send each column group through its own preprocessing pipeline.
# The transformer names below become the prefixes of the output feature names.
transformer = ColumnTransformer(
    transformers=[
        ('Numerical Transformer', num_pipeline, num_features),
        ('Categorical Transformer', cat_pipeline, cat_features),
    ]
)

In [None]:
# End-to-end estimator: column preprocessing followed by a gradient-boosted regressor.
# random_state is pinned so refitting on the same data reproduces the same model;
# the value matches the seed already used for the train/test split in this notebook.
final_pipeline = Pipeline(steps=[
    ('Transformer', transformer),
    ('Model', GradientBoostingRegressor(
        max_depth=5,
        loss="huber",
        learning_rate=0.2,
        alpha=0.75,  # quantile parameter used by the huber loss
        min_samples_split=6,
        min_samples_leaf=5,
        random_state=1,
    )),
])

In [None]:
# Seeded split: 85% of the rows go to training, the remainder is held out for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.85,
    random_state=1,
)

In [None]:
# Sanity check: show the shapes of the four split parts (train/test X, then train/test y).
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

((50857, 14), (8975, 14), (50857,), (8975,))

In [None]:
# Fit the preprocessing steps and the regressor on the training split only.
final_pipeline.fit(X=X_train, y=y_train)

In [None]:
# Predict on the held-out split and report the coefficient of determination (R^2).
y_pred = final_pipeline.predict(X_test)
metrics.r2_score(y_true=y_test, y_pred=y_pred)

0.67895164986316

In [None]:
# List the feature names produced by the fitted preprocessing step of the pipeline.
final_pipeline.named_steps['Transformer'].get_feature_names_out()

array(['Numerical Transformer__Bathroom',
       'Numerical Transformer__Balcony', 'Numerical Transformer__BHK',
       'Numerical Transformer__Area',
       'Numerical Transformer__No of Car Parking',
       'Numerical Transformer__Sale Floor',
       'Numerical Transformer__Total Floors',
       'Categorical Transformer__Location',
       'Categorical Transformer__Transaction',
       'Categorical Transformer__Furnishing',
       'Categorical Transformer__Facing',
       'Categorical Transformer__Overlooking',
       'Categorical Transformer__Ownership',
       'Categorical Transformer__Type of Car Parking'], dtype=object)

In [None]:
# Persist the fitted pipeline (preprocessing + model) to disk.
# The context manager guarantees the file is flushed and closed even if pickling
# fails; a bare open(...) passed straight to pickle.dump is never explicitly closed.
with open('/content/drive/MyDrive/Final_ML_Project/apartment_price_predictor.pkl', 'wb') as model_file:
    pickle.dump(final_pipeline, model_file)

In [None]:
# Reload the saved pipeline to check that the artifact round-trips.
# The context manager closes the file handle once loading finishes.
# NOTE: pickle.load can execute arbitrary code; only load files from a trusted source.
with open('/content/drive/MyDrive/Final_ML_Project/apartment_price_predictor.pkl', 'rb') as model_file:
    prediction = pickle.load(model_file)

In [None]:
# One hand-written sample row for a smoke-test prediction.
# Values are positional and must follow the column order of X.
# NOTE(review): the per-value column labels below are inferred from the value types
# and the feature-name listing shown in this notebook — verify against X.columns.
data = [
    'thane',                 # presumably Location
    'Resale',                # presumably Transaction
    'Furnished',             # presumably Furnishing
    'Not Mentioned',         # presumably Facing
    'Not Available',         # presumably Overlooking
    1,                       # presumably Bathroom
    0,                       # presumably Balcony
    'Co-operative Society',  # presumably Ownership
    1,                       # presumably BHK
    600.0,                   # presumably Area
    'Not_Available',         # presumably Type of Car Parking
    0,                       # presumably No of Car Parking
    0,                       # presumably Sale Floor
    2,                       # presumably Total Floors
]

In [None]:
# Wrap the sample row in a one-row frame that reuses the training column labels,
# then run it through the reloaded pipeline.
sample_frame = pd.DataFrame(data=[data], columns=X.columns)
prediction.predict(sample_frame)

array([51.75489335])