In [1]:
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeRegressor

In [None]:
# Load the dataset from a CSV file located relative to the working directory.
df = pd.read_csv(filepath_or_buffer='dataset.csv')

In [None]:
# Three targets are predicted from the same feature matrix:
#   y1 -> buy_total_price, y2 -> carpet_area, y3 -> property_type.
y1, y2, y3 = df['buy_total_price'], df['carpet_area'], df['property_type']

# The feature matrix is every remaining column once the targets are removed.
X = df.drop(columns=['buy_total_price', 'carpet_area', 'property_type'])


In [None]:
# Split the features and all three targets in ONE call so every array is
# partitioned with the same shuffled indices. Previously X_train/X_test were
# overwritten by three separate calls and only stayed aligned with each target
# because test_size and random_state happened to match across the calls.
# Outputs come back as (train, test) pairs in the order the arrays are passed.
(X_train, X_test,
 y1_train, y1_test,
 y2_train, y2_test,
 y3_train, y3_test) = train_test_split(X, y1, y2, y3, test_size=0.2, random_state=42)


In [None]:
# Column groups are derived from dtype. A column that matches neither group is
# not listed in the ColumnTransformer below and is therefore dropped by its
# default remainder='drop', so the selectors are kept deliberately broad:
#   - 'number' covers every numeric width, not only int64/float64;
#   - 'category' is accepted alongside 'object' for categorical columns.
numeric_features = X.select_dtypes(include='number').columns
categorical_features = X.select_dtypes(include=['object', 'category']).columns

# Numeric columns: fill gaps with the column median, then standardise.
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())])

# Categorical columns: fill gaps with the literal label 'missing', then one-hot
# encode; categories unseen during fit are ignored at transform time.
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))])

# Route each column group through its own transformer.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)])



In [None]:
# One pipeline per target, each starting with the shared preprocessing step.
# The step names ('regressor' / 'classifier') are the prefixes used by the
# hyperparameter grids, so they must not be renamed independently.
# Every estimator is seeded with the same value as the train/test split (42)
# so that repeated runs of the notebook produce the same fitted models.

# buy_total_price -> random forest regression
model1 = Pipeline(steps=[('preprocessor', preprocessor),
                         ('regressor', RandomForestRegressor(random_state=42))])

# carpet_area -> single decision tree regression
model2 = Pipeline(steps=[('preprocessor', preprocessor),
                         ('regressor', DecisionTreeRegressor(random_state=42))])

# property_type -> random forest classification
model3 = Pipeline(steps=[('preprocessor', preprocessor),
                         ('classifier', RandomForestClassifier(random_state=42))])


In [None]:
# Hyperparameter search spaces. Keys follow the `<step name>__<parameter>`
# form, where the step name is the final step of the matching pipeline.

# Forest regressor: number of trees x maximum tree depth.
param_grid1 = dict(
    regressor__n_estimators=[10, 50, 100, 200],
    regressor__max_depth=[None, 5, 10, 15],
)

# Single-tree regressor: maximum depth only.
param_grid2 = dict(regressor__max_depth=[None, 5, 10, 15])

# Forest classifier: number of trees x maximum tree depth.
param_grid3 = dict(
    classifier__n_estimators=[10, 50, 100, 200],
    classifier__max_depth=[None, 5, 10, 15],
)


In [None]:
# Exhaustive 5-fold cross-validated search for each target, fitted on the
# training split only. The two regressors are scored with negated MSE, the
# classifier with accuracy.
grid1 = GridSearchCV(
    estimator=model1,
    param_grid=param_grid1,
    scoring='neg_mean_squared_error',
    cv=5,
).fit(X_train, y1_train)

grid2 = GridSearchCV(
    estimator=model2,
    param_grid=param_grid2,
    scoring='neg_mean_squared_error',
    cv=5,
).fit(X_train, y2_train)

grid3 = GridSearchCV(
    estimator=model3,
    param_grid=param_grid3,
    scoring='accuracy',
    cv=5,
).fit(X_train, y3_train)


In [None]:
# Keep the best pipeline found by each search, in the same order as the grids.
best_model1, best_model2, best_model3 = (
    search.best_estimator_ for search in (grid1, grid2, grid3)
)

# Report the winning hyperparameter combination for each target.
for label, params in (
    ("Best hyperparameters for buy total price:", grid1.best_params_),
    ("Best hyperparameters for carpet area:", grid2.best_params_),
    ("Best hyperparameters for property type:", grid3.best_params_),
):
    print(label, params)


In [None]:
# Predict every target on the held-out test features, one model per target.
y1_pred, y2_pred, y3_pred = (
    fitted.predict(X_test)
    for fitted in (best_model1, best_model2, best_model3)
)


In [None]:
# Regression targets: mean squared error on the test split.
price_mse = mean_squared_error(y1_test, y1_pred)
print("Mean squared error for buy total price:", price_mse)

area_mse = mean_squared_error(y2_test, y2_pred)
print("Mean squared error for carpet area:", area_mse)

# Classification target: overall accuracy, per-class report, confusion matrix.
type_accuracy = accuracy_score(y3_test, y3_pred)
print("Accuracy for property type:", type_accuracy)

print("Classification report for property type:")
type_report = classification_report(y3_test, y3_pred)
print(type_report)

print("Confusion matrix for property type:")
type_confusion = confusion_matrix(y3_test, y3_pred)
print(type_confusion)