In [None]:
# Load the three raw tables from CSV files in the working directory.
# pandas is imported here because no visible cell brings `pd` into scope,
# which would make this cell fail with NameError on a fresh kernel.
import pandas as pd

data1 = pd.read_csv('Data1.csv')
data2 = pd.read_csv('Data2.csv')
data3 = pd.read_csv('Data3.csv')


In [None]:
# Inner-join the three tables on 'HotelName'; a hotel is kept only when it
# appears in all three inputs.
merged_data = (
    data1
    .merge(data2, on='HotelName', how='inner')
    .merge(data3, on='HotelName', how='inner')
)

In [None]:
# Discard every row that contains at least one missing value.
merged_data = merged_data.dropna()

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Pairwise-relationship grid over the merged table, for a quick visual scan.
sns.pairplot(data=merged_data)
plt.show()

In [None]:
# Correlation heatmap over numeric columns only.
# The merge key 'HotelName' is still in the frame; on pandas >= 2.0 calling
# .corr() with non-numeric columns present raises instead of silently
# skipping them, so restrict to numeric dtypes explicitly.
correlation = merged_data.select_dtypes(include='number').corr()
sns.heatmap(correlation, annot=True)
plt.show()

In [None]:
# Feature matrix: everything except the two targets, restricted to numeric
# columns. The merge key 'HotelName' (presumably text) and any other
# non-numeric column cannot be fed to StandardScaler downstream.
# NOTE(review): categorical features are dropped here, not encoded -- add an
# encoder if any of them should be used by the models.
X = (
    merged_data
    .drop(['Price', 'ReviewScore'], axis=1)
    .select_dtypes(include='number')
)

# Two prediction targets taken from the same rows as X.
y_price = merged_data['Price']
y_review_score = merged_data['ReviewScore']

In [None]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [None]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_price_train, y_price_test, y_review_train, y_review_test = train_test_split(X_scaled, y_price, y_review_score, test_size=0.2, random_state=42)

In [None]:
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.metrics import mean_squared_error, accuracy_score
from sklearn.model_selection import GridSearchCV

# Baseline random forests, one per target. A fixed random_state (matching the
# seed used for the train/test split) makes the fitted forests -- and the
# grid-search clones derived from them -- reproducible across runs.
price_model = RandomForestRegressor(random_state=42)
price_model.fit(X_train, y_price_train)

# NOTE(review): a classifier only works if ReviewScore holds discrete labels;
# if the scores are continuous (e.g. 8.7) scikit-learn rejects the target and
# a regressor should be used instead -- confirm against the data.
review_model = RandomForestClassifier(random_state=42)
review_model.fit(X_train, y_review_train)

In [None]:
# Hyper-parameter grid shared by both forests: 3 x 3 = 9 candidates each,
# evaluated with 5-fold cross-validation on the training split.
param_grid = {'n_estimators': [100, 200, 300], 'max_depth': [None, 10, 20]}

# GridSearchCV clones the estimator it is given, so the earlier baseline fit
# does not carry over into the search.
grid_price = GridSearchCV(price_model, param_grid, cv=5)
# Fixed: the original call was missing the comma between param_grid and cv,
# which is a SyntaxError.
grid_review = GridSearchCV(review_model, param_grid, cv=5)

grid_price.fit(X_train, y_price_train)
grid_review.fit(X_train, y_review_train)


In [None]:
# Evaluate the tuned price model on the held-out split.
price_pred = grid_price.predict(X_test)
# RMSE computed as sqrt(MSE): the `squared=False` keyword was deprecated in
# scikit-learn 1.4 and removed in 1.6, while this form works on every version.
price_rmse = mean_squared_error(y_price_test, price_pred) ** 0.5

# Evaluate the tuned review-score model on the same held-out rows.
review_pred = grid_review.predict(X_test)
review_accuracy = accuracy_score(y_review_test, review_pred)

In [None]:
import joblib

# Persist the best estimator found by each grid search, price model first.
for fitted_search, target_path in (
    (grid_price, 'best_price_model.pkl'),
    (grid_review, 'best_review_model.pkl'),
):
    joblib.dump(fitted_search.best_estimator_, target_path)

# Report the held-out metrics computed earlier.
print(f'Price Prediction RMSE: {price_rmse}')
print(f'Review Score Prediction Accuracy: {review_accuracy}')