In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

In [None]:
# Load the three source CSV files and stack them row-wise into a single frame.
# ignore_index=True drops each file's own row labels and gives the combined
# frame a fresh 0..n-1 index, so labels are not repeated across files.
# NOTE(review): assumes the three files share the same columns; concat fills
# columns that are missing from a file with NaN -- confirm the schemas match.
df1 = pd.read_csv('File1.csv')
df2 = pd.read_csv('File2.csv')
df3 = pd.read_csv('File3.csv')  # was pd.read_csve, which pandas does not define
df = pd.concat([df1, df2, df3], ignore_index=True)  # was pd.content, which pandas does not define

In [None]:
# Quick structural overview of the loaded frame: first rows, column dtypes and
# non-null counts, summary statistics, and per-column missing-value counts.
print(df.head())
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() -- doing so only appends a stray "None" line.
df.info()
print(df.describe())
print(df.isnull().sum())

In [None]:
# Distribution of 'numeric_column' over all rows, bucketed into 20 bins.
plt.hist(df['numeric_column'], bins=20)
# Label the axes that plt.hist just drew on in a single call.
plt.gca().set(
    xlabel='Numeric Column',
    ylabel='Frequency',
    title='Histogram of Numeric Column',
)
plt.show()

In [None]:
# One box per value of 'categorical_column', showing the spread of
# 'numeric_column' within that group. sns.boxplot returns the Axes it drew
# on, so the labels are applied directly to that object.
sns.boxplot(
    data=df,
    x='categorical_column',
    y='numeric_column',
).set(
    xlabel='Categorical Column',
    ylabel='Numeric Column',
    title='Box plot of Numeric Column by Categorical Column',
)
plt.show()

In [None]:
# Parse 'date_column' into datetimes and keep only the calendar year as a new
# 'year' column on df.
df['year'] = df['date_column'].pipe(pd.to_datetime).dt.year

In [None]:
# Rebind df to the contents of combined_data.csv; all modelling below works
# on this frame.
# NOTE(review): this replaces the frame concatenated from File1-3 and drops the
# 'year' column derived earlier, and nothing visible in this notebook writes
# combined_data.csv -- presumably it is produced elsewhere; confirm it exists
# on a fresh run and that discarding the earlier frame is intended.
df = pd.read_csv(filepath_or_buffer="combined_data.csv")

In [None]:
# Rental-price task: the target is the 'rental_price' column; the feature
# matrix is every other column of df.
# NOTE(review): every remaining column (including 'area') is kept as a feature,
# and the StandardScaler applied later presumably needs them all to be
# numeric -- confirm against the schema of combined_data.csv.
y_rental = df['rental_price']
X_rental = df.drop(['rental_price'], axis=1)

In [None]:
# Area task: the target is the 'area' column; the feature matrix is every
# other column of df.
# NOTE(review): every remaining column (including 'rental_price') is kept as a
# feature, and the StandardScaler applied later presumably needs them all to
# be numeric -- confirm against the schema of combined_data.csv.
y_area = df['area']
X_area = df.drop(['area'], axis=1)

In [None]:
# Hold out 20% of the rows as a test set for each task. Both calls pass the
# same test_size and the same fixed random_state.
(X_rental_train, X_rental_test,
 y_rental_train, y_rental_test) = train_test_split(
    X_rental, y_rental, test_size=0.2, random_state=42)

(X_area_train, X_area_test,
 y_area_train, y_area_test) = train_test_split(
    X_area, y_area, test_size=0.2, random_state=42)

In [None]:
# Rental-price model: standardize the features, then fit a random forest.
# random_state pins the forest's bootstrap sampling and feature sub-sampling,
# so re-running the notebook selects the same hyper-parameters and reports the
# same MSE; 42 matches the seed already passed to train_test_split.
rental_pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('model', RandomForestRegressor(random_state=42)),
])

In [None]:
# Hyper-parameter grid for the random-forest step. The 'model__' prefix routes
# each setting to the pipeline step named 'model'.
# 3 * 4 * 3 * 3 = 108 candidate combinations in total.
rental_param_grid = dict(
    model__n_estimators=[100, 200, 300],
    model__max_depth=[None, 5, 10, 15],
    model__min_samples_split=[2, 5, 10],
    model__min_samples_leaf=[1, 2, 4],
)

In [None]:
# Exhaustive search over rental_param_grid with 5-fold cross-validation on the
# rental training split. Candidates are ranked by negated MSE (higher is
# better), and n_jobs=-1 asks for all available processors.
rental_grid_search = GridSearchCV(
    estimator=rental_pipeline,
    param_grid=rental_param_grid,
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=-1,
)
rental_grid_search.fit(X_rental_train, y_rental_train)

In [None]:
# Keep the winning pipeline from the rental search. No refit argument is
# passed to GridSearchCV, so its documented default (refit=True) applies and
# best_estimator_ is the best-scoring configuration refit on the data given
# to fit().
best_rental_model = rental_grid_search.best_estimator_

In [None]:
# Score the selected rental pipeline on the held-out test split and report
# its mean squared error.
rental_predictions = best_rental_model.predict(X_rental_test)
rental_mse = mean_squared_error(
    y_true=y_rental_test,
    y_pred=rental_predictions,
)
print("Rental Price Prediction MSE:", rental_mse)

In [None]:
# Area model: standardize the features, then fit a ridge regression.
area_pipeline = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('model', Ridge()),
    ]
)

In [None]:
# Hyper-parameter grid for the ridge step: three values of alpha. The
# 'model__' prefix routes the setting to the pipeline step named 'model'.
area_param_grid = dict(
    model__alpha=[0.1, 1.0, 10.0],
)

In [None]:
# Exhaustive search over area_param_grid with 5-fold cross-validation on the
# area training split. Candidates are ranked by negated MSE (higher is
# better), and n_jobs=-1 asks for all available processors.
area_grid_search = GridSearchCV(
    estimator=area_pipeline,
    param_grid=area_param_grid,
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=-1,
)
area_grid_search.fit(X_area_train, y_area_train)

In [None]:
# Keep the winning pipeline from the area search. No refit argument is passed
# to GridSearchCV, so its documented default (refit=True) applies and
# best_estimator_ is the best-scoring configuration refit on the data given
# to fit().
best_area_model = area_grid_search.best_estimator_

In [None]:
# Score the selected area pipeline on the held-out test split and report its
# mean squared error.
area_predictions = best_area_model.predict(X_area_test)
area_mse = mean_squared_error(
    y_true=y_area_test,
    y_pred=area_predictions,
)
print("Area Prediction MSE:", area_mse)