In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error
from datetime import datetime

In [6]:
dataset = pd.read_csv('linear_interpolation_comp.csv')

dataset['snapped_at'] = pd.to_datetime(dataset['snapped_at'])

dataset['day_of_week'] = dataset['snapped_at'].dt.dayofweek
dataset['month'] = dataset['snapped_at'].dt.month
dataset['year'] = dataset['snapped_at'].dt.year

dataset.drop(columns=['snapped_at'], inplace=True)


X = dataset.drop(columns=['price'])
y = dataset['price']


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = DecisionTreeRegressor()

param_grid = {
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5)
grid_search.fit(X_train, y_train)

# Get the best parameters and best score
best_params = grid_search.best_params_
best_score = grid_search.best_score_

print("Best Parameters:", best_params)
print("Best Score:", best_score)

# Use the best model to make predictions
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print("Test Mean Squared Error:", mse)

Best Parameters: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 5}
Best Score: 0.9996535675774908
Test Mean Squared Error: 109491.76574236534
