#### 1. Import Libraries

In [1]:
# Import the necessary libraries

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import accuracy_score, mean_squared_error

#### 2. Load and prepare data

In [None]:
# Read the raw dataset from disk into a DataFrame.
df = pd.read_csv('sample_data.csv')

# Separate the predictor columns from the column the model will learn to predict.
X = df.drop(columns='target_variable')   # every column except the target
y = df['target_variable']                # the target on its own

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

#### 3. Call and Train the Model

In [None]:
# Instantiate the regressor with default settings; only the seed is pinned
# so that repeated runs of the notebook build the same tree.
dtr_model = DecisionTreeRegressor(
    random_state=42,
)

# Fit on the training split only (the test split is kept for evaluation).
dtr_model.fit(X=X_train, y=y_train)


#### 4. Make Predictions

In [None]:
# Predict the target for the held-out test rows with the fitted baseline tree.
y_pred = dtr_model.predict(X=X_test)


#### 5. Model Evaluation

In [None]:
# Score the predictions against the true test targets.
# Mean squared error: the average of the squared residuals (lower is better).
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)

print("Mean Squared Error:", mse)

#### 6.  Model Tuning

In [None]:
# Example of hyperparameter tuning for a decision tree regressor using GridSearchCV.
# Every combination in the grid is scored with 5-fold cross-validation on the
# training split only; the test split is used once, at the end, for the final check.

# Import GridSearchCV
from sklearn.model_selection import GridSearchCV


# Dictionary mapping each hyperparameter name to the list of values to try
# (3 x 3 x 3 = 27 candidate combinations).
param_grid = {
    'max_depth': [3, 5, 7],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [2, 3, 4]
}


# Create a grid_search object that takes in the model, param_grid and the
# cross-validation setting. No `scoring` is passed, so candidates are ranked by
# the estimator's own `score` method (R^2 for scikit-learn regressors).
grid_search = GridSearchCV(estimator=dtr_model, param_grid=param_grid, cv=5)


# Fit the grid_search object with the training data
grid_search.fit(X_train, y_train)


# Retrieve the best combination of hyperparameters found and the best estimator (model)
best_params = grid_search.best_params_
best_reg = grid_search.best_estimator_

# Use the best model for prediction.
# NOTE: this rebinds `y_pred`, which until now held the baseline model's predictions.
y_pred = best_reg.predict(X_test)

# Report what the search found and how the tuned model performs on the test
# split. The result is stored under a separate name so the baseline `mse`
# remains available for comparison.
mse_tuned = mean_squared_error(y_test, y_pred)
print("Best hyperparameters:", best_params)
print("Best cross-validated score:", grid_search.best_score_)
print("Tuned Mean Squared Error:", mse_tuned)


#### 7. Interpret the Model

In [None]:
# Visualise the tree to understand the decision-making process.
# This draws `dtr_model`, the baseline regressor created before tuning.

from sklearn.tree import plot_tree
import matplotlib.pyplot as plt

plt.figure(figsize=(20,10))
# Pass the column names as a plain list rather than a pandas Index: a list is
# accepted regardless of how strictly the installed scikit-learn release
# validates `feature_names`.
plot_tree(dtr_model, filled=True, feature_names=list(X.columns))
plt.title("Decision tree regressor (baseline model)")
plt.show()