{% block title %}
{% endblock %}

## Importing the libraries

In [None]:
{% block imports %}
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
{% endblock %}

## Start stopwatch

In [None]:
{% block stopwatch %}
from time import process_time
time_start = process_time() 
{% endblock %}

## Import the dataset

In [None]:
{% block dataset %}
X = pd.read_csv('{{ features_file_path }}')
y = pd.read_csv('{{ labels_file_path }}')
{% endblock %}

### Encode low-cardinality columns as category codes

In [None]:
def categorize_column(dataframe, category_threshold, column):
    """Replace a low-cardinality column with integer category codes, in place.

    The column is rewritten only when its number of distinct values (as
    reported by ``Series.unique``) is at most ``category_threshold``;
    otherwise the frame is left untouched.

    Args:
        dataframe: Frame holding the column; it is modified in place.
        category_threshold: Largest distinct-value count that still triggers
            the conversion.
        column: Label of the column to inspect.

    Returns:
        None.
    """
    distinct_values = dataframe[column].unique()
    if len(distinct_values) > category_threshold:
        # Too many distinct values: keep the column as it is.
        return

    as_category = dataframe[column].astype('category')
    dataframe[column] = as_category.cat.codes
        

def categorize_dataframe(dataframe, category_threshold):
    """Run ``categorize_column`` over every column of ``dataframe``, in place.

    Args:
        dataframe: Frame whose columns are inspected one by one; it is
            modified in place by ``categorize_column``.
        category_threshold: Threshold forwarded unchanged to
            ``categorize_column`` for each column.

    Returns:
        None.
    """
    # Iterating a DataFrame yields its column labels.
    for column_label in dataframe:
        categorize_column(dataframe, category_threshold, column_label)
        
# Encode qualifying columns of both frames in place, using the threshold
# injected by the template, then keep only the underlying value arrays.
for frame in (X, y):
    categorize_dataframe(frame, {{ category_threshold }})
X, y = X.values, y.values


## Splitting the dataset into the Training set and Test set

In [None]:
{% block split %}
from sklearn.model_selection import train_test_split
# Hold out a test set. Every argument below is filled in from the template's
# `train_split` settings when the notebook is rendered.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size={{train_split.test_size}},
    train_size={{train_split.train_size}},
    random_state={{train_split.random_state}},
    # Take the flag from its own setting: random_state is a seed (or None), not a
    # boolean. NOTE(review): assumes the render context provides
    # `train_split.shuffle` - confirm against the code that renders this template.
    shuffle={{train_split.shuffle}},
    # Stratify on the features or on the labels when requested, otherwise not at all.
    stratify=X if "{{train_split.stratify}}" == "features" else y if "{{train_split.stratify}}" == "labels" else None)
{% endblock %}

## Training the Simple Decision Tree model on the Training set

In [None]:
{% block train %}
{% endblock %}

## Predicting the Test set results

In [None]:
{% block predict %}
# Predict on the held-out features. `regressor` is not defined in this part of
# the template - presumably the `train` block of a derived template creates it
# (TODO confirm).
y_pred = regressor.predict(X_test)
{% endblock %}

## Evaluating the Model Performance

In [None]:
{% block evaluate %}
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
# Regression metrics on the held-out set (RMSE is the square root of the MSE).
print(f'R2 Score: {r2_score(y_test, y_pred)}')
print(f'RMSE: {mean_squared_error(y_test, y_pred)**0.5}')
print(f'MAE: {mean_absolute_error(y_test, y_pred)}')

# Overlay actual and predicted values, indexed by position in the test set.
x_ax = range(len(y_test))
fig, ax = plt.subplots()
ax.plot(x_ax, y_test, linewidth=1, label="original")
ax.plot(x_ax, y_pred, linewidth=1.1, label="predicted")
ax.set_title("y-test and y-predicted data")
ax.set_xlabel('X-axis')
ax.set_ylabel('Y-axis')
ax.legend(loc='best', fancybox=True, shadow=True)
ax.grid(True)
plt.show()

# Report the CPU time consumed since `time_start` was recorded.
time_stop = process_time()
print(f'Elapsed CPU Time: {round(time_stop - time_start, 2)} seconds')
{% endblock %}