In [1]:
from sklearn.datasets import load_breast_cancer, fetch_california_housing
from sklearn.model_selection import train_test_split
from models.decision_tree import DecisionTreeClassifier, DecisionTreeRegressor
from models.random_forest import RandomForestClassifier, RandomForestRegressor

In [2]:
from sklearn.metrics import accuracy_score, confusion_matrix

# Breast-cancer classification data, split 80/20 with a fixed seed.
ds = load_breast_cancer()
X, Y = ds.data, ds.target
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

# Train the project's decision tree (depth capped at 5) and predict the held-out split.
dt_classifier = DecisionTreeClassifier(max_depth=5)
dt_classifier.fit(X_train, Y_train)
Y_pred = dt_classifier.predict(X_test)

# Compute both metrics first, then report accuracy followed by the confusion matrix.
accuracy = accuracy_score(Y_test, Y_pred)
conf_matrix = confusion_matrix(Y_test, Y_pred)
print(f'Accuracy: {accuracy:.2f}')
print('Confusion Matrix:')
print(conf_matrix)

Accuracy: 0.94
Confusion Matrix:
[[38  5]
 [ 2 69]]


In [3]:
from sklearn.metrics import mean_absolute_error, r2_score

# California-housing regression data, split 80/20 with a fixed seed.
ds = fetch_california_housing()
X, Y = ds.data, ds.target
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

# Train the project's regression tree (depth capped at 15) and predict the held-out split.
dt_regressor = DecisionTreeRegressor(max_depth=15)
dt_regressor.fit(X_train, Y_train)
Y_pred = dt_regressor.predict(X_test)

# Compute both metrics first, then report MAE followed by R^2.
mae = mean_absolute_error(Y_test, Y_pred)
r2 = r2_score(Y_test, Y_pred)
print(f'Mean Absolute Error: {mae:.2f}')
print(f'R^2 Score: {r2:.2f}')

Mean Absolute Error: 0.39
R^2 Score: 0.73


In [1]:
from sklearn.datasets import load_breast_cancer, fetch_california_housing
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split
from models.decision_tree import DecisionTreeClassifier, DecisionTreeRegressor
from models.random_forest import RandomForestClassifier, RandomForestRegressor

# Breast-cancer classification data, split 80/20 with a fixed seed.
ds = load_breast_cancer()
X, Y = ds.data, ds.target
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

# NOTE(review): n_estimators=1 builds a forest containing a single tree —
# presumably a quick smoke test; confirm this is the intended configuration.
rf_classifier = RandomForestClassifier(n_jobs=-1, n_estimators=1)
rf_classifier.fit(X_train, Y_train)

# Show the test-matrix shape, then predict the held-out split.
print(X_test.shape)
Y_pred = rf_classifier.predict(X_test)

# Compute both metrics first, then report accuracy followed by the confusion matrix.
accuracy = accuracy_score(Y_test, Y_pred)
conf_matrix = confusion_matrix(Y_test, Y_pred)
print(f'Accuracy: {accuracy:.2f}')
print(conf_matrix)

(114, 30)
Accuracy: 0.96
[[40  3]
 [ 2 69]]


In [None]:
from sklearn.datasets import load_breast_cancer, fetch_california_housing
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from models.decision_tree import DecisionTreeClassifier, DecisionTreeRegressor
from models.random_forest import RandomForestClassifier, RandomForestRegressor

# California-housing regression data, split 80/20 with a fixed seed.
ds = fetch_california_housing()
X = ds.data
Y = ds.target
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Named rf_* rather than dt_* because the estimator is a random forest
# (10 trees), in line with the rf_classifier naming used for the classifier.
rf_regressor = RandomForestRegressor(n_jobs=-1, n_estimators=10)

# Fit the model
rf_regressor.fit(X_train, Y_train)

# Make predictions on the test set
Y_pred = rf_regressor.predict(X_test)

# Calculate mean absolute error
mae = mean_absolute_error(Y_test, Y_pred)
print(f'Mean Absolute Error: {mae:.2f}')

# Calculate R^2 score
r2 = r2_score(Y_test, Y_pred)
print(f'R^2 Score: {r2:.2f}')

In [3]:
from sklearn.datasets import load_breast_cancer, fetch_california_housing
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split
from models.gradient_boosting_tree import GradientBoostingClassifier, GradientBoostingRegressor

# Breast-cancer classification data, split 80/20 with a fixed seed.
ds = load_breast_cancer()
X = ds.data
Y = ds.target
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Named gb_* rather than dt_* because the estimator is a gradient-boosting
# ensemble, not a single decision tree.
# NOTE(review): tolerance/patience presumably configure early stopping —
# confirm against models.gradient_boosting_tree.
gb_classifier = GradientBoostingClassifier(
    max_depth=10,
    n_estimators=100,
    learning_rate=0.1,
    tolerance=0.05,
    patience=5,
)

# Fit the model
gb_classifier.fit(X_train, Y_train)

# Make predictions on the test set
Y_pred = gb_classifier.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(Y_test, Y_pred)
print(f'Accuracy: {accuracy:.2f}')

# Display confusion matrix
conf_matrix = confusion_matrix(Y_test, Y_pred)
print(conf_matrix)

Accuracy: 0.94
[[39  4]
 [ 3 68]]


In [None]:
from sklearn.datasets import load_breast_cancer, fetch_california_housing
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from models.gradient_boosting_tree import GradientBoostingClassifier, GradientBoostingRegressor

# California-housing regression data, split 80/20 with a fixed seed.
ds = fetch_california_housing()
X = ds.data
Y = ds.target
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Named gb_* rather than dt_* because the estimator is a gradient-boosting
# ensemble, not a single decision tree.
# NOTE(review): tolerance/patience presumably configure early stopping —
# confirm against models.gradient_boosting_tree.
gb_regressor = GradientBoostingRegressor(
    max_depth=10,
    n_estimators=100,
    learning_rate=0.1,
    tolerance=0.05,
    patience=5,
)

# Fit the model
gb_regressor.fit(X_train, Y_train)

# Make predictions on the test set
Y_pred = gb_regressor.predict(X_test)

# Calculate mean absolute error
mae = mean_absolute_error(Y_test, Y_pred)
print(f'Mean Absolute Error: {mae:.2f}')

# Calculate R^2 score
r2 = r2_score(Y_test, Y_pred)
print(f'R^2 Score: {r2:.2f}')

In [None]:
# Load modules
from models.decision_tree import DecisionTreeClassifier as OwnDecisionTreeClassifier, DecisionTreeRegressor as OwnDecisionTreeRegressor
from sklearn.tree import DecisionTreeClassifier as SklearnDecisionTreeClassifier, DecisionTreeRegressor as SklearnDecisionTreeRegressor

from sklearn.model_selection import train_test_split

from sklearn.metrics import classification_report
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
from sklearn.metrics import accuracy_score, precision_score, recall_score

# diamonds dataset is a very large regression dataset which will test the efficiency of the algorithms
from datasets.diamonds import load_diamonds

# Load the diamonds data through the project's loader, then hold out 20% of
# the rows for testing using a fixed seed.
ds_r_hard = load_diamonds()
X = ds_r_hard.data
Y = ds_r_hard.target
X_r_hard_train, X_r_hard_test, Y_r_hard_train, Y_r_hard_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

In [None]:
# The project's own regression tree, constructed with its default arguments.
dt_regressor = OwnDecisionTreeRegressor()
dt_regressor.fit(X_r_hard_train, Y_r_hard_train)

# Print R^2 on the held-out diamonds split.
Y_r_hard_pred = dt_regressor.predict(X_r_hard_test)
print(r2_score(y_true=Y_r_hard_test, y_pred=Y_r_hard_pred))

In [None]:
# scikit-learn's regression tree with its default arguments, on the same
# train/test split variables as the own-implementation cell.
dt_regressor = SklearnDecisionTreeRegressor()
dt_regressor.fit(X_r_hard_train, Y_r_hard_train)

# Print R^2 on the held-out diamonds split.
Y_r_hard_pred = dt_regressor.predict(X_r_hard_test)
print(r2_score(y_true=Y_r_hard_test, y_pred=Y_r_hard_pred))

In [2]:
from models.decision_tree import DecisionTreeClassifier as OwnDecisionTreeClassifier
from sklearn.model_selection import ParameterGrid, train_test_split
from sklearn.metrics import classification_report
from sklearn.datasets import load_digits
from utils.grid_search_cv import GridSearchCV
from utils.reports import evaluate_classification

# Digits classification data, split 80/20 with a fixed seed.
ds_c_hard = load_digits()
X = ds_c_hard.data
Y = ds_c_hard.target
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Candidate hyperparameters; ParameterGrid expands them into every
# combination (3 * 2 * 2 = 12 settings).
# NOTE(review): scikit-learn's own trees reject min_samples_split=1 —
# confirm the project's tree treats it as a valid value.
params = dict(
    max_depth=[8, 9, 10],
    min_samples_split=[1, 2],
    min_samples_leaf=[1, 2],
)
param_grid = list(ParameterGrid(params))

# The project's GridSearchCV is handed the estimator class and the expanded
# list of parameter dicts (cv=5 — presumably 5-fold cross-validation; confirm).
grid_search = GridSearchCV(OwnDecisionTreeClassifier, param_grid, cv=5)
grid_search.fit(X_train, Y_train)
print(grid_search.best_params)

# Predict the held-out split through the fitted search object and report.
Y_pred = grid_search.predict(X_test)
evaluate_classification(Y_test, Y_pred)

{'max_depth': 9, 'min_samples_leaf': 1, 'min_samples_split': 1}
Precision: 0.87, Recall: 0.87, F1-Score: 0.87
