In [17]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.metrics import accuracy_score, r2_score
from sklearn.model_selection import learning_curve, GridSearchCV, RandomizedSearchCV
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier, GradientBoostingRegressor, RandomForestRegressor
from sklearn.svm import SVC, SVR
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression, Ridge


In [18]:
def warmup_classification(X_train, y_train, multiclass=False):
    """Grid-search five classifier families and return the best model of each.

    Every search uses 5-fold cross-validation scored by accuracy, and the
    winner of each search is the estimator refit by ``GridSearchCV``
    (``best_estimator_``).

    Parameters
    ----------
    X_train, y_train
        Training features and labels, passed unchanged to ``GridSearchCV.fit``.
    multiclass : bool, default False
        When truthy, the logistic regression is built with
        ``multi_class='multinomial'`` and the SVM with
        ``decision_function_shape='ovo'``.

    Returns
    -------
    tuple
        ``(logistic, decision_tree, gradient_boosting, random_forest, svm)``
        best estimators, in that order.
    """
    def _fit_best(estimator, grid, **search_options):
        # One 5-fold, accuracy-scored grid search; returns the refit winner.
        search = GridSearchCV(
            estimator,
            param_grid=grid,
            cv=5,
            scoring='accuracy',
            **search_options
        )
        search.fit(X_train, y_train)
        return search.best_estimator_

    # Logistic regression
    param_grid_logistic = {
        'C': [0.001, 0.01, 0.1, 1, 10, 100],
        'penalty': ['l1', 'l2'],
        # liblinear cannot fit a multinomial model, so every liblinear
        # candidate would fail in the multiclass case; keep only saga there.
        'solver': ['saga'] if multiclass else ['liblinear', 'saga']
    }
    if multiclass:
        log_r = LogisticRegression(multi_class='multinomial', random_state=42)
    else:
        log_r = LogisticRegression(random_state=42)
    best_model_logistic = _fit_best(log_r, param_grid_logistic)

    # Decision tree
    param_grid_dt = {
        'criterion': ['gini', 'entropy'],
        'max_depth': range(1, 20),
        'min_samples_split': range(2, 21),
        'min_samples_leaf': range(1, 21)
    }
    best_model_dt = _fit_best(
        DecisionTreeClassifier(random_state=42),
        param_grid_dt,
        n_jobs=-1
    )

    # Gradient Boosting
    param_grid_gb = {
        'learning_rate': [0.01, 0.1, 0.2],
        'n_estimators': [100, 200, 300],
        'max_depth': [3, 4, 5],
        'min_samples_split': [2, 4],
        # Was misspelled 'min_sample_leaf', which GridSearchCV rejects as an
        # invalid parameter for the estimator.
        'min_samples_leaf': [1, 2]
    }
    best_model_gb = _fit_best(
        GradientBoostingClassifier(random_state=42),
        param_grid_gb,
        n_jobs=-1
    )

    # Random forest
    # A random forest has no learning-rate parameter; the former
    # 'learninig_rate' entry made the whole search fail, so it is dropped.
    param_grid_rf = {
        'n_estimators': [100, 200, 300],
        'max_depth': [3, 4, 5],
        'min_samples_split': [2, 4],
        'min_samples_leaf': [1, 3]
    }
    best_model_rf = _fit_best(
        RandomForestClassifier(random_state=42),
        param_grid_rf,
        verbose=2,
        n_jobs=-1
    )

    # SVM
    param_grid_svm = {
        'C': [0.1, 1, 10, 100],
        'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
        'kernel': ['rbf', 'poly', 'sigmoid', 'linear']
    }
    if multiclass:
        svm = SVC(random_state=42, decision_function_shape='ovo')
    else:
        svm = SVC(random_state=42)
    best_model_svm = _fit_best(
        svm,
        param_grid_svm,
        refit=True,
        verbose=3,
        n_jobs=-1
    )

    return best_model_logistic, best_model_dt, best_model_gb, best_model_rf, best_model_svm





In [19]:


def warmup_regression(X_train, y_train):
    """Grid-search five regressor families and return the best model of each.

    Every search uses 5-fold cross-validation scored by negative mean squared
    error, and the winner of each search is the estimator refit by
    ``GridSearchCV`` (``best_estimator_``).

    Parameters
    ----------
    X_train, y_train
        Training features and targets, passed unchanged to
        ``GridSearchCV.fit``.

    Returns
    -------
    tuple
        ``(ridge, decision_tree, gradient_boosting, random_forest, svr)``
        best estimators, in that order.
    """
    # Local import: only needed to choose criterion names that the installed
    # scikit-learn release accepts.
    import sklearn

    def _fit_best(estimator, grid, **search_options):
        # One 5-fold, neg-MSE-scored grid search; returns the refit winner.
        search = GridSearchCV(
            estimator,
            param_grid=grid,
            cv=5,
            scoring='neg_mean_squared_error',
            **search_options
        )
        search.fit(X_train, y_train)
        return search.best_estimator_

    # scikit-learn 1.0 renamed the tree criteria 'mse' -> 'squared_error' and
    # 'mae' -> 'absolute_error'; the old spellings were removed in 1.2, where
    # they would make two thirds of the tree grid fail to fit.
    try:
        sk_version = tuple(int(part) for part in sklearn.__version__.split('.')[:2])
    except ValueError:
        # Unparsable version string (e.g. a pre-release tag): assume modern names.
        sk_version = (1, 0)
    if sk_version >= (1, 0):
        tree_criteria = ['squared_error', 'friedman_mse', 'absolute_error']
    else:
        tree_criteria = ['mse', 'friedman_mse', 'mae']

    # Ridge regression (L2-regularised linear model); alpha is the only tuned knob.
    param_grid_linear = {
        'alpha': [0.001, 0.01, 0.1, 1, 10, 100]
    }
    best_model_linear = _fit_best(Ridge(random_state=42), param_grid_linear)

    # Decision Tree Regressor
    param_grid_dt = {
        'criterion': tree_criteria,
        'max_depth': range(1, 20),
        'min_samples_split': range(2, 21),
        'min_samples_leaf': range(1, 21)
    }
    best_model_dt = _fit_best(
        DecisionTreeRegressor(random_state=42),
        param_grid_dt,
        n_jobs=-1
    )

    # Gradient Boosting Regressor
    param_grid_gb = {
        'learning_rate': [0.01, 0.1, 0.2],
        'n_estimators': [100, 200, 300],
        'max_depth': [3, 4, 5],
        'min_samples_split': [2, 4],
        'min_samples_leaf': [1, 2]
    }
    best_model_gb = _fit_best(
        GradientBoostingRegressor(random_state=42),
        param_grid_gb,
        n_jobs=-1
    )

    # Random Forest Regressor
    param_grid_rf = {
        'n_estimators': [100, 200, 300],
        'max_depth': [3, 4, 5],
        'min_samples_split': [2, 4],
        'min_samples_leaf': [1, 2]
    }
    best_model_rf = _fit_best(
        RandomForestRegressor(random_state=42),
        param_grid_rf,
        n_jobs=-1
    )

    # SVR
    param_grid_svr = {
        'C': [0.1, 1, 10, 100],
        'gamma': ['scale', 'auto'],
        'kernel': ['rbf', 'poly', 'sigmoid', 'linear']
    }
    best_model_svr = _fit_best(SVR(), param_grid_svr, n_jobs=-1)

    return best_model_linear, best_model_dt, best_model_gb, best_model_rf, best_model_svr


In [20]:
def warmup(X_train, y_train, problem = 'binary_classification'):
    """Run the model-selection warm-up that matches the problem type.

    Parameters
    ----------
    X_train, y_train
        Training data, forwarded unchanged to the selected warm-up routine.
    problem : str, default 'binary_classification'
        ``'binary_classification'`` and ``'multiclass_classification'`` select
        ``warmup_classification`` (the latter with ``multiclass=True``); any
        other value selects ``warmup_regression``.

    Returns
    -------
    tuple
        Whatever the selected warm-up routine returns, so the fitted models
        are available to the caller instead of being discarded.
    """
    if problem == 'binary_classification':
        return warmup_classification(X_train, y_train)
    if problem == 'multiclass_classification':
        # The multiclass flag must be True here; False would silently run the
        # binary configuration.
        return warmup_classification(X_train, y_train, True)
    return warmup_regression(X_train, y_train)

In [21]:
def visualization_class(logistic, decision_tree, gradient_boost, random_forest, svm, X_train, y_train, X_val, y_val):
    """Print train and validation accuracy for five fitted classifiers.

    For each model, in the order logistic regression, decision tree, gradient
    boosting, random forest, SVM, this predicts on ``X_train`` and ``X_val``,
    scores the predictions against ``y_train`` / ``y_val`` with
    ``accuracy_score``, and prints a bold heading followed by both scores
    multiplied by 100. Nothing is returned.
    """
    report_order = (
        ("Logistic Regression", logistic),
        ("Decision Tree", decision_tree),
        ("Gradient Boost", gradient_boost),
        ("Random Forest", random_forest),
        ("SVM", svm),
    )

    for heading, model in report_order:
        train_guess = model.predict(X_train)
        val_guess = model.predict(X_val)
        train_acc = accuracy_score(train_guess, y_train)
        val_acc = accuracy_score(val_guess, y_val)
        # ANSI escape codes switch bold on and off around the model name.
        print("\033[1m" + heading + "\033[0m" + ":")
        print('train: ', train_acc * 100, '%')
        print('val: ', val_acc * 100, '%')


    

In [22]:


def visualization_regress(linear, decision_tree, gradient_boost, random_forest, svr, X_train, y_train, X_val, y_val):
    """Print train and validation R^2 for five fitted regressors.

    For each model, in the order linear, decision tree, gradient boosting,
    random forest, SVR, this predicts on ``X_train`` and ``X_val``, computes
    ``r2_score(targets, predictions)`` for both splits, and prints a bold
    heading followed by both scores multiplied by 100. Nothing is returned.
    """
    report_order = (
        ("Linear Regression", linear),
        ("Decision Tree", decision_tree),
        ("Gradient Boost", gradient_boost),
        ("Random Forest", random_forest),
        ("SVR", svr),
    )

    for heading, model in report_order:
        train_estimate = model.predict(X_train)
        val_estimate = model.predict(X_val)
        train_r2 = r2_score(y_train, train_estimate)
        val_r2 = r2_score(y_val, val_estimate)
        # ANSI escape codes switch bold on and off around the model name.
        print("\033[1m" + heading + "\033[0m" + ":")
        print('train: ', train_r2 * 100, '%')
        print('val: ', val_r2 * 100, '%')
