In [5]:
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score
import time
import warnings
warnings.filterwarnings('ignore')

In [2]:
def get_human_dataset():
    feature_name_df = pd.read_csv('../../../datasets/uci_har/human_activity/features.txt', sep='\s+', header=None, names=['column_index', 'column_name'])
    feature_name = feature_name_df.iloc[:, 1].values.tolist()
    feature_name = list(set(feature_name))

    X_train = pd.read_csv('../../../datasets/uci_har/human_activity/train/X_train.txt', sep='\s+', names=feature_name)
    X_test = pd.read_csv('../../../datasets/uci_har/human_activity/test/X_test.txt', sep='\s+', names=feature_name)
    
    y_train = pd.read_csv('../../../datasets/uci_har/human_activity/train/y_train.txt', sep='\s+', header=None, names=['action'])
    y_test = pd.read_csv('../../../datasets/uci_har/human_activity/test/y_test.txt', sep='\s+', header=None, names=['action'])

    return X_train, X_test, y_train, y_test

In [6]:
X_train, X_test, y_train, y_test = get_human_dataset()

start_time = time.time()
gb_clf = GradientBoostingClassifier(random_state=0)
gb_clf.fit(X_train, y_train)
gb_pred = gb_clf.predict(X_test)
gb_accuracy = accuracy_score(y_test, gb_pred)

print(round(gb_accuracy, 4))
print(round(time.time() - start_time, 4))

0.942
486.748


In [7]:
from sklearn.model_selection import GridSearchCV

params = {
    'n_estimators':[100, 500],
    'learning_rate' : [0.05, 0.1]
}

grid_cv = GridSearchCV(gb_clf, param_grid=params, cv=2, verbose=1)
grid_cv.fit(X_train, y_train)
print(grid_cv.best_params_)
print(grid_cv.best_score_)

Fitting 2 folds for each of 4 candidates, totalling 8 fits
{'learning_rate': 0.1, 'n_estimators': 500}
0.9017954298150164


In [8]:
gb_pred = grid_cv.best_estimator_.predict(X_test)
gb_accuracy = accuracy_score(y_test, gb_pred)
print(gb_accuracy)

0.9446895147607737
