In [1]:
import os
import pandas as pd

path = os.path.join('datasets', 'Deloitte Team Challenge')
data_path = os.path.join(path, 'chair_automization.csv')

chair_data = pd.read_csv(data_path)

In [9]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(chair_data[['Gender (1=Female 0=Male)', 'Apparent '
                                                                                            'Temperature ('
                                                                                            'C)',
                                                                'Humidity', 'Wind Speed (km/h)', 'Visibility (km)',
                                                                'Body pain', 'Daily hours spent in chair', 'Height',
                                                                'Weight', 'Shoulder-/Arm length', 'Height (sitting)',
                                                                'Height (standing)', 'Ergonomic chair']], chair_data['Preference for '
                                                                'relaxation '
                                                                'settings'], test_size=0.2, random_state=42)


In [14]:
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, VotingClassifier

log_clf = LogisticRegression(solver='saga')
svm_clf = SVC()
forest_clf = RandomForestClassifier()
sgd_clf = SGDClassifier()



In [18]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

bag_clf = BaggingClassifier(
    DecisionTreeClassifier(), n_estimators=500,
    max_samples=100, bootstrap=True, n_jobs=-1
)

In [19]:
from sklearn.ensemble import AdaBoostClassifier

ada_clf = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=1), n_estimators=200,
    algorithm='SAMME.R', learning_rate=0.5
)


In [20]:
from sklearn.ensemble import GradientBoostingClassifier

gbclf = GradientBoostingClassifier(max_depth=2, n_estimators= 3, learning_rate= 1.0)

In [21]:
voting_clf = VotingClassifier(
    estimators=[('lr', log_clf), ('rf', forest_clf), ('svc', svm_clf), ('SGD', sgd_clf), ('bagging_clf', bag_clf), ('ada', ada_clf), ('GBC', gbclf)], voting='hard')

voting_clf.fit(x_train, y_train)



VotingClassifier(estimators=[('lr', LogisticRegression(solver='saga')),
                             ('rf', RandomForestClassifier()), ('svc', SVC()),
                             ('SGD', SGDClassifier()),
                             ('bagging_clf',
                              BaggingClassifier(base_estimator=DecisionTreeClassifier(),
                                                max_samples=100,
                                                n_estimators=500, n_jobs=-1)),
                             ('ada',
                              AdaBoostClassifier(base_estimator=DecisionTreeClassifier(max_depth=1),
                                                 learning_rate=0.5,
                                                 n_estimators=200)),
                             ('GBC',
                              GradientBoostingClassifier(learning_rate=1.0,
                                                         max_depth=2,
                                                         n

In [23]:
from sklearn.metrics import accuracy_score

for clf in (log_clf, forest_clf, svm_clf, sgd_clf, bag_clf, ada_clf, gbclf, voting_clf):
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)
    print(clf.__class__.__name__, accuracy_score(y_test, y_pred))



LogisticRegression 0.6089108910891089
RandomForestClassifier 0.7079207920792079
SVC 0.6039603960396039
SGDClassifier 0.5594059405940595
BaggingClassifier 0.6831683168316832
AdaBoostClassifier 0.6534653465346535
GradientBoostingClassifier 0.6732673267326733




VotingClassifier 0.6534653465346535
