In [33]:
from sklearn.linear_model import LogisticRegression
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import  GridSearchCV

import pandas as pd
import numpy as np

In [34]:
df = pd.read_csv("E:\\research\\Spacematch\\pilot study\\feedback_form20210903.csv") #import the 34 data-points

In [35]:
df.head() 

Unnamed: 0,num,Time,Clothing_Level,indoor_temperature,indoor_humidity,weather,outdoor_temperature,outdoor_humidity,Thermal_comfort
0,1,6:12:00 PM,1,70.47,46.77,sunny,78,45,2
1,2,6:16:00 PM,1,70.49,46.78,sunny,78,45,3
2,3,6:18:00 PM,1,70.6,46.65,sunny,78,45,3
3,4,6:19:00 PM,1,70.64,46.6,sunny,78,45,3
4,5,6:21:00 PM,1,70.71,46.61,sunny,78,45,3


In [36]:
y = df.Thermal_comfort
X = df.drop(["num","Thermal_comfort", "Time", "weather"], axis = 1)  #Get X and y

In [41]:
model_and_params = {
    'random_forest':{
        'model': RandomForestClassifier(),
        'params':{
            "n_estimators"     :range(1,20,1),
            "max_features"     :range(1,6,1),
            "max_depth"        :range(1,6,1)
        }
    },
    'knn':{
        'model': KNeighborsClassifier(),
        'params':{
            "n_neighbors"     :range(1,10,1),
            "weights"         :['uniform', 'distance'], 
            "p"               : range(1,3,1)
        }
    },
    'logistic_regression':{
        'model': LogisticRegression(),
        'params':{
            "solver"          :['newton-cg', 'lbfgs'],
            "C"               :[0.1, 0.2],
            "max_iter"        :[1000]
        }
    }
}

In [42]:
scores = []
for model_name, model_params in model_and_params.items():
    clf = GridSearchCV(model_params['model'], model_params['params'], cv=3, return_train_score = False)
    clf.fit(X, y)
    scores.append({
        'model': model_name,
        'best_score': clf.best_score_,
        'best_params': clf.best_params_
    })

In [47]:
df = pd.DataFrame(scores, columns=['model', 'best_score', 'best_params'])
df

Unnamed: 0,model,best_score,best_params
0,random_forest,0.704545,"{'max_depth': 3, 'max_features': 4, 'n_estimat..."
1,knn,0.585859,"{'n_neighbors': 6, 'p': 1, 'weights': 'distance'}"
2,logistic_regression,0.439394,"{'C': 0.2, 'max_iter': 1000, 'solver': 'newton..."


In [48]:
scores

[{'model': 'random_forest',
  'best_score': 0.7045454545454546,
  'best_params': {'max_depth': 3, 'max_features': 4, 'n_estimators': 9}},
 {'model': 'knn',
  'best_score': 0.5858585858585857,
  'best_params': {'n_neighbors': 6, 'p': 1, 'weights': 'distance'}},
 {'model': 'logistic_regression',
  'best_score': 0.43939393939393945,
  'best_params': {'C': 0.2, 'max_iter': 1000, 'solver': 'newton-cg'}}]