In [5]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
import argparse
import pickle
import h5py

In [6]:
# open the HDF5 database for reading then determine the index of
# the training and testing split, provided that this data was
# already shuffled *prior* to writing it to disk
db = h5py.File("/media/pavle/HDD_disk/deep-learning/dog-breed-dataset/features.hdf5")
i = int(db["labels"].shape[0] * 0.75)

In [7]:
print("[INFO] tuning hyperparameters...")
params = {"C": [0.1, 1.0, 10.0, 100.0, 1000.0, 10000.0]}
model = GridSearchCV(LogisticRegression(), params, cv=3, n_jobs=-1, pre_dispatch=6)
model.fit(db["features"][:i], db["labels"][:i])

[INFO] tuning hyperparameters...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

GridSearchCV(cv=3, estimator=LogisticRegression(), n_jobs=-1,
             param_grid={'C': [0.1, 1.0, 10.0, 100.0, 1000.0, 10000.0]},
             pre_dispatch=6)

In [8]:
print("[INFO] best hyperparameters: {}".format(model.best_params_))

[INFO] best hyperparameters: {'C': 0.1}


In [9]:
# evaluate the model
print("[INFO] evaluating...")
preds = model.predict(db["features"][i:])
print("[INFO] Done!")

[INFO] evaluating...
[INFO] Done!


In [10]:
print(db["labels"][i:])
print(db["label_names"])
print(classification_report(db["labels"][i:], preds, target_names=[str(x, "utf-8") for x in db["label_names"]]))

[113  94  18 ...  57  75  98]
<HDF5 dataset "label_names": shape (120,), type "|O">
                                precision    recall  f1-score   support

                  Afghan_hound       0.87      0.74      0.80        78
           African_hunting_dog       0.90      0.90      0.90        39
                      Airedale       0.76      0.67      0.71        52
American_Staffordshire_terrier       0.42      0.33      0.37        39
                   Appenzeller       0.31      0.24      0.27        41
            Australian_terrier       0.56      0.60      0.58        55
            Bedlington_terrier       0.94      0.92      0.93        50
          Bernese_mountain_dog       0.79      0.69      0.74        49
              Blenheim_spaniel       0.83      0.94      0.88        47
                 Border_collie       0.53      0.63      0.58        38
                Border_terrier       0.75      0.79      0.77        34
                   Boston_bull       0.62      0.78

In [11]:
# serialize the model to disk
print("[INFO] saving model...")
f = open("model.hdf5", "wb")
f.write(pickle.dumps(model.best_estimator_))
f.close()
# close the database
db.close()

[INFO] saving model...
