# Parameters

In [1]:
# HDF5 file holding the previously extracted features; later cells open it
# with h5py and read its "features", "labels" and "label_names" datasets.
features_save_path = 'extracted-features.h5'
# Destination file for the pickled best estimator found by the grid search.
model_save_path = 'tomato_model.cpickle'


# Import saved features

In [2]:
import h5py

# Peek at the top-level datasets stored in the feature file.
# The file is opened explicitly read-only (no reliance on h5py's default
# mode) and inside a context manager, so the handle is released as soon as
# the listing has been printed instead of lingering until the kernel exits.
# The training cell opens its own handle when it needs the data.
with h5py.File(features_save_path, "r") as db:
    print(list(db.keys()))

['features', 'label_names', 'labels']


  This is separate from the ipykernel package so we can avoid doing imports until


# Train logistic regression classifier

In [3]:
# import packages
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
import pickle
import h5py

# value handed to GridSearchCV as n_jobs
jobs = -1

# Open the feature store read-only. The rows are expected to have been
# shuffled before they were written (per the original author's note, this
# happened before feature extraction), so a plain positional split is used.
db = h5py.File(features_save_path, "r")

# i marks the boundary between the splits: rows [0, i) are the first 75% of
# the dataset and are used for training, rows [i, end) are held out.
i = int(db["labels"].shape[0] * 0.75)

# Grid-search the regularisation strength C of a logistic regression model
# using 3-fold cross-validation on the training rows only.
print("[INFO] tuning hyperparameters...")
params = {
    "C": [0.1, 1.0, 10.0, 100.0, 1000.0, 10000.0],
}
model = GridSearchCV(LogisticRegression(), params, cv = 3, n_jobs = jobs)

# Slice the training rows out of the HDF5 datasets and fit on them.
train_features = db["features"][:i]
train_labels = db["labels"][:i]
model.fit(train_features, train_labels)
print("[INFO] best hyperparameters: {}".format(model.best_params_))

# Score the tuned model on the held-out rows and print a per-class report.
print("[INFO] evaluating...")
test_features = db["features"][i:]
preds = model.predict(test_features)
test_labels = db["labels"][i:]
report = classification_report(test_labels, preds, target_names = db["label_names"])
print(report)


[INFO] tuning hyperparameters...
[INFO] best hyperparameters: {'C': 0.1}
[INFO] evaluating...
                                               precision    recall  f1-score   support

                      Tomato___Bacterial_spot       0.95      0.98      0.97       553
                        Tomato___Early_blight       0.90      0.79      0.84       245
                         Tomato___Late_blight       0.95      0.94      0.95       459
                           Tomato___Leaf_Mold       0.96      0.94      0.95       240
                  Tomato___Septoria_leaf_spot       0.94      0.95      0.94       421
Tomato___Spider_mites Two-spotted_spider_mite       0.92      0.96      0.94       401
                         Tomato___Target_Spot       0.92      0.90      0.91       400
       Tomato___Tomato_Yellow_Leaf_Curl_Virus       0.99      1.00      0.99      1365
                 Tomato___Tomato_mosaic_virus       0.97      0.98      0.97        96
                             Tomato

In [7]:
# Serialize the best estimator found by the grid search to disk, then
# release the HDF5 feature store.
print("[INFO] saving model...")
try:
    # The context manager guarantees the output file is flushed and closed
    # even if pickling raises; pickle.dump streams straight into the file
    # rather than building the whole byte string in memory first.
    with open(model_save_path, "wb") as f:
        pickle.dump(model.best_estimator_, f)
finally:
    # close the dataset whether or not the save succeeded
    db.close()

[INFO] saving model...
