# Parameters

In [None]:
# HDF5 file the notebook reads the extracted features from.
features_save_path = "extracted-features.h5"
# File the tuned estimator is pickled to.
model_save_path = "tomato_model.cpickle"


# Import saved features

In [None]:
import h5py

# Peek at the datasets stored in the features file. Open it explicitly
# read-only and release the handle as soon as the keys are printed, so no
# open handle is left dangling when `db` is rebound later.
with h5py.File(features_save_path, "r") as db:
    print(list(db.keys()))

# Define and tune the logistic regression classifier

In [None]:
# import packages
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
import pickle
import h5py

# n_jobs=-1 asks scikit-learn to run the grid search on all processors.
jobs = -1

# Open the HDF5 database for reading. The handle is kept open on purpose:
# features and labels are sliced from it below.
db = h5py.File(features_save_path, "r")

# Reserve the first 75% of the rows for training; `i` is the index at which
# the training data ends. No shuffling is done here -- the data is expected
# to have been shuffled before it was written to disk.
i = int(db["labels"].shape[0] * 0.75)

# Define the hyperparameters to tune, then run a grid search that evaluates
# the model for each value of C.
# Only parameters that LogisticRegression actually accepts may appear here;
# `epsilon` and `gamma` are not among them and make GridSearchCV.fit raise
# "Invalid parameter ... for estimator". If an SVM was intended instead,
# swap the estimator for sklearn.svm.SVC and tune C/gamma.
print("[INFO] tuning hyperparameters...")
param_grid = {
    'C': [0.1, 1, 100, 1000],
}

model = GridSearchCV(LogisticRegression(), param_grid, cv=3, n_jobs=jobs)

# Rows before index i are the training data.
model.fit(db["features"][:i], db["labels"][:i])
print("[INFO] best hyperparameters: {}".format(model.best_params_))

# evaluate the model
print("[INFO] evaluating...")
# Rows from index i onwards are the testing data.
preds = model.predict(db["features"][i:])

# h5py can hand string datasets back as byte strings, which
# classification_report cannot format; normalise every name to str first.
label_names = [
    name.decode("utf-8") if isinstance(name, bytes) else str(name)
    for name in db["label_names"][:]
]
print(classification_report(db["labels"][i:], preds, target_names=label_names))


In [None]:
# Serialize the best estimator found by the grid search to disk. The `with`
# block guarantees the output file is closed even if pickling fails.
print("[INFO] saving model...")
with open(model_save_path, "wb") as f:
    pickle.dump(model.best_estimator_, f)

# close the dataset
db.close()