In [1]:
#IMPORTS
import numpy as np
# import matplotlib.pyplot as plt
import os
import cv2
import pickle
import random
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.calibration import CalibratedClassifierCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import StratifiedKFold



In [2]:
# a feature extracting kernel made for increasing accuracy
def extraction(image):
    """Filter a BGR image with a 3x3 kernel and force full saturation.

    Parameters
    ----------
    image : numpy.ndarray
        Image in OpenCV BGR channel order (it is converted with
        ``cv2.COLOR_BGR2HSV`` below).

    Returns
    -------
    numpy.ndarray
        BGR image after filtering and after the HSV saturation channel has
        been set to 255 for every pixel.
    """
    # 3x3 kernel: centre weight 9, all eight neighbours -1 (weights sum to 1).
    kernel = np.array([[-1, -1, -1], [-1, 9, -1], [-1, -1, -1]])
    # Fixed: the kernel was previously bound to `Kernel` but passed as the
    # undefined name `kern`, which raised NameError on every call.
    filtered = cv2.filter2D(image, -1, kernel)
    hsv = cv2.cvtColor(filtered, cv2.COLOR_BGR2HSV)
    hsv[:, :, 1] = 255  # channel 1 of HSV is saturation
    return cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)

In [3]:
# Loading Datasets

# Root folder that holds one sub-folder per entry in `Categories` / `test_images`.
# NOTE(review): absolute, machine-specific path - the notebook only runs
# unchanged on a machine that has this exact folder.
datadir = r"C:\Users\LG\Desktop\Ripe Fruit Detector"
# Class names; the list index of a name is used as its integer label in train().
Categories = ['Ripe', 'Unripe']
# Filled in place by train() with [flattened_image, class_index] pairs.
training_data = []
# Sub-folder(s) of `datadir` scanned by test() for unlabeled images.
test_images = ['Test']
# Filled in place by test() with [flattened_image] entries.
test_data = []
def train():
    """Load every labeled image and append its features to ``training_data``.

    For each name in ``Categories`` the folder ``datadir/<name>`` is listed.
    Each readable image has its HSV saturation channel set to 255, is resized
    to 100x100 and flattened to a 1-D vector, then appended to the
    module-level ``training_data`` list as ``[vector, class_index]`` where
    ``class_index`` is the position of the category in ``Categories``.

    Files that cannot be read or processed are skipped with a printed notice
    instead of being dropped silently. The function returns ``None``; it only
    mutates ``training_data`` (it appends, it never clears the list).
    """
    for category in Categories:
        path = os.path.join(datadir, category)
        class_num = Categories.index(category)
        for filename in os.listdir(path):
            file_path = os.path.join(path, filename)
            img_array = cv2.imread(file_path)
            if img_array is None:
                # cv2.imread signals an unreadable / non-image file with None.
                print(f'Skipping unreadable image: {file_path}')
                continue
            try:
                hsv = cv2.cvtColor(img_array, cv2.COLOR_BGR2HSV)
                hsv[:, :, 1] = 255  # force full saturation
                bgr = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
                new_array = cv2.resize(bgr, (100, 100))
            except cv2.error as err:
                # Stay best-effort like the original, but report the failure.
                print(f'Skipping {file_path}: {err}')
                continue
            # One flat feature vector per image (not reshaped to (1, -1)).
            image = np.array(new_array).flatten()
            training_data.append([image, class_num])

def test():
    """Load every unlabeled image and append its features to ``test_data``.

    For each folder name in ``test_images`` the folder ``datadir/<name>`` is
    listed. Each readable image has its HSV saturation channel set to 255, is
    resized to 100x100 and flattened to a 1-D vector, then appended to the
    module-level ``test_data`` list as a one-element list ``[vector]``.

    Files that cannot be read or processed are skipped with a printed notice
    instead of being dropped silently. The function returns ``None``; it only
    mutates ``test_data`` (it appends, it never clears the list).
    """
    for category in test_images:
        path = os.path.join(datadir, category)
        for filename in os.listdir(path):
            file_path = os.path.join(path, filename)
            img_array = cv2.imread(file_path)
            if img_array is None:
                # cv2.imread signals an unreadable / non-image file with None.
                print(f'Skipping unreadable image: {file_path}')
                continue
            try:
                img_hsv = cv2.cvtColor(img_array, cv2.COLOR_BGR2HSV)
                img_hsv[:, :, 1] = 255  # force full saturation
                bgr = cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR)
                new_array = cv2.resize(bgr, (100, 100))
            except cv2.error as err:
                # Stay best-effort like the original, but report the failure.
                print(f'Skipping {file_path}: {err}')
                continue
            # One flat feature vector per image (not reshaped to (1, -1)).
            image = np.array(new_array).flatten()
            test_data.append([image])


In [94]:
#TRAINING DATA PREPARATION 
# train() appends to the module-level list, so empty it first; otherwise
# re-running this cell would add every image a second time.
training_data.clear()
train()
# Shuffle so the samples are no longer grouped by category.
random.shuffle(training_data)
print(len(training_data))
# Cache the prepared samples on disk for the following cells.
with open('training_data.pkl', 'wb') as f:
    pickle.dump(training_data, f)
print('Saved Successfully!')


252
Saved Successfully!


In [95]:
### SPLITTING DATA INTO ATTRIBUTES AND LABELS ###

# Reload the cached [features, label] pairs written by the preparation cell.
# NOTE: pickle.load can execute arbitrary code - only load files this notebook wrote.
with open('training_data.pkl', 'rb') as f:
    training_data = pickle.load(f)

# Separate the pairs into parallel lists: x holds feature vectors, y holds labels.
x = [features for features, _ in training_data]
y = [label for _, label in training_data]

# Persist both lists so later cells can load them independently.
with open('x.pkl', 'wb') as feat:
    pickle.dump(x, feat)
print('Saved Attributes Successfully!')

with open('y.pkl', 'wb') as lab:
    pickle.dump(y, lab)
print('Saved labels Successfully!')

Saved Attributes Successfully!
Saved labels Successfully!


In [96]:
###### LOADING Training dataset for splitting ####

# NOTE: pickle.load can execute arbitrary code - only load files this notebook wrote.
with open('x.pkl', 'rb') as feat:
    X = pickle.load(feat)
# Fixed: the name used to be 'y.pkl ' (trailing space), which is not the name
# the labels were saved under ('y.pkl') and fails on filesystems that keep
# trailing spaces in file names.
with open('y.pkl', 'rb') as lab:
    Y = pickle.load(lab)


### TRAINING Time bro ###
from sklearn.preprocessing import StandardScaler
# Only used by the optional scaling experiment that is commented out below.
sc = StandardScaler()

# Hold out 25% of the samples for evaluation. No random_state is set, so the
# split (and every score below) changes from run to run.
xtrain, xtest, ytrain, ytest = train_test_split(X, Y, test_size=0.25)


# xtrain = sc.fit_transform(xtrain)
# xtest = sc.transform(xtest)

model = SVC(C=1, kernel='linear', gamma='auto')
# model = CalibratedClassifierCV(model)
model.fit(xtrain, ytrain)

# Accuracy on the held-out split vs. on the training split; a large gap
# between the two points to overfitting.
Accuracy = model.score(xtest, ytest)
overfit = model.score(xtrain, ytrain)
predictions = model.predict(xtest)
print('Accuracy: ', Accuracy)
print('Overfit: ', overfit)
print(classification_report(ytest, predictions))


from sklearn.metrics import confusion_matrix
cm = confusion_matrix(ytest, predictions)
print(cm)



Accuracy:  0.8888888888888888
Overfit:  1.0
              precision    recall  f1-score   support

           0       0.97      0.84      0.90        37
           1       0.81      0.96      0.88        26

    accuracy                           0.89        63
   macro avg       0.89      0.90      0.89        63
weighted avg       0.90      0.89      0.89        63

[[31  6]
 [ 1 25]]


In [97]:
#decision tree

from sklearn.tree import DecisionTreeClassifier
from sklearn . ensemble import RandomForestClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.model_selection import cross_val_score

In [101]:
# Bagging ensemble of 30 linear SVCs. Each member is trained on a bootstrap
# sample the size of the training set, using all features; oob_score=True
# additionally requests an out-of-bag score. probability=True lets the
# ensemble expose predict_proba.
bg = BaggingClassifier(
    SVC(
        C=1,
        kernel='linear',
        gamma='auto',
        probability=True,
    ),
    max_samples=1.0,
    max_features=1.0,
    n_estimators=30,
    oob_score=True,
)

In [102]:
# Fit the bagging ensemble on the training split created by train_test_split.
bg.fit(xtrain , ytrain)

BaggingClassifier(base_estimator=SVC(C=1, break_ties=False, cache_size=200,
                                     class_weight=None, coef0=0.0,
                                     decision_function_shape='ovr', degree=3,
                                     gamma='auto', kernel='linear', max_iter=-1,
                                     probability=True, random_state=None,
                                     shrinking=True, tol=0.001, verbose=False),
                  bootstrap=True, bootstrap_features=False, max_features=1.0,
                  max_samples=1.0, n_estimators=30, n_jobs=None, oob_score=True,
                  random_state=None, verbose=0, warm_start=False)

In [103]:
# Evaluate the bagging ensemble on the held-out split.
print(bg.score(xtest, ytest))
# Training-split accuracy, reported next to the test score as an overfitting check.
overfit = bg.score(xtrain, ytrain)
# Predict once and reuse the result (a discarded extra bg.predict(xtest)
# call used to run here and only wasted time).
predictions = bg.predict(xtest)
print('Overfit: ', overfit)
print(classification_report(ytest, predictions))


from sklearn.metrics import confusion_matrix
cm = confusion_matrix(ytest, predictions)
print(cm)

0.8888888888888888
Overfit:  1.0
              precision    recall  f1-score   support

           0       0.97      0.84      0.90        37
           1       0.81      0.96      0.88        26

    accuracy                           0.89        63
   macro avg       0.89      0.90      0.89        63
weighted avg       0.90      0.89      0.89        63

[[31  6]
 [ 1 25]]


In [None]:
#Assessing the model
# 10-fold cross-validation of the bagging ensemble on the full X / Y lists.
# Despite its name, `clf` holds the per-fold scores, not a classifier.
clf = cross_val_score(bg, X, Y, cv = 10)
clf

In [52]:
# NOTE(review): exact duplicate of the previous cell; this is the copy whose
# output (ten per-fold scores) was kept.
clf = cross_val_score(bg, X, Y, cv = 10)
clf

array([0.87234043, 0.91489362, 0.82608696, 0.7826087 , 0.82608696,
       0.84782609, 0.80434783, 0.95652174, 0.84782609, 0.91304348])

In [109]:
# Same 10-fold cross-validation, now for `model`, for comparison with the
# bagging scores. `clf` is overwritten with this run's per-fold scores.
clf = cross_val_score(model, X, Y, cv = 10)
clf

array([0.81132075, 0.77358491, 0.86792453, 0.8490566 , 0.83018868,
       0.90566038, 0.83018868, 0.75471698, 0.8490566 , 0.83018868])

In [87]:
# dt = DecisionTreeClassifier(max_depth = 2)
# dt.fit(xtrain, ytrain)
# dt.score(xtrain, ytrain)
# Baseline: random forest with 11 trees, trained and scored on the same
# train/test split as the SVC models. No random_state is set, so the score
# varies between runs.
rf = RandomForestClassifier(n_estimators = 11)
rf.fit(xtrain, ytrain)
rf.score(xtest, ytest)
# rf.score(xtrain, ytrain)

0.5714285714285714

In [78]:
# Build and fit the depth-2 decision tree in this cell. `dt` was previously
# defined only in commented-out code, so this cell raised NameError on a
# fresh kernel.
dt = DecisionTreeClassifier(max_depth=2)
dt.fit(xtrain, ytrain)
dt.score(xtest, ytest)


0.5

In [None]:
from sklearn.model_selection import GridSearchCV

# Two grids: string gamma settings for the linear/poly kernels, numeric gamma
# values for the rbf kernel.
# Fixed: 'scalar' is not a gamma value SVC accepts; the string options are
# 'scale' and 'auto', so every candidate using 'scalar' failed to fit.
parameters = [
    {'C': [1, 1.1, 1.5, 2, 10], 'gamma': ['scale', 'auto'], 'kernel': ['linear', 'poly']},
    {'C': [1, 1.1, 1.5, 2, 10], 'gamma': [0.1, 0.2, 0.3, 0.4, 0.5, 1], 'kernel': ['rbf']},
]

# Exhaustive search with 10-fold CV on the training split only, so the
# held-out test split stays untouched by model selection.
gridsearch = GridSearchCV(estimator=model, param_grid=parameters, scoring='accuracy', cv=10, n_jobs=2)
gridsearch = gridsearch.fit(xtrain, ytrain)


In [186]:

# Show the parameter combination with the best mean cross-validated accuracy.
print(gridsearch.best_params_)

{'C': 1, 'gamma': 'auto', 'kernel': 'linear'}


In [128]:
### Saving the damn model ###
# Persist the fitted bagging ensemble. The confirmation message is built from
# the file name so it cannot disagree with what was written (it used to
# announce "pineapple" while saving Banana.pkl).
model_path = 'Banana.pkl'
with open(model_path, 'wb') as mod:
    pickle.dump(bg, mod)
print(f'Saved {model_path} Successfully!')

Saved pineapple Successfully!


In [None]:
### Testing images to confirm stuff, i.e importing the image directly ###
image_path = 'IMG_20201218_171647_152.jpg'
img_bgr = cv2.imread(image_path)
if img_bgr is None:
    # cv2.imread returns None instead of raising when the file cannot be read.
    raise FileNotFoundError(f'Could not read image: {image_path}')
img_resized = cv2.resize(img_bgr, (100, 100))
# Single feature row in the (1, n_features) layout the classifiers take.
img = np.array(img_resized).reshape(1, -1)

# Display the resized picture. The flattened row used to be passed to imshow,
# which rendered a one-pixel-high strip instead of the image.
cv2.imshow('test', img_resized)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [21]:
#### for testing if you have multiple real world datasets ####
# Start from an empty list so re-running this cell does not duplicate entries
# (test() appends to the module-level `test_data`).
test_data = []
test()
print(len(test_data))
# Cache the unlabeled real-world samples for the prediction cells.
with open('real.pkl', 'wb') as t:
    pickle.dump(test_data,t)
print ('Saved Successfully!')

4
Saved Successfully!


In [127]:
# with open('Orange Model_sat.pkl', 'rb') as moda:
#     model = pickle.load(moda)
# with open('banana_v1.pkl', 'rb') as mod:

#     model1 = pickle.load(mod)
    
    
# Load the cached real-world samples (each entry is a one-element list that
# wraps a flattened image) and classify them with the bagging ensemble.
# NOTE: pickle.load can execute arbitrary code - only load files this notebook wrote.
with open('real.pkl', 'rb') as ril:
    real = pickle.load(ril)

for entry in real:
    # Classifiers expect a 2-D (1, n_features) row for a single sample.
    p = np.array(entry).reshape(1, -1)
    predicted_index = bg.predict(p)[0]
    class_probabilities = bg.predict_proba(p)
    print('Normal model: ', Categories[predicted_index], class_probabilities)


Normal model:  Unripe [[0.18744144 0.81255856]]
Normal model:  Unripe [[0.29464335 0.70535665]]
Normal model:  Ripe [[0.90884104 0.09115896]]


In [21]:
# Read one sample photo from the working directory.
# NOTE(review): cv2.imread yields None when the file cannot be read, which
# would only surface as an error in the resize cell below - confirm the file exists.
chai = cv2.imread("IMG_20210111_172305.jpg")

In [22]:
# Resize to the same 100x100 size used when building the training features.
omo =cv2.resize(chai, (100,100))

In [23]:
# Show the resized image in a native OpenCV window; waitKey(0) blocks the
# kernel until a key is pressed, then the window is closed.
cv2.imshow('omoooo', omo)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [None]:
import pickle
import bz2
import _pickle as cPickle
# Load the three saved per-fruit models from the working directory.
# NOTE: pickle.load can execute arbitrary code - only load trusted files.
# NOTE(review): this rebinds `model`, replacing the SVC trained earlier in the
# notebook with whatever Orange.pkl contains.
with open('Orange.pkl', 'rb') as moda:
    model = pickle.load(moda)
with open('Pineapple.pkl','rb') as pine:
    model0 = pickle.load(pine)
with open('Banana.pkl','rb') as ban:
    model1 = pickle.load(ban)
# Pickle a file and then compress it into a file with extension 
def compressed_pickle(title, data):
    """Pickle ``data`` into a bz2-compressed file named ``<title>.pbz2``.

    Parameters
    ----------
    title : str
        Output path without extension; ``'.pbz2'`` is appended.
    data : object
        Any picklable object (here: fitted scikit-learn models).

    Returns
    -------
    None
        The file is written as a side effect; an existing file is overwritten.
    """
    # Use the public `pickle` API rather than the private `_pickle` module;
    # `pickle` already uses the C implementation when it is available.
    with bz2.BZ2File(title + '.pbz2', 'w') as f:
        pickle.dump(data, f)
# Write one compressed archive per loaded model: orange.pbz2, pineapple.pbz2
# and banana.pbz2, in that order.
for archive_title, fitted_model in (
    ('orange', model),
    ('pineapple', model0),
    ('banana', model1),
):
    compressed_pickle(archive_title, fitted_model)

In [22]:
import pickle
import bz2
import _pickle as cPickle

# Load any compressed pickle file
# def decompress_pickle(file):
#     data = bz2.BZ2File(file, 'rb')
#     data = cPickle.load(data)
#     return data
# model  = decompress_pickle('banana.pbz2')

# Load the cached real-world samples (each entry is a one-element list that
# wraps a flattened image) and classify them with `model`.
# NOTE: pickle.load can execute arbitrary code - only load files this notebook wrote.
# NOTE(review): `model` must provide predict_proba here - presumably the
# estimator loaded from Orange.pkl rather than the plain SVC trained earlier; confirm.
with open('real.pkl', 'rb') as ril:
    real = pickle.load(ril)

for entry in real:
    # Classifiers expect a 2-D (1, n_features) row for a single sample.
    p = np.array(entry).reshape(1, -1)
    predicted_index = model.predict(p)[0]
    class_probabilities = model.predict_proba(p)
    print('Proba model: ', Categories[predicted_index], class_probabilities)


Proba model:  Ripe [[0.78785347 0.21214653]]
Proba model:  Ripe [[0.9124426 0.0875574]]
Proba model:  Unripe [[0.04711674 0.95288326]]
Proba model:  Ripe [[0.55816224 0.44183776]]
