**DATA** **GATHERING**

In [None]:
!pip install ipython-autotime
%load_ext autotime

In [None]:
# DATA : Images
# 1. Download manually from Google
# 2. Download dataset from Kaggle
# 3. Build an Image Web Crawler (Web Scraping)
# 4. Use python libraries to scrape the images

time: 400 µs (started: 2022-12-09 14:42:34 +00:00)


In [None]:
# ! mark - It means run it as a shell command rather than a notebook command.
#Python library to download images from Bing.com
!pip install bing-image-downloader

In [None]:
!mkdir images # Directory for images

time: 112 ms (started: 2022-12-09 14:42:36 +00:00)


In [None]:
from bing_image_downloader import downloader
# Up to 30 images will be downloaded from Bing and sent to the images directory we created
# adult_filter_off = False keeps Bing's adult-content filter ON, because we don't want any unwanted images
# (adult_filter_off = True would switch the filter off)
downloader.download("sunflower", limit = 30, output_dir = 'images',
                    adult_filter_off = False)

In [None]:
# Up to 30 "rugby ball leather" images; adult_filter_off = False keeps the
# adult-content filter ON (True would switch the filter off).
downloader.download("rugby ball leather", limit = 30, output_dir = 'images',
                    adult_filter_off = False)

In [None]:
# Up to 30 "ice cream cone" images; adult_filter_off = False keeps the
# adult-content filter ON (True would switch the filter off).
downloader.download("ice cream cone", limit = 30, output_dir = 'images',
                    adult_filter_off = False)

In [None]:
# Preprocessing
# 1. Resizing
# 2. Flattening

# To play around with the folders
import os 
# To display the image
import matplotlib.pyplot as plt
# To do some numerical computing
import numpy as np
# To read the image
from skimage.io import imread
# To resize the image (For the model, we need the images to be of the same size)
from skimage.transform import resize

time: 987 ms (started: 2022-12-09 14:43:26 +00:00)


In [None]:
import numpy as np

# Small 2 x 5 integer matrix used to illustrate array dimensionality.
a = np.array([
    [1, 2, 3, 4, 5],
    [6, 7, 8, 9, 45],
])
# Number of axes of the array (a matrix has 2).
a.ndim

2

time: 4.25 ms (started: 2022-12-09 14:43:27 +00:00)


In [None]:
# How do we convert a matrix to a vector? - Flatten it.
# Flatten : 2D to 1D (the rows are laid out one after another)
a.flatten()

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 45])

time: 8.57 ms (started: 2022-12-09 14:43:27 +00:00)


In [None]:
# We need the data in two different formats : Input (features) and Output (labels)
# Every image is resized to a common shape and then flattened from a
# (rows, cols, channels) array into a 1D vector, because the model needs
# fixed-length input vectors.

# Colour-space helpers used to bring every image to 3 channels before resizing
from skimage.color import gray2rgb, rgba2rgb

target = [] # Output : integer class label of each image
images = [] # Resized images in matrix form
flat_data = [] # Resized images in vector form (Input)

# Relative path, so this is the same directory the downloader wrote to
# (output_dir = 'images') whatever the notebook's working directory is.
DATADIR = 'images'
CATEGORIES = ['sunflower','rugby ball leather','ice cream cone']

# Iterate through the categories; the position of a category in CATEGORIES
# is used as its numeric label (label encoding)
for class_num, category in enumerate(CATEGORIES):
  # Folder that holds all the images of this category
  path = os.path.join(DATADIR,category)

  # sorted() : os.listdir returns the entries in arbitrary order, which would
  # make the sample order (and so the train/test split) differ between runs
  for img in sorted(os.listdir(path)):
    img_array = imread(os.path.join(path,img))
    # print(img_array)
    # plt.imshow(img_array)

    # Bring the image to RGB first. Resizing a grayscale or RGBA array straight
    # to 3 channels would interpolate across the channel axis instead.
    if img_array.ndim == 2:
      img_array = gray2rgb(img_array)
    elif img_array.shape[-1] == 4:
      img_array = rgba2rgb(img_array)

    # Resizing the images
    img_resized = resize(img_array,(150,150,3)) # skimage - Normalizes the data from 0 to 1 ( Values 0 to 255 - Value b/w 0 and 1)
    # Storing the resized images in images
    images.append(img_resized)
    # Flattening the images and storing it in flat_data
    flat_data.append(img_resized.flatten())
    target.append(class_num)

# We don't want anything to stay outside numpy
# Once it is a numpy array, it is easy to index, slice and feed to scikit-learn
flat_data = np.array(flat_data)
target = np.array(target)
images = np.array(images)

time: 28 s (started: 2022-12-09 14:43:27 +00:00)


In [None]:
# Flattened feature vector of the first loaded image
flat_data[0]

array([0.93122876, 0.3390719 , 0.02352941, ..., 0.73661438, 0.0627451 ,
       0.02352941])

time: 13.8 ms (started: 2022-12-09 14:43:55 +00:00)


In [None]:
# Encoded class label of every sample, in loading order
target

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2])

time: 12.4 ms (started: 2022-12-09 14:43:55 +00:00)


In [None]:
# Class balance check : the unique labels and how many samples each one has.
# Optional bar chart of the same counts:
#unique,count = np.unique(target, return_counts=True)
#plt.bar(CATEGORIES,count)
np.unique(target, return_counts=True)

(array([0, 1, 2]), array([30, 30, 30]))

time: 7.03 ms (started: 2022-12-09 14:43:55 +00:00)


In [None]:
# Split data into Training and Testing

from sklearn.model_selection import train_test_split
# X : flat_data, Y : target
# 30% of the samples are held out for testing; random_state fixes the shuffle
# so the split is reproducible. stratify keeps the class proportions of
# `target` the same in both parts, which matters with so few samples per class.
x_train,x_test,y_train,y_test = train_test_split(flat_data,target,
                                                 test_size = 0.3,random_state = 110,
                                                 stratify = target)

time: 179 ms (started: 2022-12-09 14:43:55 +00:00)


In [None]:
# Using classification algorithm to classify the images
from sklearn.model_selection import GridSearchCV
from sklearn import svm

# Candidate hyperparameters for the SVM. Each dict is one sub-grid:
# a linear kernel tuned over C, and an RBF kernel tuned over C and gamma.
param_grid = [
             {'C':[1,10,100,1000],'kernel':['linear']},
             {'C':[1,10,100,1000],'gamma':[0.001,0.0001],'kernel':['rbf']}
]

# probability=True : to get the probabilities of all three classes.
# Those probability estimates are fitted on internally shuffled data, so
# random_state is fixed to keep them reproducible from run to run.
svc = svm.SVC(probability=True, random_state=110)

# GridSearchCV searches for the best parameter values in the given grid.
# It is fed the model and the parameter grid, evaluates every combination of
# the listed hyperparameter values with cross-validation, and keeps the
# best-scoring combination; predictions are then made with that best model.
clf = GridSearchCV(svc,param_grid)
clf.fit(x_train,y_train)

GridSearchCV(estimator=SVC(probability=True),
             param_grid=[{'C': [1, 10, 100, 1000], 'kernel': ['linear']},
                         {'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001],
                          'kernel': ['rbf']}])

time: 23.3 s (started: 2022-12-09 14:43:55 +00:00)


In [None]:
# Predict the class of every held-out test image (input for the accuracy check)
y_pred = clf.predict(x_test)
y_pred

array([2, 2, 2, 0, 1, 2, 2, 0, 2, 0, 1, 0, 2, 1, 2, 2, 0, 2, 0, 1, 2, 0,
       2, 0, 2, 2, 1])

time: 34.4 ms (started: 2022-12-09 14:44:19 +00:00)


In [None]:
# True labels of the test images, to compare with y_pred
y_test

array([0, 2, 1, 0, 1, 1, 2, 2, 2, 1, 0, 0, 2, 1, 2, 0, 1, 2, 0, 1, 0, 0,
       2, 0, 1, 2, 1])

time: 3.14 ms (started: 2022-12-09 14:44:19 +00:00)


In [None]:
# To evaluate our model
from sklearn.metrics import accuracy_score,confusion_matrix

time: 450 µs (started: 2022-12-09 14:44:19 +00:00)


In [None]:
# Fraction of test images classified correctly.
# scikit-learn metrics take (y_true, y_pred) in that order.
accuracy_score(y_test,y_pred)

0.6296296296296297

time: 12.1 ms (started: 2022-12-09 14:44:19 +00:00)


In [None]:
# Arguments are (y_true, y_pred): rows are the true classes and columns the
# predicted classes. Passing them the other way round transposes the matrix.
confusion_matrix(y_test,y_pred)

array([[5, 2, 1],
       [1, 4, 0],
       [3, 3, 8]])

time: 6.61 ms (started: 2022-12-09 14:44:19 +00:00)


In [None]:
# Save the model using Pickle library
import pickle
# The with-block closes the file (and so flushes the pickle to disk) even if dump fails
with open('img_model.p','wb') as model_file:
  pickle.dump(clf,model_file)

time: 31.9 ms (started: 2022-12-09 14:44:19 +00:00)


In [None]:
# Load the trained model back from 'img_model.p'; the with-block closes the file afterwards.
# Only unpickle files you trust: pickle.load can execute arbitrary code.
with open('img_model.p','rb') as model_file:
  model = pickle.load(model_file)

time: 5.16 ms (started: 2022-12-09 14:44:19 +00:00)


In [None]:
# Testing a brand new image
# Colour-space helpers used to bring the image to 3 channels before resizing
from skimage.color import gray2rgb, rgba2rgb

# NOTE: this rebinds flat_data, replacing the training features; re-run the
# data-loading cell before re-running the train/test split.
flat_data = []
url = input('Enter your URL') #Enter URL of the image
img = imread(url)
print(img.shape)

# Grayscale images and images with an alpha channel are converted to RGB first,
# so the input is not limited to 3-channel jpg files.
if img.ndim == 2:
  img_rgb = gray2rgb(img)
elif img.shape[-1] == 4:
  img_rgb = rgba2rgb(img)
else:
  img_rgb = img

# Same preprocessing as the training images: resize, then flatten
img_resized = resize(img_rgb,(150,150,3))
flat_data.append(img_resized.flatten())
flat_data = np.array(flat_data)
plt.imshow(img_resized)

# predict returns an array of encoded labels; map the single label back to its category name
y_out = model.predict(flat_data)
y_out = CATEGORIES[y_out[0]]
print (f' PREDICTED OUTPUT: {y_out}')