In [6]:
import cv2 # you need to install opencv library, coz it will help in 
           # getting the peaks in the image
import matplotlib.pyplot as plt
import numpy as np
import os
from sklearn.model_selection import train_test_split

In [7]:
#cell 2

# Each image has a mid line from where the peaks start.
# we are trying to detect the row number of that line in the image.
# The way we are detecting it is through the colour pixels in the image.
# Each blue colour pixel in the image has the RGB values (0,114,189)
# so if a row contains more than 50 pixels consecutively which contains this
# colour, we conclude that it is the middle line.

def get_mid_line(image):
    count = 0
    for y in range(len(image)):
        for x in range(len(image[y])):
            if (np.array_equal(image[y][x], np.array([0,114,189]))):
                count += 1
            else:
                count = 0
            if (count >= 50):
                return y

In [8]:
#cell 3

# now we will get the sum of the peaks in the given image
# To do this we use opencv's cornerHarris function.
# this function gives the corners in an image which in our case is peaks.
# we get the coordinates of these peaks through the cornerHarris function, 
# and we subtract it from the coordinates of mid line in order to find the
# height of the peak. Then we add all these heights to get the sum of the 
# heights which is our feature for classifying the image.

def give_peak_sum(file):
    image = cv2.imread(file) # opencv's image read function
    image_copy = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # converts image from
    # BGR color space to RGB color space
    
    # converting to gray scale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = np.float32(gray)
    
    # detect corners
    dst = cv2.cornerHarris(gray, 2, 3, 0.04)
    
    # dilate corner image to enhance corner points
    dst = cv2.dilate(dst, None)
    
    thresh = 0.02*dst.max()
    
    peak_sum = 0
    mid_line = get_mid_line(image_copy) # using the previously defined function
    
    for j in range(0, dst.shape[0]):
        for i in range(0, dst.shape[1]):
            if (dst[j, i] > thresh):
                peak_sum += abs(j-mid_line)
                
    return (peak_sum)
    

In [9]:
# cell 4

# Here we are just collecting the data that we have

full_images = []
bottom_images = []

for file in os.listdir("full"): # I don't know the path of the data files
                                # in your system, modify the path accordingly.
    full_images.append(file)
    
for file in os.listdir("bottom"):
    bottom_images.append(file)
    
print (len(full_images), len(bottom_images))

134 94


In [10]:
#cell 5

# Here for each image, we will get the sum of peaks. This is the main
# training step, it might take around 30-45 minutes to run depending on your
# system capabilities

data_full = []
data_bottom = []

for file in full_images:
    data_full.append((give_peak_sum("full/"+file),0))
    
for file in bottom_images:
    data_bottom.append((give_peak_sum("bottom/"+file), 1))
    
print (len(data_full), len(data_bottom))


134 94


In [11]:
# cell 6

# Here we mix the two lists data_full and data_bottom and shuffle it for 
# randomness

data = data_full + data_bottom
random.shuffle(data)

NameError: name 'random' is not defined

In [None]:
#cell 7

# here we take features in x variable and labels in y variable because
# skikit learn libraries require it differently

x = [[each[0]] for each in data]
y = [[each[1]] for each in data]
print (len(x), len(y))


In [None]:
# cell 8

# Here we split our dataset of total 228 images into training and testing
# datasets

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
print (len(x_train), len(x_test))
    
    

In [None]:
#cell 9

# Here we train our data on a Random Forest Classifier algorithm

from sklearn.ensemble import RandomForestClassifier

clf = RandomForestClassifier(n_estimators=100, max_depth=2, random_state=0)
clf.fit(x_train, y_train)




In [None]:
# cell 10

# Here we test the accuracy of our model

correct = 0
for i in range(len(x_test)):
    if (clf.predict([x_test[i]]) == y_test[i]):
        correct += 1

print (correct/float(len(x_test))) # this will print out the accuracy of the model.

In [None]:


# #cell 2

# # Each image has a mid line from where the peaks start.
# # we are trying to detect the row number of that line in the image.
# # The way we are detecting it is through the colour pixels in the image.
# # Each blue colour pixel in the image has the RGB values (0,114,189)
# # so if a row contains more than 50 pixels consecutively which contains this
# # colour, we conclude that it is the middle line.

# def get_mid_line(image):
#     count = 0
#     for y in range(len(image)):
#         for x in range(len(image[y])):
#             if (np.array_equal(image[y][x], np.array([0,114,189]))):
#                 count += 1
#             else:
#                 count = 0
#             if (count >= 50):
#                 return y


# #cell 3

# # now we will get the sum of the peaks in the given image
# # To do this we use opencv's cornerHarris function.
# # this function gives the corners in an image which in our case is peaks.
# # we get the coordinates of these peaks through the cornerHarris function, 
# # and we subtract it from the coordinates of mid line in order to find the
# # height of the peak. Then we add all these heights to get the sum of the 
# # heights which is our feature for classifying the image.

# def give_peak_sum(file):
#     image = cv2.imread(file) # opencv's image read function
#     image_copy = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # converts image from
#     # BGR color space to RGB color space
    
#     # converting to gray scale
#     gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
#     gray = np.float32(gray)
    
#     # detect corners
#     dst = cv2.cornerHarris(gray, 2, 3, 0.04)
    
#     # dilate corner image to enhance corner points
#     dst = cv2.dilate(dst, None)
    
#     thresh = 0.02*dst.max()
    
#     peak_sum = 0
#     mid_line = get_mid_line(image_copy) # using the previously defined function
    
#     for j in range(0, dst.shape[0]):
#         for i in range(0, dst.shape[1]):
#             if (dst[j, i] > thresh):
#                 peak_sum += abs(j-mid_line)
                
#     return (peak_sum)
    
    
# # cell 4

# # Here we are just collecting the data that we have

# full_images = []
# bottom_images = []

# for file in os.listdir("full"): # I don't know the path of the data files
#                                 # in your system, modify the path accordingly.
#     full_images.append(file)
    
# for file in os.listdir("bottom")
#     bottom_images.append(file)
    
# print (len(full_images), len(bottom_images))


# #cell 5

# # Here for each image, we will get the sum of peaks. This is the main
# # training step, it might take around 30-45 minutes to run depending on your
# # system capabilities

# data_full = []
# data_bottom = []

# for file in full_images:
#     data_full.append((give_peak_sum("full/"+file),0))
    
# for file in bottom_images:
#     data_bottom.append((give_peak_sum("bottom/"+file), 1))
    
# print (len(data_full), len(data_bottom))


# # cell 6

# # Here we mix the two lists data_full and data_bottom and shuffle it for 
# # randomness

# data = data_full + data_bottom
# random.shuffle(data)

# #cell 7

# # here we take features in x variable and labels in y variable because
# # skikit learn libraries require it differently

# x = [[each[0]] for each in data]
# y = [[each[1]] for each in data]
# print (len(x), len(y))


# # cell 8

# # Here we split our dataset of total 228 images into training and testing
# # datasets

# x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
# print (len(x_train), len(x_test))


# #cell 9

# # Here we train our data on a Random Forest Classifier algorithm

# from sklearn.ensemble import RandomForestClassifier

# clf = RandomForestClassifier(n_estimators=100, max_depth=2, random_state=0)
# clf.fit(x_train, y_train)


# # cell 10

# # Here we test the accuracy of our model

# correct = 0
# for i in range(len(x_test)):
#     if (clf.predict([x_test[i]]) == y_test[i]):
#         correct += 1

# print (correct/float(len(x_test))) # this will print out the accuracy of the model.
    
    
    
    


