# Classifying images, day or night

## Collecting data
The goal of this script is to take all of the images in the folder, and create a vector of color frequencies by checking each pixel

First, let's import the libraries we will be using

In [2]:
# Python code to read image
import cv2
import glob
from scipy.cluster import hierarchy
import math
import pandas as pd

I used glob to collect the images from the folders

In [10]:
folders = glob.glob("images/*")
imageFolders = []
#vectors = []

for folder in folders:
    print(folder)
    
    newDir = folder + "/*"
    folderImages = glob.glob(newDir)

    files = []
    for nextFile in folderImages:
        print("\t", nextFile)
        files.append(nextFile)
        #vectors.append(vecFromFilename(nextFile))
    
    imageFolders.append(files)


images\daytime
	 images\daytime\daytime_2.jpg
	 images\daytime\daytime_3.jpg
	 images\daytime\daytime_1.jpg
images\night
	 images\night\night_3.jpg
	 images\night\night_2.jpg
	 images\night\night_1.jpg
images\sunset
	 images\sunset\sunset_3.jpg
	 images\sunset\sunset_2.jpg
	 images\sunset\sunset_1.jpg


### OpenCV images
before going forward, it is important to know how openCV handles images. when an cv2.imread is used on an image file. the returned object is an array of arrays of arrays. The overarching array (len: image_height) represents the image as a whole, the first level of nested arrays (len: image_width) represents a row of pixels. in each row, the values contained are each pixel, an array (len: 3, dependent on color space) of BGR color values; yes BGR, not RGB, the method that colors are stored can be converted using openCV. 

In [11]:
img = cv2.imread(files[0], cv2.IMREAD_COLOR)
img[0]

array([[91, 85, 78],
       [92, 86, 79],
       [93, 87, 80],
       ...,
       [81, 86, 85],
       [81, 86, 85],
       [81, 86, 85]], dtype=uint8)

For this reason, I made a dictionary matching colors with an array representing its RGB values. (I will convert the images to rgb later on)

In [12]:
from skycolors import rgb_colors
rgb_colors

{'DARK_BLUE': [0, 0, 153],
 'LIGHT_BLUE': [140, 236, 255],
 'CYAN': [64, 255, 223],
 'VIOLET': [52, 27, 179],
 'PURPLE': [102, 0, 204],
 'PINK': [248, 166, 255],
 'ORANGE': [255, 187, 51],
 'RED': [191, 19, 19],
 'YELLOW': [255, 232, 115]}

Along with this, I have a function to quantify the distance between two RGB colors, that will be used to compare the color of each indivisual pixel to the colors in the dictionary

In [13]:
def RGB_dist(color1, color2=[0,0,0]):
    channels = zip(color1, color2)
    sum_distance_squared = 0
    for c1, c2 in channels:
        sum_distance_squared += (c1 - c2) ** 2
    return math.sqrt(sum_distance_squared) 

this code will ioen an image file, take a cropped version of it, and scan each pixel, adding to the frequency of whuchever color the current pixel is closest to in a "vector" dictionary, for the sake of time in this example, only a small portion of the image will be used, and the image will be smaller than normal

In [14]:

def vecFromFilename(filename):
    colorVec = {}
    colorVec["filename"] = filename
    if "daytime" in filename:
        colorVec["category"] = "daytime"
    elif "sunset" in filename:
        colorVec["category"] = "sunset"
    elif "night" in filename:
        colorVec["category"] = "night"
    else:
        print("color category::Something has gone wrong")

    for c in rgb_colors:
        colorVec[c] = 0

    img = cv2.imread(filename, cv2.IMREAD_COLOR)
    #print(img[0][0])
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # print(img[0][0])
    # print(RGB_dist( img[0][0]),rgb_colors["DARK_BLUE"])
    img_width = 360
    img = image_resize(img,width=img_width)
    # Cropping an image
    #img = img[0:img_width, 0:img_width]
    
    for region in img[0:100]:
        sumDist = 0
        for pixel in region:
            matchColor = closestColor(pixel,rgb_colors)
            colorVec[matchColor] += 1

        #print("sumDist:",(sumDist/len(region)))

    # df = pd.DataFrame(data=colorVec)
    print(colorVec)
    return colorVec


def closestColor(target, dict):
    minDist = -1
    matchColor = ""

    for color in dict:
        dist = RGB_dist(target,dict[color])
        if (minDist == -1) or (dist < minDist):
            minDist = dist
            matchColor = color

    return matchColor



def image_resize(image, width = None, height = None, inter = cv2.INTER_AREA):
    # initialize the dimensions of the image to be resized and
    # grab the image size
    dim = None
    (h, w) = image.shape[:2]

    # if both the width and height are None, then return the
    # original image
    if width is None and height is None:
        return image

    # check to see if the width is None
    if width is None:
        # calculate the ratio of the height and construct the
        # dimensions
        r = height / float(h)
        dim = (int(w * r), height)

    # otherwise, the height is None
    else:
        # calculate the ratio of the width and construct the
        # dimensions
        r = width / float(w)
        dim = (width, int(h * r))

    # resize the image
    resized = cv2.resize(image, dim, interpolation = inter)

    # return the resized image
    return resized


In [15]:

def mergeVectors(vectors):
    data_dict = {}
    data_dict["filename"] = []
    data_dict["category"] = []

    for c in rgb_colors:
        data_dict[c] = []

    for vec in vectors:
        for key in vec:
            data_dict[key].append(vec[key])

    
    data = pd.DataFrame(data_dict)
    print(data)
    return data

This will take all of the images gathered by glob, make a vector from the image, and merge them into one pandas dataframe to ship off to a csv file

In [21]:
vectors = []
for folder in imageFolders:
    for imageFile in folder:
        print(imageFile)
        vectors.append(vecFromFilename(imageFile))


data  = mergeVectors(vectors)
outfileName = input("enter output file name: ")
data.to_csv(outfileName)

images\daytime\daytime_2.jpg
{'filename': 'images\\daytime\\daytime_2.jpg', 'category': 'daytime', 'DARK_BLUE': 3077, 'LIGHT_BLUE': 29133, 'CYAN': 0, 'VIOLET': 1529, 'PURPLE': 0, 'PINK': 29, 'ORANGE': 69, 'RED': 2141, 'YELLOW': 22}
images\daytime\daytime_3.jpg
{'filename': 'images\\daytime\\daytime_3.jpg', 'category': 'daytime', 'DARK_BLUE': 0, 'LIGHT_BLUE': 25873, 'CYAN': 6572, 'VIOLET': 1522, 'PURPLE': 0, 'PINK': 2033, 'ORANGE': 0, 'RED': 0, 'YELLOW': 0}
images\daytime\daytime_1.jpg
{'filename': 'images\\daytime\\daytime_1.jpg', 'category': 'daytime', 'DARK_BLUE': 5, 'LIGHT_BLUE': 18582, 'CYAN': 1432, 'VIOLET': 15791, 'PURPLE': 0, 'PINK': 190, 'ORANGE': 0, 'RED': 0, 'YELLOW': 0}
images\night\night_3.jpg
{'filename': 'images\\night\\night_3.jpg', 'category': 'night', 'DARK_BLUE': 18231, 'LIGHT_BLUE': 0, 'CYAN': 0, 'VIOLET': 17769, 'PURPLE': 0, 'PINK': 0, 'ORANGE': 0, 'RED': 0, 'YELLOW': 0}
images\night\night_2.jpg
{'filename': 'images\\night\\night_2.jpg', 'category': 'night', 'DARK_B