In [None]:
import cv2
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Root folder of the image archive. The processing loop further down expects
# one sub-folder per year inside it, each named after the year it holds.
IMG_DIRECTORY = "./images/"

In [None]:
# Function to get all sundays of a particular year

def allsundays(year):
    """Return every Sunday that falls inside ``year`` as ``MM/DD/YYYY`` strings.

    Parameters
    ----------
    year : int
        Calendar year to enumerate (any value accepted by ``int()``).

    Returns
    -------
    list of str
        The Sundays of ``year`` in chronological order, formatted with
        ``'%m/%d/%Y'``.
    """
    year = int(year)
    # pd.date_range includes BOTH endpoints, so the range has to stop on
    # 31 December. Ending on 1 January of the following year would leak that
    # date into the result whenever it happens to be a Sunday (e.g. for 2016
    # and 2022).
    first_day = pd.Timestamp(year=year, month=1, day=1)
    last_day = pd.Timestamp(year=year, month=12, day=31)
    sundays = pd.date_range(start=first_day, end=last_day, freq='W-SUN')
    return sundays.strftime('%m/%d/%Y').tolist()

In [None]:
# List the year folders present in the images folder. Only purely numeric
# names are kept so that stray entries (e.g. ".DS_Store") cannot break the
# int(folder) conversion below; img_list and all_data stay index-aligned.
img_list = sorted(entry for entry in os.listdir(IMG_DIRECTORY) if entry.isdigit())

# On analysis, we've found that each image is comprised of only 7 colors given here
unique = [[150, 0, 0], [150, 150, 150], [200, 250, 250], [240, 0, 0],
          [250, 170, 10], [255, 240, 0], [255, 255, 255]]

# For easier computation, here are the grayscale values of the colors.
# NOTE(review): these look sorted in ascending order rather than in the order
# of `unique` -- confirm before pairing the two lists by index.
# The histogram below uses 255 bins over [0, 255]; NumPy closes the last bin on
# the right, so index 254 counts pixels of value 254 AND 255. That is
# presumably why white is listed as 254 here.
gray_vals = [44, 71, 150, 175, 217, 235, 254]

# Stores all the data, indexed as all_data[year][image][region][color]
all_data = []

# folder here corresponds to the year
for folder in img_list:
    # Common prefix of every image path of this year; the month, the day and
    # the ".png" extension are appended for each Sunday below.
    name = os.path.join(IMG_DIRECTORY, folder, "ct5km_baa-max-7d_v3.1_" + folder)

    # Stores all the data for a year
    year_data = []

    # Get all the sundays for this year, store it in the variable 'sundays'
    sundays = allsundays(int(folder))

    # for every sunday
    for day in sundays:

        # split the date
        month, date, year = day.split("/")

        # The file name only carries month and day, so a date that belongs to
        # another year would silently be mapped onto the wrong file of this
        # year's folder. Skip any such date.
        if int(year) != int(folder):
            continue

        # concatenate to get the file name accordingly
        file = name + month + date + ".png"

        # check that the file exists
        if os.path.exists(file):

            # Read the image as single-channel grayscale (flag value 0), so
            # that each of the 7 colors is identified by one intensity value
            # and computation is lighter.
            img = cv2.imread(file, cv2.IMREAD_GRAYSCALE)

            # cv2.imread does not raise on an undecodable file, it returns
            # None; treat that like a missing file instead of failing on the
            # slicing below.
            if img is None:
                print("Skipping unreadable image: " + file)
                continue

            # dividing the map into 3 regions - the east pacific, asia atlantic (Atlantic and Indian Ocean)
            # and the water bodies around Australia
            # NOTE(review): the column boundaries assume an image wider than
            # 5500 px -- confirm against the actual image size.
            pacific = img[:, :2200]
            asia_atlantic = img[:, 2200:5500]
            australia = img[:, 5500:]

            parts = [pacific, asia_atlantic, australia]

            # Stores all the data for an image of a year
            img_data = []

            # for every region defined
            for part in parts:

                # we get a histogram of the image: 255 unit-wide bins, so
                # hist[v] is the number of pixels with gray value v (the last
                # bin also includes value 255)
                hist, bin_edges = np.histogram(part.ravel(), 255, range=(0, 255))

                # We are only interested in the pixel counts of the 7 colors
                # mentioned above, kept in the order of gray_vals
                part_data = [hist[color] for color in gray_vals]

                img_data.append(part_data)

            year_data.append(img_data)
    all_data.append(year_data)