-----------
## In this notebook:
* Filter out rows without a path
* Filter out day images
-----------

In [None]:
import pandas as pd
import numpy as np
import cv2
import os
import glob
import multiprocessing
import shutil
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

import warnings
warnings.filterwarnings("ignore")

## Define functions

In [None]:
# look up the image path for a row (rows with no image get None and are dropped in a later cell)
def get_path(x, nysm_dir='/tf/NYSM/archive/nysm/cam_photos/'):
    """Return the path of the camera image that matches one row, or None.

    Parameters
    ----------
    x : mapping or pandas.Series
        Row providing ``x['time_5M']`` (any object with ``strftime``) and
        ``x['station']`` (station id, either ``bytes`` or ``str``).
    nysm_dir : str, optional
        Root of the image archive. Files are looked up as
        ``<nysm_dir>/<YYYY>/<MM>/<DD>/<station>/<YYYYmmddTHHMM>*``.

    Returns
    -------
    str or None
        The first file whose name starts with the row's timestamp inside the
        station's directory for that date, or None when the directory does
        not exist or no file matches.
    """
    time = x['time_5M']  # time stamp of the image
    station = x['station']
    # Station ids stored as bytes are decoded; plain strings are used as-is.
    if isinstance(station, bytes):
        station = station.decode("utf-8")

    site_path = os.path.join(nysm_dir, time.strftime('%Y/%m/%d'), station)
    if not os.path.exists(site_path):
        return None

    # Glob once and reuse the result: a second scan costs another directory
    # listing and could come back empty if a file disappears in between.
    matches = glob.glob(os.path.join(site_path, time.strftime('%Y%m%dT%H%M') + '*'))
    return matches[0] if matches else None
    

def loopy(df):
    """Return a copy of ``df`` with an added ``'img_path'`` column.

    Each row is passed to ``get_path``; the column holds whatever it returns
    (a file path, or None when no image was found). ``df`` is not modified.
    """
    with_paths = df.copy()
    # axis=1 hands get_path one row at a time
    with_paths['img_path'] = with_paths.apply(get_path, axis=1)
    return with_paths

In [None]:
# determine day images
def is_day(path):
    """Return True when the image at ``path`` is a colour image, else None.

    An image whose three channels are identical everywhere (i.e. grayscale)
    is not flagged. None, rather than False, is the "not flagged" value
    because callers select rows with ``notnull()``.

    Parameters
    ----------
    path : str
        Path of the image file to inspect.

    Returns
    -------
    True or None
        True if any pixel differs between channels; None if all channels are
        equal or the file could not be read.
    """
    image = cv2.imread(path)
    # cv2.imread does not raise on a missing or undecodable file, it returns
    # None; treat such a file as "not flagged" instead of crashing the worker.
    if image is None:
        return None
    b, g, r = image[:, :, 0], image[:, :, 1], image[:, :, 2]
    if (b == g).all() and (b == r).all():
        return None
    return True
    
# flag day images (adds an 'is_day' column; the filtering itself happens in a later cell)
def loopy_d(df):
    """Return a copy of ``df`` with an added ``'is_day'`` column.

    The column holds the result of ``is_day`` for every value in
    ``df['img_path']``. ``df`` is not modified.
    """
    flagged = df.copy()
    flagged['is_day'] = flagged['img_path'].apply(is_day)
    return flagged

In [None]:
# determine night images
def is_night(path):
    """Return True when the image at ``path`` is grayscale, else None.

    An image counts as grayscale when its three channels are identical
    everywhere. None, rather than False, is the "not flagged" value because
    callers select rows with ``notnull()``.

    Parameters
    ----------
    path : str
        Path of the image file to inspect.

    Returns
    -------
    True or None
        True if all channels are equal; None if any pixel differs between
        channels or the file could not be read.
    """
    image = cv2.imread(path)
    # cv2.imread does not raise on a missing or undecodable file, it returns
    # None; treat such a file as "not flagged" instead of crashing the worker.
    if image is None:
        return None
    b, g, r = image[:, :, 0], image[:, :, 1], image[:, :, 2]
    if (b == g).all() and (b == r).all():
        return True
    return None

# flag night images (adds an 'is_night' column; the filtering itself happens in a later cell)
def loopy_n(df):
    """Return a copy of ``df`` with an added ``'is_night'`` column.

    The column holds the result of ``is_night`` for every value in
    ``df['img_path']``. ``df`` is not modified.
    """
    flagged = df.copy()
    flagged['is_night'] = flagged['img_path'].apply(is_night)
    return flagged

## Load DFs

In [None]:
# read in DFs: the two pickled frames processed below (one per section of this notebook)
# presumably rows with / without precipitation, judging by the names -- TODO confirm
precip_df = pd.read_pickle("../DFs/precip_df.pkl")
no_precip_df = pd.read_pickle("../DFs/no_precip_df.pkl")

## Working with Precip DF

In [None]:
# first look up the image path of every precip row, in parallel:
# split the frame into NUM_CORES chunks, run loopy on each chunk in its own
# worker process, then stitch the chunks back together with a fresh 0..n-1 index
NUM_CORES = 10
df_chunks = np.array_split(precip_df, NUM_CORES)

with multiprocessing.Pool(NUM_CORES) as pool:
    df = pd.concat(pool.map(loopy, df_chunks), ignore_index = True)

In [None]:
# keep only the precip rows for which an image file was found
precip_df_images = df[df['img_path'].notna()]
precip_df_images

## Filter out Precip Day Images

In [None]:
# next flag the day images for precip, in parallel:
# loopy_d adds an 'is_day' column to each chunk; chunks are concatenated
# back together with a fresh 0..n-1 index
NUM_CORES = 10
df_chunks = np.array_split(precip_df_images, NUM_CORES)

with multiprocessing.Pool(NUM_CORES) as pool:
    precip_day_df = pd.concat(pool.map(loopy_d, df_chunks), ignore_index = True)

In [None]:
# keep only the precip rows flagged as day (is_day is True; unflagged rows are None)
day_precip_df = precip_day_df[precip_day_df['is_day'].notna()]
day_precip_df

## Sanity Check

In [None]:
# reload the saved day/precip frame; this replaces the day_precip_df built above
# NOTE(review): this path is "DFs/..." while the input frames are read from "../DFs/..." -- confirm which directory is intended
day_precip_df = pd.read_pickle("DFs/day_precip_df.pkl")

In [None]:
# add an 'index' column numbering the rows 0..n-1
# (this creates a regular column; the DataFrame's own index is left unchanged)
day_precip_df['index'] = range(len(day_precip_df))

In [None]:
# should be colored: rows in day_precip_df were kept because is_day flagged them
# the row is picked by index label 1584823 (not by position), assuming an integer index
img = mpimg.imread(day_precip_df['img_path'][1584823])
plt.imshow(img)

plt.show()

In [None]:
# save df (day/precip rows) as a pickle
# NOTE(review): written to "DFs/..." while the input frames are read from "../DFs/..." -- confirm which directory is intended
day_precip_df.to_pickle("DFs/day_precip_df.pkl")

## Filter out Precip Night Images

In [None]:
# next flag the night images for precip, in parallel:
# loopy_n adds an 'is_night' column to each chunk; chunks are concatenated
# back together with a fresh 0..n-1 index
NUM_CORES = 10
df_chunks = np.array_split(precip_df_images, NUM_CORES)

with multiprocessing.Pool(NUM_CORES) as pool:
    precip_night_df = pd.concat(pool.map(loopy_n, df_chunks), ignore_index = True)

In [None]:
# keep only the precip rows flagged as night (is_night is True; unflagged rows are None)
night_precip_df = precip_night_df[precip_night_df['is_night'].notna()]
night_precip_df

In [None]:
# add an 'index' column numbering the rows 0..n-1
# (this creates a regular column; the DataFrame's own index is left unchanged)
night_precip_df['index'] = range(len(night_precip_df))

In [None]:
# save df (night/precip rows) as a pickle next to the input frames in ../DFs
night_precip_df.to_pickle("../DFs/night_precip_df.pkl")

In [None]:
# should be grayscale: is_night only flags images whose three channels are identical
# the row is picked by position (.iloc), i.e. the 1035th row of night_precip_df
img = mpimg.imread(night_precip_df['img_path'].iloc[1034])
plt.imshow(img)

plt.show()

## Working with No Precip DF

In [None]:
# first look up the image path of every no-precip row, in parallel:
# split the frame into NUM_CORES chunks, run loopy on each chunk in its own
# worker process, then stitch the chunks back together with a fresh 0..n-1 index
NUM_CORES = 10
df_chunks = np.array_split(no_precip_df, NUM_CORES)

with multiprocessing.Pool(NUM_CORES) as pool:
    no_df = pd.concat(pool.map(loopy, df_chunks), ignore_index = True)

In [None]:
# number of no-precip rows, before dropping the ones without an image
len(no_df)

In [None]:
# keep only the no-precip rows for which an image file was found
no_precip_df_images = no_df[no_df['img_path'].notna()]
no_precip_df_images

In [None]:
# open DF: reload the saved no-precip frame; this replaces the no_precip_df_images built above
# NOTE(review): read from "../DFs/..." here, but the save/reload cells below use "DFs/..." -- confirm which directory is intended
no_precip_df_images = pd.read_pickle("../DFs/no_precip_df_images.pkl")

In [None]:
# add an 'index' column numbering the rows 0..n-1
# (this creates a regular column; the DataFrame's own index is left unchanged)
no_precip_df_images['index'] = range(len(no_precip_df_images))

In [None]:
# save no_precip_df_images as a pickle
# NOTE(review): written to "DFs/..." while the same file name is read from "../DFs/..." in an earlier cell -- confirm which directory is intended
no_precip_df_images.to_pickle("DFs/no_precip_df_images.pkl")

In [None]:
# reload the frame from the same "DFs/..." path the save cell above writes to
no_precip_df_images = pd.read_pickle("DFs/no_precip_df_images.pkl")

In [None]:
# next flag the day images for no precip, in parallel (the night flag is computed in a separate cell):
# loopy_d adds an 'is_day' column to each chunk; chunks are concatenated
# back together with a fresh 0..n-1 index
NUM_CORES = 10
df_chunks = np.array_split(no_precip_df_images, NUM_CORES)

with multiprocessing.Pool(NUM_CORES) as pool:
    no_day_df = pd.concat(pool.map(loopy_d, df_chunks), ignore_index = True)

In [None]:
# keep only the no-precip rows flagged as day (is_day is True; unflagged rows are None)
day_no_precip_df = no_day_df[no_day_df['is_day'].notna()]
day_no_precip_df

In [None]:
# night filter: flag the night images for no precip, in parallel
# loopy_n adds an 'is_night' column to each chunk; chunks are concatenated
# back together with a fresh 0..n-1 index
NUM_CORES = 10
df_chunks = np.array_split(no_precip_df_images, NUM_CORES)

with multiprocessing.Pool(NUM_CORES) as pool:
    no_night_df = pd.concat(pool.map(loopy_n, df_chunks), ignore_index = True)

In [None]:
# keep only the no-precip rows flagged as night (is_night is True; unflagged rows are None)
night_no_precip_df = no_night_df[no_night_df['is_night'].notna()]
night_no_precip_df

In [None]:
# add an 'index' column numbering the rows 0..n-1
# (this creates a regular column; the DataFrame's own index is left unchanged)
night_no_precip_df['index'] = range(len(night_no_precip_df))

In [None]:
# display the night-time no-precip rows
# (the frame built above is named night_no_precip_df; the bare name night_no_precip is never defined)
night_no_precip_df