-----------
## In this notebook:
* Filter out rows without a path
* Filter out day vs night images
* Copy daytime images to folders
-----------

In [1]:
import glob
import multiprocessing
import os
import shutil
import warnings

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

warnings.filterwarnings("ignore")

## Define functions

In [2]:
# remove rows without path
def get_path(x, nysm_dir='/tf/NYSM/archive/nysm/cam_photos/'):
    """Return the path of the camera image that matches a row, or None.

    Parameters
    ----------
    x : mapping or pandas.Series
        Row providing ``'time_5M'`` (any object with ``strftime``) and
        ``'station'`` (bytes, decoded as UTF-8, or an already decoded str).
    nysm_dir : str, optional
        Root of the image archive. Files are looked up as
        ``<nysm_dir>/YYYY/MM/DD/<station>/YYYYmmddTHHMM*``.

    Returns
    -------
    str or None
        The lexicographically smallest file name matching the row's time
        stamp, or None when the station folder or the file does not exist.
    """
    stamp = x['time_5M']  # time stamp of the observation
    station = x['station']
    if isinstance(station, bytes):
        station = station.decode("utf-8")

    site_path = os.path.join(
        nysm_dir,
        stamp.strftime('%Y'),
        stamp.strftime('%m'),
        stamp.strftime('%d'),
        station,
    )
    # Glob only once: a missing folder simply yields no matches, so no
    # separate existence check is needed.
    matches = glob.glob(os.path.join(site_path, stamp.strftime('%Y%m%dT%H%M') + '*'))
    # glob returns matches in arbitrary order; min() makes the choice stable.
    return min(matches) if matches else None
    

def loopy(df):
    """Return a copy of ``df`` with an extra ``img_path`` column.

    ``get_path`` is evaluated once per row (``axis=1``) and its result is
    stored in the new column; the frame passed in is left untouched.
    """
    image_paths = df.apply(get_path, axis=1)
    return df.assign(img_path=image_paths)

In [5]:
# determine day images
def is_day(path):
    """Flag an image as a day image when it contains colour.

    An image whose blue, green and red channels are identical everywhere
    (i.e. a grayscale frame) is not considered a day image.

    Parameters
    ----------
    path : str
        Location of the image file.

    Returns
    -------
    True or None
        True for a colour image; None for a grayscale image or for a file
        that OpenCV cannot read. None (rather than False) is kept so that
        callers can filter with ``notnull()``.
    """
    image = cv2.imread(path)
    if image is None:
        # cv2.imread reports a missing/corrupt file by returning None instead
        # of raising; indexing it would abort the whole batch.
        return None
    blue, green, red = image[:, :, 0], image[:, :, 1], image[:, :, 2]
    is_grayscale = (blue == green).all() and (blue == red).all()
    return None if is_grayscale else True
    
# flag day images
def loopy_d(df):
    """Return a copy of ``df`` with an ``is_day`` column.

    Each value of ``img_path`` is passed to ``is_day`` and the result is
    stored in the new column; the frame passed in is left untouched.
    """
    day_flags = df['img_path'].apply(is_day)
    return df.assign(is_day=day_flags)

In [6]:
# determine night images
def is_night(path):
    """Flag an image as a night image when it contains no colour.

    An image counts as a night image when its blue, green and red channels
    are identical everywhere (i.e. a grayscale frame).

    Parameters
    ----------
    path : str
        Location of the image file.

    Returns
    -------
    True or None
        True for a grayscale image; None for a colour image or for a file
        that OpenCV cannot read. None (rather than False) is kept so that
        callers can filter with ``notnull()``.
    """
    image = cv2.imread(path)
    if image is None:
        # cv2.imread reports a missing/corrupt file by returning None instead
        # of raising; indexing it would abort the whole batch.
        return None
    blue, green, red = image[:, :, 0], image[:, :, 1], image[:, :, 2]
    is_grayscale = (blue == green).all() and (blue == red).all()
    return True if is_grayscale else None

# flag night images
def loopy_n(df):
    """Return a copy of ``df`` with an ``is_night`` column.

    Each value of ``img_path`` is passed to ``is_night`` and the result is
    stored in the new column; the frame passed in is left untouched.
    """
    night_flags = df['img_path'].apply(is_night)
    return df.assign(is_night=night_flags)

## Load DFs

In [5]:
# load the pickled precip and no-precip DataFrames
precip_df, no_precip_df = (
    pd.read_pickle(pickle_path)
    for pickle_path in ("DFs/precip_df.pkl", "DFs/no_precip_df.pkl")
)

## Working with Precip DF

In [6]:
# first look up the image path of every precip row, in parallel
NUM_CORES = 10
# Slice the frame positionally instead of calling np.array_split on it:
# array_split goes through DataFrame.swapaxes, which pandas has deprecated.
chunk_bounds = np.linspace(0, len(precip_df), NUM_CORES + 1, dtype=int)
df_chunks = [
    precip_df.iloc[start:stop]
    for start, stop in zip(chunk_bounds[:-1], chunk_bounds[1:])
]

# each worker adds 'img_path' to its chunk; pool.map keeps the chunk order
with multiprocessing.Pool(NUM_CORES) as pool:
    df = pd.concat(pool.map(loopy, df_chunks), ignore_index = True)

In [8]:
# keep only the rows for which an image file was found
precip_df_images = df[df['img_path'].notna()]
precip_df_images

Unnamed: 0,index,station,time_5M,tair,ta9m,precip,precip_total,precip_max_intensity,snow_depth,precip_diff,img_path
65,65,b'SCHU',2015-08-13 16:45:00,21.526239,21.358690,0.146999,24.986000,0.000,,0.146999,/tf/NYSM/archive/nysm/cam_photos/2015/08/13/SC...
66,66,b'SCHU',2015-08-13 16:50:00,20.540680,20.426950,0.212000,25.132999,0.000,,0.065001,/tf/NYSM/archive/nysm/cam_photos/2015/08/13/SC...
67,67,b'SCHU',2015-08-13 17:20:00,20.132040,20.003860,0.520000,25.198000,0.000,,0.308001,/tf/NYSM/archive/nysm/cam_photos/2015/08/13/SC...
68,68,b'SCHU',2015-08-13 17:25:00,19.782150,19.688551,0.607000,25.506001,0.000,,0.087000,/tf/NYSM/archive/nysm/cam_photos/2015/08/13/SC...
69,69,b'SCHU',2015-08-13 17:30:00,19.587971,19.559811,0.975000,25.593000,0.127,,0.368000,/tf/NYSM/archive/nysm/cam_photos/2015/08/13/SC...
...,...,...,...,...,...,...,...,...,...,...,...
3466225,3466225,b'YORK',2020-12-28 11:00:00,1.457623,1.492350,0.840004,82.279999,0.000,0.000236,0.050003,/tf/NYSM/archive/nysm/cam_photos/2020/12/28/YO...
3466226,3466226,b'YORK',2020-12-30 21:10:00,5.567594,5.716432,0.110001,82.330002,0.000,-0.004039,0.110001,/tf/NYSM/archive/nysm/cam_photos/2020/12/30/YO...
3466227,3466227,b'YORK',2020-12-30 22:20:00,4.348372,4.467166,0.299995,82.440002,0.000,-0.004136,0.189995,/tf/NYSM/archive/nysm/cam_photos/2020/12/30/YO...
3466228,3466228,b'YORK',2020-12-30 22:45:00,4.160859,4.248825,0.409996,82.629997,0.000,-0.003529,0.110001,/tf/NYSM/archive/nysm/cam_photos/2020/12/30/YO...


## Sanity Check

In [None]:
# should have precipitation
# imshow needs pixel data rather than a file name, so load the image first.
# .iloc[0] takes the first remaining row; after filtering, the index labels
# no longer start at 0, so ['img_path'][0] would raise a KeyError.
sample_image = cv2.imread(precip_df_images['img_path'].iloc[0])
# OpenCV loads channels as BGR, matplotlib expects RGB
plt.imshow(cv2.cvtColor(sample_image, cv2.COLOR_BGR2RGB));

## Filter Out Precip Day Images

In [64]:
# next flag the day images for precip, in parallel
NUM_CORES = 10
# Slice the frame positionally instead of calling np.array_split on it:
# array_split goes through DataFrame.swapaxes, which pandas has deprecated.
chunk_bounds = np.linspace(0, len(precip_df_images), NUM_CORES + 1, dtype=int)
df_chunks = [
    precip_df_images.iloc[start:stop]
    for start, stop in zip(chunk_bounds[:-1], chunk_bounds[1:])
]

# each worker adds 'is_day' to its chunk; pool.map keeps the chunk order
with multiprocessing.Pool(NUM_CORES) as pool:
    precip_day_df = pd.concat(pool.map(loopy_d, df_chunks), ignore_index = True)

In [65]:
# keep only the rows flagged as day images
day_precip_df = precip_day_df[precip_day_df['is_day'].notna()]
day_precip_df

Unnamed: 0,index,station,time_5M,tair,ta9m,precip,precip_total,precip_max_intensity,snow_depth,precip_diff,img_path,is_day
0,65,b'SCHU',2015-08-13 16:45:00,21.526239,21.358690,0.146999,24.986000,0.000,,0.146999,/tf/NYSM/archive/nysm/cam_photos/2015/08/13/SC...,True
1,66,b'SCHU',2015-08-13 16:50:00,20.540680,20.426950,0.212000,25.132999,0.000,,0.065001,/tf/NYSM/archive/nysm/cam_photos/2015/08/13/SC...,True
2,67,b'SCHU',2015-08-13 17:20:00,20.132040,20.003860,0.520000,25.198000,0.000,,0.308001,/tf/NYSM/archive/nysm/cam_photos/2015/08/13/SC...,True
3,68,b'SCHU',2015-08-13 17:25:00,19.782150,19.688551,0.607000,25.506001,0.000,,0.087000,/tf/NYSM/archive/nysm/cam_photos/2015/08/13/SC...,True
4,69,b'SCHU',2015-08-13 17:30:00,19.587971,19.559811,0.975000,25.593000,0.127,,0.368000,/tf/NYSM/archive/nysm/cam_photos/2015/08/13/SC...,True
...,...,...,...,...,...,...,...,...,...,...,...,...
2073158,3466086,b'YORK',2020-12-24 21:45:00,7.989942,8.129804,2.320000,61.520000,0.000,-0.004730,0.149998,/tf/NYSM/archive/nysm/cam_photos/2020/12/24/YO...,True
2073159,3466087,b'YORK',2020-12-24 21:50:00,7.869830,8.115767,2.360001,61.669998,0.000,-0.008403,0.040001,/tf/NYSM/archive/nysm/cam_photos/2020/12/24/YO...,True
2073160,3466088,b'YORK',2020-12-24 21:55:00,7.515274,8.021632,2.520000,61.709999,0.000,-0.007266,0.160000,/tf/NYSM/archive/nysm/cam_photos/2020/12/24/YO...,True
2073179,3466211,b'YORK',2020-12-25 19:35:00,-0.835582,-1.015465,14.009995,80.930000,0.000,-0.007240,0.089996,/tf/NYSM/archive/nysm/cam_photos/2020/12/25/YO...,True


## Sanity Check

In [None]:
# should be colored
# imshow needs pixel data rather than a file name, so load the image first;
# .iloc[0] takes the first row regardless of its index label.
sample_image = cv2.imread(day_precip_df['img_path'].iloc[0])
# OpenCV loads channels as BGR, matplotlib expects RGB
plt.imshow(cv2.cvtColor(sample_image, cv2.COLOR_BGR2RGB));

In [67]:
# persist the day-time precip rows
pd.to_pickle(day_precip_df, "DFs/day_precip_df.pkl")

In [None]:
# copy images into precip folder
destination = "/home/xcite/kokamura/Precip/Precip_NoPrecip/Precip"
# If the folder is missing, shutil.copy treats `destination` as a file name
# and every iteration overwrites that one file, so create the folder first.
os.makedirs(destination, exist_ok=True)
for image in day_precip_df['img_path']:
    shutil.copy(image, destination)

## Working with No Precip DF

In [68]:
# first look up the image path of every no-precip row, in parallel
NUM_CORES = 10
# Slice the frame positionally instead of calling np.array_split on it:
# array_split goes through DataFrame.swapaxes, which pandas has deprecated.
chunk_bounds = np.linspace(0, len(no_precip_df), NUM_CORES + 1, dtype=int)
df_chunks = [
    no_precip_df.iloc[start:stop]
    for start, stop in zip(chunk_bounds[:-1], chunk_bounds[1:])
]

# each worker adds 'img_path' to its chunk; pool.map keeps the chunk order
with multiprocessing.Pool(NUM_CORES) as pool:
    no_df = pd.concat(pool.map(loopy, df_chunks), ignore_index = True)

In [69]:
# number of no-precip rows before filtering on image availability
no_df.shape[0]

51950185

In [70]:
# keep only the rows for which an image file was found
no_precip_df_images = no_df[no_df['img_path'].notna()]
no_precip_df_images

Unnamed: 0,index,station,time_5M,tair,ta9m,precip,precip_total,precip_max_intensity,snow_depth,precip_diff,img_path
686,686,b'SCHU',2015-08-13 16:30:00,22.238859,22.051840,0.000000,24.986000,0.0,,0.0,/tf/NYSM/archive/nysm/cam_photos/2015/08/13/SC...
687,687,b'SCHU',2015-08-13 16:35:00,21.994610,21.835501,0.000000,24.986000,0.0,,0.0,/tf/NYSM/archive/nysm/cam_photos/2015/08/13/SC...
688,688,b'SCHU',2015-08-13 16:40:00,21.792700,21.673880,0.000000,24.986000,0.0,,0.0,/tf/NYSM/archive/nysm/cam_photos/2015/08/13/SC...
689,689,b'SCHU',2015-08-13 16:55:00,20.111839,19.999201,0.212000,25.198000,0.0,,0.0,/tf/NYSM/archive/nysm/cam_photos/2015/08/13/SC...
690,690,b'SCHU',2015-08-13 17:00:00,20.223181,20.070271,0.212000,25.198000,0.0,,0.0,/tf/NYSM/archive/nysm/cam_photos/2015/08/13/SC...
...,...,...,...,...,...,...,...,...,...,...,...
51950168,51950168,b'YORK',2020-12-31 22:35:00,0.997584,1.036402,1.630005,84.400002,0.0,-0.007279,0.0,/tf/NYSM/archive/nysm/cam_photos/2020/12/31/YO...
51950169,51950169,b'YORK',2020-12-31 22:40:00,0.958507,0.993435,1.630005,84.400002,0.0,-0.007150,0.0,/tf/NYSM/archive/nysm/cam_photos/2020/12/31/YO...
51950170,51950170,b'YORK',2020-12-31 22:45:00,0.900587,0.956663,1.630005,84.400002,0.0,-0.006960,0.0,/tf/NYSM/archive/nysm/cam_photos/2020/12/31/YO...
51950171,51950171,b'YORK',2020-12-31 22:50:00,0.891250,0.927758,1.630005,84.400002,0.0,-0.006929,0.0,/tf/NYSM/archive/nysm/cam_photos/2020/12/31/YO...


In [71]:
# persist the no-precip rows that have an image
pd.to_pickle(no_precip_df_images, "DFs/no_precip_df_images.pkl")

In [7]:
# reload the no-precip rows from the pickle file
no_precip_df_images = pd.read_pickle(filepath_or_buffer="DFs/no_precip_df_images.pkl")

In [None]:
# next flag the day images for no precip, in parallel
NUM_CORES = 10
# Slice the frame positionally instead of calling np.array_split on it:
# array_split goes through DataFrame.swapaxes, which pandas has deprecated.
chunk_bounds = np.linspace(0, len(no_precip_df_images), NUM_CORES + 1, dtype=int)
df_chunks = [
    no_precip_df_images.iloc[start:stop]
    for start, stop in zip(chunk_bounds[:-1], chunk_bounds[1:])
]

# each worker adds 'is_day' to its chunk; pool.map keeps the chunk order
with multiprocessing.Pool(NUM_CORES) as pool:
    no_day_df = pd.concat(pool.map(loopy_d, df_chunks), ignore_index = True)

In [None]:
# keep only the rows flagged as day images
day_no_precip_df = no_day_df[no_day_df['is_day'].notna()]
day_no_precip_df

In [None]:
# copy images into no precip folder
destination = "/home/xcite/kokamura/Precip/Precip_NoPrecip/No_Precip"
# If the folder is missing, shutil.copy treats `destination` as a file name
# and every iteration overwrites that one file, so create the folder first.
os.makedirs(destination, exist_ok=True)
# `no_precip` was never defined; the day-time no-precip rows live in
# day_no_precip_df, mirroring the precip copy step.
for image in day_no_precip_df['img_path']:
    shutil.copy(image, destination)