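## Notebook Purpose
This notebook filters the dataset's train/val/test CSV files and pairs each example's composite image with its most recent cloud-filtered image, either concatenated into a single NumPy array or blended into one image, to increase input complexity.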

In [1]:
# Create new CSV files which only include examples that contain cloud filtered images along with composite images
#
# Reads train.csv / val.csv / test.csv and writes train_new.csv / val_new.csv / test_new.csv
# next to them, keeping only the rows whose "images/visible" folder holds more than one PNG.

import pandas as pd
import glob

# Root folder of the dataset; every example_path in the CSVs is relative to it.
dataset_dir = '../input/forestnet/ForestNetDataset/'
# Columns copied into the filtered CSVs, in output order.
kept_columns = ['label', 'merged_label', 'latitude', 'longitude', 'year', 'example_path']

#read through all files in training, val, and test sets
train = pd.read_csv(dataset_dir + 'train.csv')
val = pd.read_csv(dataset_dir + 'val.csv')
test = pd.read_csv(dataset_dir + 'test.csv')

#check to see if cloud filtered image is available
for example_set_df, set_path in zip([train, val, test], ['train_new.csv', 'val_new.csv', 'test_new.csv']):
    # One boolean per row: True when the example has more than one visible PNG.
    # NOTE(review): "more than one PNG" is taken to mean "composite plus at least one
    # cloud filtered image" -- confirm against the dataset layout.
    has_multiple_images = [
        len(glob.glob(f'{dataset_dir}{example_path}/images/visible/*.png')) > 1
        for example_path in example_set_df['example_path']
    ]
    set_df = example_set_df.loc[has_multiple_images, kept_columns]
    set_df.to_csv(dataset_dir + set_path, index=False)

            


In [10]:
# Create new CSV files which only include examples that have the Plantation merged_label
#
# Reads the *_new.csv files and writes train_plantation.csv / val_plantation.csv /
# test_plantation.csv next to them, then reports how much of each "new" set was kept
# and which fine-grained labels fall under the Plantation merged_label.

import pandas as pd
import glob

# Root folder of the dataset; the CSVs are read from and written to it.
dataset_dir = '../input/forestnet/ForestNetDataset/'
# Columns copied into the filtered CSVs, in output order.
kept_columns = ['label', 'merged_label', 'latitude', 'longitude', 'year', 'example_path']

#read through all files in training, val, and test sets
train = pd.read_csv(dataset_dir + 'train_new.csv')
val = pd.read_csv(dataset_dir + 'val_new.csv')
test = pd.read_csv(dataset_dir + 'test_new.csv')

# Every distinct value of 'label' seen on a Plantation row, across all three sets.
plantation_labels = set()

for example_set_df, set_path in zip([train, val, test], ['train_plantation.csv', 'val_plantation.csv', 'test_plantation.csv']):
    #keep only the rows whose merged_label is Plantation
    is_plantation = example_set_df['merged_label'] == 'Plantation'
    set_df = example_set_df.loc[is_plantation, kept_columns]
    plantation_labels.update(set_df['label'])
    set_df.to_csv(dataset_dir + set_path, index=False)

    print(f'{set_path} is {set_df.shape[0]/example_set_df.shape[0] * 100}% the size of the corresponding "new" set.')

print(plantation_labels)


train_plantation.csv is 43.16436251920123% the size of the corresponding "new" set.
val_plantation.csv is 45.811518324607334% the size of the corresponding "new" set.
test_plantation.csv is 39.961759082217974% the size of the corresponding "new" set.
{'Other large-scale plantations', 'Timber plantation', 'Oil palm plantation'}


In [2]:
# SOLUTION TO ONLY INCLUDE EXAMPLES WITH COMPOSITE AND CLOUD FILTERED IMAGES USING np.concatenate
#
# For every example listed in the *_new.csv files, stacks two of its visible PNGs along the
# channel axis and saves the result as concatenated_images.npy in the same folder.
from PIL import Image 
import numpy as np
import pandas as pd
import glob
import os

#path to set csv files
train = pd.read_csv('../input/forestnet/ForestNetDataset/train_new.csv')
val = pd.read_csv('../input/forestnet/ForestNetDataset/val_new.csv')
test = pd.read_csv('../input/forestnet/ForestNetDataset/test_new.csv')


for example_set_df in [train, val, test]:
    for example_path in example_set_df['example_path']:
        img_path = f'../input/forestnet/ForestNetDataset/{example_path}/images/visible'
        #get list of files in directory
        # glob returns matches in arbitrary order, so sort to make the [-1] / [-2] picks
        # deterministic. blended_images.png is written into this same folder by the
        # blending cell of this notebook and is not a source image, so leave it out.
        # NOTE(review): assumes file names sort so that the composite is last and the most
        # recent cloud filtered image is second to last -- confirm against the dataset.
        list_of_files = sorted(
            png_path
            for png_path in glob.glob(img_path + '/*.png')
            if os.path.basename(png_path) != 'blended_images.png'
        )
        #loads example's composite image and most recent cloud filtered image;
        #the with-block closes both file handles once the arrays are built
        with Image.open(list_of_files[-1]) as composite_img, Image.open(list_of_files[-2]) as cloudf_img:
            #convert to np-array
            np_composite_img = np.asarray(composite_img)
            np_cloudf_img = np.asarray(cloudf_img)
            # COULD ALSO USE DIFFERENT AXIS TO GET NON-SQUARE IMAGE, I DON'T THINK THAT'S A GOOD IDEA
            # Axis 2 is the channel axis, so both images must share height and width.
            np_new = np.concatenate((np_composite_img, np_cloudf_img), 2)
        np.save(img_path + "/concatenated_images.npy", np_new)

In [None]:
# SOLUTION TO ONLY INCLUDE EXAMPLES WITH COMPOSITE AND CLOUD FILTERED IMAGES USING PIL.Image.blend
#
# For every example listed in the *_new.csv files, blends two of its visible PNGs 50/50 and
# saves the result as blended_images.png in the same folder.
from PIL import Image
import numpy as np
import pandas as pd
import glob
import os

#path to set csv files
train = pd.read_csv('../input/forestnet/ForestNetDataset/train_new.csv')
val = pd.read_csv('../input/forestnet/ForestNetDataset/val_new.csv')
test = pd.read_csv('../input/forestnet/ForestNetDataset/test_new.csv')


for example_set_df in [train, val, test]:
    for example_path in example_set_df['example_path']:
        img_path = f'../input/forestnet/ForestNetDataset/{example_path}/images/visible'
        #get list of files in directory
        # glob returns matches in arbitrary order, so sort to make the [-1] / [-2] picks
        # deterministic. Skip blended_images.png: this cell writes it into the same folder,
        # so on a re-run it would otherwise be picked up as one of the inputs.
        # NOTE(review): assumes file names sort so that the composite is last and the most
        # recent cloud filtered image is second to last -- confirm against the dataset.
        list_of_files = sorted(
            png_path
            for png_path in glob.glob(img_path + '/*.png')
            if os.path.basename(png_path) != 'blended_images.png'
        )
        #loads example's composite image and most recent cloud filtered image;
        #the with-block closes both file handles once the blend is computed
        with Image.open(list_of_files[-1]) as composite_img, Image.open(list_of_files[-2]) as cloudf_img:
            # Equal-weight blend; Image.blend needs both images to share mode and size.
            blended_img = Image.blend(composite_img, cloudf_img, .5)
        blended_img.save(img_path + '/blended_images.png')

In [15]:
# Disabled experiment: tries to load one example's concatenated_images.npy and save it back
# as an image. It is wrapped in a bare string literal so none of it executes; the cell only
# echoes the string.
# Doesn't work due to npy arrays having 6 channels (R,G,B,R,G,B)
# NOTE(review): the save path below lacks the 'examples/' segment that npy_path has and
# carries no file extension -- confirm the intended target before re-enabling this code.
""" 
import numpy as np
from PIL import Image
npy_path = '../input/forestnet/ForestNetDataset/examples/0.01805967191228_101.57018668487896/images/visible/concatenated_images.npy'
conc_npy = np.load(npy_path)
conc_img = Image.fromarray(conc_npy)
conc_img.save('../input/forestnet/ForestNetDataset/0.01805967191228_101.57018668487896/images/visible/concatenated_images')
"""

" \nimport numpy as np\nfrom PIL import Image\nnpy_path = '../input/forestnet/ForestNetDataset/examples/0.01805967191228_101.57018668487896/images/visible/concatenated_images.npy'\nconc_npy = np.load(npy_path)\nconc_img = Image.fromarray(conc_npy)\nconc_img.save('../input/forestnet/ForestNetDataset/0.01805967191228_101.57018668487896/images/visible/concatenated_images')\n"