## Notebook Purpose
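This notebook prepares the ForestNet dataset images for modelling: it writes filtered CSV manifests, converts images to NumPy arrays, and pairs each composite image with a second image (by alpha blending or channel concatenation) to increase input complexity.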

In [1]:
# Create new CSV files which only include examples that contain cloud filtered images along with composite images

import pandas as pd
import glob

# Load the original manifests for the training, validation, and test splits.
train = pd.read_csv('../input/forestnet/ForestNetDataset/train.csv')
val = pd.read_csv('../input/forestnet/ForestNetDataset/val.csv')
test = pd.read_csv('../input/forestnet/ForestNetDataset/test.csv')

# Column-wise accumulators, one per split: each key maps to the list of values kept for that column.
train_dict = {'label':[], 'merged_label': [], 'latitude': [], 'longitude': [], 'year': [], 'example_path': []}
val_dict = {'label':[], 'merged_label': [], 'latitude': [], 'longitude': [], 'year': [], 'example_path': []}
test_dict = {'label':[], 'merged_label': [], 'latitude': [], 'longitude': [], 'year': [], 'example_path': []}

# For every split, keep the rows whose "visible" image directory holds more than
# one PNG (taken here as the sign that a cloud filtered image accompanies the
# composite), then write the kept rows to a new CSV manifest.
split_frames = [train, val, test]
split_dicts = [train_dict, val_dict, test_dict]
split_files = ['train_new.csv', 'val_new.csv', 'test_new.csv']
for example_set_df, set_dict, set_path in zip(split_frames, split_dicts, split_files):
    # First pass: find the row numbers that pass the image-count check.
    kept_rows = []
    for row in range(len(example_set_df)):
        visible_dir = f'../input/forestnet/ForestNetDataset/{example_set_df["example_path"][row]}/images/visible'
        if len(glob.glob(visible_dir + '/*.png')) > 1:
            kept_rows.append(row)
    # Second pass: copy the kept rows into the accumulator, column by column.
    for col in ['label', 'merged_label', 'latitude', 'longitude', 'year', 'example_path']:
        set_dict[col].extend(example_set_df[col][row] for row in kept_rows)
    pd.DataFrame(set_dict).to_csv('../input/forestnet/ForestNetDataset/' + set_path, index=False)

            


## Important: run the cell directly below before running any of the later cells (it loads the `*_new.csv` manifests and creates the dictionaries they use)

In [1]:
# Read new CSV files and create dictionaries for all sets

import pandas as pd
import glob

# Load the filtered manifests for the training, validation, and test splits.
train = pd.read_csv('../input/forestnet/ForestNetDataset/train_new.csv')
val = pd.read_csv('../input/forestnet/ForestNetDataset/val_new.csv')
test = pd.read_csv('../input/forestnet/ForestNetDataset/test_new.csv')

# Fresh, empty column accumulators: one independent dictionary (with its own lists) per split.
train_dict, val_dict, test_dict = (
    {'label':[], 'merged_label': [], 'latitude': [], 'longitude': [], 'year': [], 'example_path': []}
    for _ in range(3)
)


In [3]:
# Run this block to: create new CSV files which only include examples that have the Plantation merged_label
plantation_columns = ['label', 'merged_label', 'latitude', 'longitude', 'year', 'example_path']
plantation_labels = set()

# Each split is filtered with a boolean mask instead of appending to the shared
# train_dict/val_dict/test_dict accumulators. Those dictionaries persist between
# cells, so appending to them duplicated rows in the written CSV whenever this
# cell was re-run or was run together with another filtering cell.
for example_set_df, set_path in zip([train, val, test], ['train_plantation.csv', 'val_plantation.csv', 'test_plantation.csv']):
    # keep only the examples whose merged_label is Plantation
    is_plantation = example_set_df['merged_label'] == 'Plantation'
    set_df = example_set_df.loc[is_plantation, plantation_columns]
    # record every fine-grained label that occurs among the kept examples
    plantation_labels.update(set_df['label'])
    set_df.to_csv('../input/forestnet/ForestNetDataset/' + set_path, index=False)

    # an empty input split would otherwise raise ZeroDivisionError here
    percent_kept = set_df.shape[0]/example_set_df.shape[0] * 100 if example_set_df.shape[0] else 0.0
    print(f'{set_path} is {percent_kept}% the size of the corresponding "new" set.')

print(plantation_labels)

train_p&sa.csv is 78.03379416282642% the size of the corresponding "new" set.
val_p&sa.csv is 77.22513089005236% the size of the corresponding "new" set.
test_p&sa.csv is 74.18738049713193% the size of the corresponding "new" set.
{('Smallholder agriculture', 'Small-scale mixed plantation'), ('Smallholder agriculture', 'Small-scale agriculture'), ('Plantation', 'Timber plantation'), ('Plantation', 'Oil palm plantation'), ('Plantation', 'Other large-scale plantations'), ('Smallholder agriculture', 'Small-scale oil palm plantation')}


In [None]:
# Run this block to: create new CSV files which only include examples that have the Plantation and Smallholder Agriculture merged_label(s)

psa_columns = ['label', 'merged_label', 'latitude', 'longitude', 'year', 'example_path']
unique_labels = set()

# Each split is filtered with a boolean mask instead of appending to the shared
# train_dict/val_dict/test_dict accumulators. Those dictionaries persist between
# cells, so appending to them duplicated rows in the written CSV whenever this
# cell was re-run or was run together with another filtering cell.
for example_set_df, set_path in zip([train, val, test], ['train_p&sa.csv', 'val_p&sa.csv', 'test_p&sa.csv']):
    # keep only the Plantation and Smallholder agriculture examples
    is_wanted = example_set_df['merged_label'].isin(['Plantation', 'Smallholder agriculture'])
    set_df = example_set_df.loc[is_wanted, psa_columns]
    # record every (merged_label, label) pair that occurs among the kept examples
    unique_labels.update(zip(set_df['merged_label'], set_df['label']))
    set_df.to_csv('../input/forestnet/ForestNetDataset/' + set_path, index=False)

    # an empty input split would otherwise raise ZeroDivisionError here
    percent_kept = set_df.shape[0]/example_set_df.shape[0] * 100 if example_set_df.shape[0] else 0.0
    print(f'{set_path} is {percent_kept}% the size of the corresponding "new" set.')

print(unique_labels)

In [4]:
# Run this block to: generate new images by alpha blending the most recent cloud filtered image with the composite image, with alpha value a.
from PIL import Image
import numpy as np
import pandas as pd
import glob
import os

#path to set csv files
train = pd.read_csv('../input/forestnet/ForestNetDataset/train_new.csv')
val = pd.read_csv('../input/forestnet/ForestNetDataset/val_new.csv')
test = pd.read_csv('../input/forestnet/ForestNetDataset/test_new.csv')

# Blend weight passed to Image.blend: 0.0 yields the composite unchanged,
# 1.0 yields the cloud filtered image unchanged.
a = 0.15

for example_set_df in [train, val, test]:
    for example_path in example_set_df["example_path"]:
        img_path = f'../input/forestnet/ForestNetDataset/{example_path}/images/visible'
        # glob returns paths in arbitrary order, so list positions cannot be used
        # to tell the composite from the cloud filtered images. The composite is
        # opened by name instead; the candidates exclude it and the blended_*.png
        # files this notebook writes into the same directory, so a re-run never
        # blends a previously generated image.
        cloudf_paths = sorted(
            path for path in glob.glob(img_path + '/*.png')
            if os.path.basename(path) != 'composite.png'
            and not os.path.basename(path).startswith('blended_')
        )
        if not cloudf_paths:
            raise FileNotFoundError(f'No cloud filtered image found in {img_path}')
        # NOTE(review): assumes cloud filtered file names sort chronologically,
        # so the last sorted path is the most recent one - confirm against the dataset.
        with Image.open(img_path + '/composite.png') as composite_img, \
                Image.open(cloudf_paths[-1]) as cloudf_img:
            blended_img = Image.blend(composite_img, cloudf_img, a)
        blended_img.save(img_path + f'/blended_images_alpha_{a}.png')

In [16]:
# Run this block to: generate new images by alpha composite blending the most composite image with its infrared twin, using alpha value a.
from PIL import Image
import numpy as np
import pandas as pd
import glob

# Load the filtered manifests for the training, validation, and test splits.
train = pd.read_csv('../input/forestnet/ForestNetDataset/train_new.csv')
val = pd.read_csv('../input/forestnet/ForestNetDataset/val_new.csv')
test = pd.read_csv('../input/forestnet/ForestNetDataset/test_new.csv')

# Blend weight handed to Image.blend (0.5 weights both inputs equally).
a = 0.5

for example_set_df in [train, val, test]:
    for example_path in example_set_df["example_path"]:
        img_path = f'../input/forestnet/ForestNetDataset/{example_path}/images/'
        # Visible-light composite for this example.
        composite_img = Image.open(img_path + 'visible/composite.png')
        # Infrared composite, stored as a NumPy array and reinterpreted as an RGB image.
        # NOTE(review): presumably a uint8 array of shape (H, W, 3) matching the PNG's
        # size and mode, as Image.blend needs matching inputs - confirm against the dataset.
        comp_ir_img = Image.fromarray(np.load(img_path + 'infrared/composite.npy'), mode = "RGB")
        # Write the blend next to the visible images.
        Image.blend(composite_img, comp_ir_img, a).save(img_path + f'visible/blended_infrared_{a}.png')

In [2]:
# Run this block to: generate new arrays by concatenating composite and cloud filtered images as NumPy arrays using np.concatenate
from PIL import Image 
import numpy as np
import pandas as pd
import glob
import os

#path to set csv files
train = pd.read_csv('../input/forestnet/ForestNetDataset/train_new.csv')
val = pd.read_csv('../input/forestnet/ForestNetDataset/val_new.csv')
test = pd.read_csv('../input/forestnet/ForestNetDataset/test_new.csv')


for example_set_df in [train, val, test]:
    for example_path in example_set_df["example_path"]:
        img_path = f'../input/forestnet/ForestNetDataset/{example_path}/images/visible'
        # glob returns paths in arbitrary order, so list positions cannot be used
        # to tell the composite from the cloud filtered images. The composite is
        # opened by name instead; the candidates exclude it and the blended_*.png
        # files other cells of this notebook write into the same directory.
        cloudf_paths = sorted(
            path for path in glob.glob(img_path + '/*.png')
            if os.path.basename(path) != 'composite.png'
            and not os.path.basename(path).startswith('blended_')
        )
        if not cloudf_paths:
            raise FileNotFoundError(f'No cloud filtered image found in {img_path}')
        # NOTE(review): assumes cloud filtered file names sort chronologically,
        # so the last sorted path is the most recent one - confirm against the dataset.
        with Image.open(img_path + '/composite.png') as composite_img, \
                Image.open(cloudf_paths[-1]) as cloudf_img:
            #convert to np-array
            np_composite_img = np.asarray(composite_img)
            np_cloudf_img = np.asarray(cloudf_img)
            # Stack along the channel axis (axis 2) so the result keeps the original
            # height and width; another axis would give a non-square image.
            np_new = np.concatenate((np_composite_img, np_cloudf_img), 2)
        np.save(img_path + "/concatenated_images.npy", np_new)