## Notebook Purpose
This notebook converts the existing dataset images to NumPy arrays, pairing each example's composite image with one of its cloud-filtered images (by channel concatenation or by blending) to increase input complexity.

In [24]:
# Create new CSV files which only include examples that contain cloud filtered images along with composite images

import pandas as pd
import glob

# Load the original train / val / test split tables
train = pd.read_csv('../input/forestnet/ForestNetDataset/train.csv')
val = pd.read_csv('../input/forestnet/ForestNetDataset/val.csv')
test = pd.read_csv('../input/forestnet/ForestNetDataset/test.csv')

# Columns carried over into the filtered CSV files, in output order
kept_columns = ['label', 'merged_label', 'latitude', 'longitude', 'year', 'example_path']

# One column-wise accumulator per split
train_dict = {col: [] for col in kept_columns}
val_dict = {col: [] for col in kept_columns}
test_dict = {col: [] for col in kept_columns}

splits = [
    (train, train_dict, 'train_new.csv'),
    (val, val_dict, 'val_new.csv'),
    (test, test_dict, 'test_new.csv'),
]

# Keep only the rows whose "visible" image folder holds more than one PNG,
# then write each filtered split next to the original CSV files
for example_set_df, set_dict, set_path in splits:
    for i in range(len(example_set_df)):
        img_path = f'../input/forestnet/ForestNetDataset/{example_set_df["example_path"][i]}/images/visible'
        list_of_images = glob.glob(img_path + '/*.png')
        # A single PNG (or none) means there is nothing to pair up: skip the row
        if len(list_of_images) <= 1:
            continue
        for col in kept_columns:
            set_dict[col].append(example_set_df[col][i])
    set_df = pd.DataFrame(set_dict)
    set_df.to_csv('../input/forestnet/ForestNetDataset/' + set_path, index=False)

            


In [25]:
# SOLUTION TO ONLY INCLUDE EXAMPLES WITH COMPOSITE AND CLOUD FILTERED IMAGES using np.concatenate
# For every example listed in the filtered CSV files, stack two of its PNG images
# along axis 2 and save the result as concatenated_images.npy in the same folder.
import glob
import os

import numpy as np
import pandas as pd
from PIL import Image

DATASET_DIR = '../input/forestnet/ForestNetDataset'
# PNG files that this notebook itself writes into the image folders. They match
# the "*.png" pattern, so they must be excluded or a re-run would treat a
# generated file as a source image.
GENERATED_PNGS = {'blended_images.png'}

#path to set csv files
train = pd.read_csv(f'{DATASET_DIR}/train_new.csv')
val = pd.read_csv(f'{DATASET_DIR}/val_new.csv')
test = pd.read_csv(f'{DATASET_DIR}/test_new.csv')

for example_set_df in [train, val, test]:
    for example_path in example_set_df["example_path"]:
        img_path = f'{DATASET_DIR}/{example_path}/images/visible'
        # glob.glob returns matches in arbitrary order, so sort them to make the
        # [-1] / [-2] selection below deterministic.
        # NOTE(review): assumes that, sorted by file name, the composite image is
        # last and the most recent cloud filtered image is second to last --
        # confirm against the dataset's file naming.
        list_of_images = sorted(
            path
            for path in glob.glob(img_path + "/*.png")
            if os.path.basename(path) not in GENERATED_PNGS
        )
        if len(list_of_images) < 2:
            raise ValueError(
                f'Expected at least two source PNG images in {img_path}, '
                f'found {len(list_of_images)}'
            )
        # Context managers release the file handles once the pixels are copied
        with Image.open(list_of_images[-1]) as composite_img, \
                Image.open(list_of_images[-2]) as cloudf_img:
            #convert to np-array
            np_composite_img = np.asarray(composite_img)
            np_cloudf_img = np.asarray(cloudf_img)
        # COULD ALSO USE DIFFERENT AXIS TO GET NON-SQUARE IMAGE, I DON'T THINK THAT'S A GOOD IDEA
        # Axis 2 joins the two arrays along their third dimension, so both must
        # be 3-D and agree on the first two dimensions.
        np_new = np.concatenate((np_composite_img, np_cloudf_img), axis=2)
        np.save(img_path + "/concatenated_images.npy", np_new)

    

In [1]:
# SOLUTION TO ONLY INCLUDE EXAMPLES WITH COMPOSITE AND CLOUD FILTERED IMAGES USING PIL.Image.blend
# For every example listed in the filtered CSV files, blend two of its PNG images
# 50/50 and save the result as blended_images.png in the same folder.
import glob
import os

import numpy as np
import pandas as pd
from PIL import Image

DATASET_DIR = '../input/forestnet/ForestNetDataset'
BLENDED_NAME = 'blended_images.png'

#path to set csv files
train = pd.read_csv(f'{DATASET_DIR}/train_new.csv')
val = pd.read_csv(f'{DATASET_DIR}/val_new.csv')
test = pd.read_csv(f'{DATASET_DIR}/test_new.csv')

for example_set_df in [train, val, test]:
    for example_path in example_set_df["example_path"]:
        img_path = f'{DATASET_DIR}/{example_path}/images/visible'
        # The output of this cell is itself a PNG in this folder; leave it out so
        # that re-running the cell does not blend against a previous result.
        # glob.glob returns matches in arbitrary order, so sort them to make the
        # [-1] / [-2] selection below deterministic.
        # NOTE(review): assumes that, sorted by file name, the composite image is
        # last and the most recent cloud filtered image is second to last --
        # confirm against the dataset's file naming.
        list_of_images = sorted(
            path
            for path in glob.glob(img_path + "/*.png")
            if os.path.basename(path) != BLENDED_NAME
        )
        if len(list_of_images) < 2:
            raise ValueError(
                f'Expected at least two source PNG images in {img_path}, '
                f'found {len(list_of_images)}'
            )
        # Context managers release the file handles once the blend is computed
        with Image.open(list_of_images[-1]) as composite_img, \
                Image.open(list_of_images[-2]) as cloudf_img:
            #blend images (equal weight for both inputs)
            blended_images = Image.blend(composite_img, cloudf_img, 0.5)
        blended_images.save(img_path + "/" + BLENDED_NAME)

    

In [3]:
import torch
import torchvision.transforms as transforms 
import numpy as np

# Single-step pipeline: ToTensor turns the loaded array into a torch tensor
transform = transforms.Compose([transforms.ToTensor()])

# Read back the concatenated array saved for one example
image = np.load(
    '../input/forestnet/ForestNetDataset/examples/0.01805967191228_101.57018668487896/images/visible/concatenated_images.npy'
)

# Run the array through the pipeline
tensor = transform(image)

# Show the resulting tensor
print(tensor)

tensor([[[0.0667, 0.0627, 0.0627,  ..., 0.1098, 0.0980, 0.0863],
         [0.0667, 0.0667, 0.0627,  ..., 0.1686, 0.1725, 0.1686],
         [0.0667, 0.0627, 0.0627,  ..., 0.1608, 0.1569, 0.1961],
         ...,
         [0.0863, 0.0863, 0.0784,  ..., 0.1412, 0.1373, 0.1294],
         [0.0784, 0.0863, 0.0902,  ..., 0.1333, 0.1333, 0.1294],
         [0.0784, 0.0863, 0.0902,  ..., 0.1412, 0.1373, 0.1373]],

        [[0.0941, 0.0941, 0.0902,  ..., 0.1373, 0.1216, 0.1137],
         [0.0941, 0.0941, 0.0941,  ..., 0.1961, 0.2000, 0.2078],
         [0.0941, 0.0980, 0.0941,  ..., 0.1765, 0.1765, 0.2118],
         ...,
         [0.1373, 0.1373, 0.1255,  ..., 0.1373, 0.1373, 0.1412],
         [0.1294, 0.1333, 0.1373,  ..., 0.1294, 0.1373, 0.1412],
         [0.1294, 0.1333, 0.1373,  ..., 0.1490, 0.1451, 0.1451]],

        [[0.0510, 0.0471, 0.0510,  ..., 0.0784, 0.0510, 0.0510],
         [0.0510, 0.0510, 0.0471,  ..., 0.1373, 0.1373, 0.1451],
         [0.0510, 0.0510, 0.0510,  ..., 0.1569, 0.1412, 0.