In [1]:
import numpy as np 
import pandas as pd 
import json
import re
import matplotlib.pyplot as plt
import os
import torch
from torch import nn 
import pathlib
import glob
from tqdm.notebook import tqdm

In [2]:
def read_record(record_id, directory):
    """Load the band and mask arrays stored on disk for one record.

    Parameters
    ----------
    record_id : str
        Name of the record's sub-directory inside ``directory``.
    directory : str
        Directory that contains the record sub-directory.

    Returns
    -------
    dict
        Maps each of ``'band_11'``, ``'band_14'``, ``'band_15'`` and
        ``'human_pixel_masks'`` to the array loaded from
        ``<directory>/<record_id>/<name>.npy``.

    Raises
    ------
    Whatever ``np.load`` raises when one of the four files is missing or
    unreadable (e.g. ``FileNotFoundError``).
    """
    array_names = ('band_11', 'band_14', 'band_15', 'human_pixel_masks')
    return {
        name: np.load(os.path.join(directory, record_id, name + '.npy'))
        for name in array_names
    }
            

In [3]:
# Root directory of the competition dataset; the 'train' and 'validation'
# sub-directories are resolved relative to it in later cells.
base_dir = '/kaggle/input/google-research-identify-contrails-reduce-global-warming'


# (lower, upper) bounds handed to normalize_range() by get_false_color();
# the rescaled values are clipped to [0, 1] afterwards.
# NOTE(review): values are presumably brightness temperatures / temperature
# differences in Kelvin — confirm against the dataset description.
_T11_BOUNDS = (243,303)  # used for band_14 (blue channel)
_CLOUD_TOP_TDIFF_BOUNDS = (-4,5)  # used for band_14 - band_11 (green channel)
_TDIFF_BOUNDS = (-4,2)  # used for band_15 - band_14 (red channel)

def normalize_range(data, bounds):
    """Linearly rescale ``data`` so ``bounds[0]`` maps to 0 and ``bounds[1]`` to 1.

    Parameters
    ----------
    data : scalar or array-like supporting arithmetic
        Values to rescale.
    bounds : sequence
        ``bounds[0]`` is the lower reference value, ``bounds[1]`` the upper one.

    Returns
    -------
    ``(data - bounds[0]) / (bounds[1] - bounds[0])``. No clipping is applied, so
    inputs outside the bounds produce values outside ``[0, 1]``.
    """
    lower = bounds[0]
    span = bounds[1] - lower
    shifted = data - lower
    return shifted / span

# Index along the last axis of the stacked false-color array that
# get_false_color() extracts as its output image.
# NOTE(review): presumably the number of frames that precede the labeled
# frame in each band sequence — confirm against the dataset description.
N_TIMES_BEFORE = 4


In [4]:
def get_false_color(record_data):
    """Build a false-color image from the band arrays of one record.

    The three channels are computed from ``record_data`` (not from any global
    variable) as:

    * red   -- ``band_15 - band_14`` rescaled with ``_TDIFF_BOUNDS``
    * green -- ``band_14 - band_11`` rescaled with ``_CLOUD_TOP_TDIFF_BOUNDS``
    * blue  -- ``band_14`` rescaled with ``_T11_BOUNDS``

    The channels are stacked along axis 2, clipped to ``[0, 1]``, and the slice
    at index ``N_TIMES_BEFORE`` of the last axis is returned.

    Parameters
    ----------
    record_data : mapping
        Must provide the keys ``'band_11'``, ``'band_14'`` and ``'band_15'``.
        Assumes each value is an array whose last axis has more than
        ``N_TIMES_BEFORE`` entries (e.g. height x width x time) -- TODO confirm
        against the dataset.

    Returns
    -------
    numpy.ndarray
        The clipped false-color array for the selected last-axis index; for
        (H, W, T) inputs this has shape (H, W, 3).

    Raises
    ------
    KeyError
        If one of the required band keys is missing from ``record_data``.
    """
    # Bug fix: the original body read the global name `record` instead of the
    # `record_data` argument, so it only worked when a global of that name
    # happened to hold the same record.
    r = normalize_range(record_data['band_15'] - record_data['band_14'], _TDIFF_BOUNDS)
    g = normalize_range(record_data['band_14'] - record_data['band_11'], _CLOUD_TOP_TDIFF_BOUNDS)
    b = normalize_range(record_data['band_14'], _T11_BOUNDS)

    # Stack channels on axis 2, then clamp everything into the displayable range.
    false_color = np.clip(np.stack([r, g, b], axis=2), 0, 1)
    # Keep only the slice at N_TIMES_BEFORE along the last axis.
    img = false_color[..., N_TIMES_BEFORE]

    return img

In [5]:
# Convert every training record into one array (false-color image stacked with
# its human pixel mask along the third axis) and save it as Train/<record_id>.npy.
train_path = os.path.join(base_dir, 'train')
train = {}
new_path = pathlib.Path('Train')
new_path.mkdir(parents=True, exist_ok=True)

for dirpath, paths, filenames in tqdm(os.walk(train_path)):
    # Every sub-directory name found by the walk is treated as a record id
    # located inside `dirpath`.
    for path in paths:
        record = read_record(path, dirpath)
        img = get_false_color(record)
        # Append the mask after the image channels and store in half precision.
        final = np.dstack((img, record['human_pixel_masks'])).astype(np.float16)

        pathc = f'{new_path}/{path}.npy'
        np.save(pathc, final)

0it [00:00, ?it/s]

In [6]:
# Convert every validation record into one array (false-color image stacked
# with its human pixel mask along the third axis) and save it as
# Valid/<record_id>.npy.
valid_path = os.path.join(base_dir,'validation')
new_valid_path = pathlib.Path('Valid')
new_valid_path.mkdir(exist_ok = True, parents = True)

for dirpath, paths , filenames in os.walk(valid_path):
    # Every sub-directory name found by the walk is treated as a record id
    # located inside `dirpath`.
    for path in paths:
        record = read_record(path,dirpath)
        img = get_false_color(record)
        final = np.dstack((img,record['human_pixel_masks']))
        # Bug fix: ndarray.astype() returns a new array and leaves the original
        # untouched, so the result has to be re-assigned. Previously the cast
        # was discarded and the files were written at the un-cast dtype.
        final = final.astype(np.float16)
        
        pathc = f'{new_valid_path}/{path}.npy'
        np.save(pathc,final)
        