# Import

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.patches as patches
from tqdm import tqdm
from astropy.visualization import simple_norm
import pickle
from skimage import io

from scipy.stats import stats
import math
import os

import datetime

# Date stamp used to name today's output folders. Read the clock once so the
# year/month/day parts cannot disagree if the cell happens to run across midnight.
# The format is deliberately left un-padded (e.g. '2022-6-29') so folder names
# match those produced by earlier runs.
_today = datetime.datetime.now()
datecode = '{}-{}-{}'.format(_today.year, _today.month, _today.day)

# Project data folders (absolute paths on the author's machine).
interim_dir = '/mnt/c/Projects/Blogs/AusAEM_blog_TS/data/interim/'
processed_dir = '/mnt/c/Projects/Blogs/AusAEM_blog_TS/data/processed/'
raw_dir = '/mnt/c/Projects/Blogs/AusAEM_blog_TS/data/raw/'
external_dir = '/mnt/c/Projects/Blogs/AusAEM_blog_TS/data/external/'

# Make new folders in the interim and processed directories for today's date.
# exist_ok=True keeps the cell safe to re-run on the same day, and avoids
# walking the whole directory tree just to test whether the folder exists.
os.makedirs(os.path.join(interim_dir, datecode), exist_ok=True)
os.makedirs(os.path.join(processed_dir, datecode), exist_ok=True)

print(datecode)



2022-6-29


In [2]:
# Load the interpolated survey table from the processed-data folder.
QLD_EM = pd.read_csv(os.path.join(processed_dir, 'QLD_AusEM_interp.csv'))

# Window parameter selection
# we want resolution to be ~1/20 or 1/30 of window width
# x interval in dataset is 10 m

window_resolution_m = 20 # x dimension sample interval in m
window_size_m = 500 # window x dimension in m
overlap_wid_m = 200 # ~half window size - must be divisible by window_resolution

# Spacing (m) that the decimation step below is computed against.
# NOTE(review): the comment above says the dataset x interval is 10 m, yet the
# step has always been computed as window_resolution_m / 20. With the current
# settings that gives a step of 1 (no decimation). If the rows really are 10 m
# apart, a 25-sample window spans 250 m rather than 500 m - confirm and set
# this value accordingly.
decimation_base_m = 20

if overlap_wid_m % window_resolution_m != 0:
    raise ValueError('overlap_wid_m must be divisible by window_resolution_m')

# convert m to sample counts
window_size = window_size_m // window_resolution_m
overlap_wid = overlap_wid_m // window_resolution_m

decimation_step = window_resolution_m // decimation_base_m
if decimation_step < 1:
    # A zero step would otherwise surface as an opaque "slice step cannot be zero".
    raise ValueError('window_resolution_m must be at least {} m'.format(decimation_base_m))

# Keep every decimation_step-th row. The explicit copy makes QLD_EM an
# independent frame, so adding columns to it later does not trigger pandas'
# SettingWithCopyWarning.
QLD_EM = QLD_EM.iloc[::decimation_step].copy()

# Normalisation

In [3]:
# 0-1 normalisation per timegate

# Column names: the 15 raw time gates and the two derived columns for each.
EMZ_HPRG_list = ['EMZ_HPRG' + '[{}]'.format(i) for i in range(1,16)]
EMZ_HPRG_norm_list = [i + '_norm' for i in EMZ_HPRG_list]
EMZ_HPRG_lognorm_list = [i + '_lognorm' for i in EMZ_HPRG_list]

# Small offset added before the logarithm so that a normalised value of 0
# maps to log10(1e-5) = -5 instead of -inf.
log_offset = 0.00001

for raw_col, norm_col, lognorm_col in zip(EMZ_HPRG_list, EMZ_HPRG_norm_list, EMZ_HPRG_lognorm_list):
    # Linear min-max scaling of the raw gate to [0, 1].
    # (Named gate_min/gate_max so the builtins min() and max() are not
    # shadowed for the rest of the kernel session.)
    gate_min = QLD_EM[raw_col].min()
    gate_max = QLD_EM[raw_col].max()
    QLD_EM[norm_col] = (QLD_EM[raw_col] - gate_min) / (gate_max - gate_min)

    # Log-transform the scaled gate, then min-max scale the *log* values to [0, 1].
    # The earlier version computed logmin/logmax but rescaled with the raw-data
    # min/max and added 1, so the result was not confined to [0, 1]. Any later
    # conversion of these columns to 8-bit pixel values relies on that range.
    log_values = np.log10(QLD_EM[norm_col] + log_offset)
    logmin = log_values.min()
    logmax = log_values.max()
    QLD_EM[lognorm_col] = (log_values - logmin) / (logmax - logmin)

# Window generation

In [5]:
# Original windows
#
# Slide a fixed-width window along every survey line. Each window's log-normalised
# time-gate values are written to disk as an 8-bit PNG (rows = time gates,
# columns = samples along the line); the matching x / easting / northing / line
# values are collected in parallel lists and pickled at the end.

# Folder (relative to interim_dir) that receives the tile images. It is created
# here because the image writer does not create missing directories.
tile_subdir = '{}/tiles_{}m'.format(datecode, window_size_m)
os.makedirs(os.path.join(interim_dir, tile_subdir), exist_ok=True)

# generate windowed data
data_list = []   # relative PNG path of each window
x_list = []      # 'x' values per window
E_list = []      # 'E' values per window
N_list = []      # 'N' values per window
line_list = []   # line identifier per window

# loop through individual lines
j = 0  # running tile index across all lines; zero-padded to form the file name
for line in tqdm(QLD_EM.Line.unique()):

    # Select this line's rows once; repeating the boolean filter for every
    # column of every window dominated the runtime.
    line_rows = QLD_EM[QLD_EM.Line == line]
    data = line_rows[EMZ_HPRG_lognorm_list]
    x_values = line_rows['x'].values
    E_values = line_rows['E'].values
    N_values = line_rows['N'].values

    # create windows
    # overlap_wid is the step between consecutive window starts. The upper bound
    # admits every start whose window fits entirely inside the line, including
    # the one that ends exactly on the last sample (previously dropped by a
    # strict '<' comparison).
    for i in range(0, len(data) - window_size + 1, overlap_wid):
        stop = i + window_size

        # timegate data window
        tile = data.iloc[i:stop]
        # Scale to 0-255 and transpose so each time gate becomes an image row.
        # Assumes the lognorm columns lie in [0, 1]; values outside that range
        # do not fit in uint8.
        image = (255 * tile.values).astype(np.uint8).T
        fn = '{}/{}.png'.format(tile_subdir, str(j).zfill(7))
        io.imsave(interim_dir + fn, image, check_contrast=False)
        data_list.append(fn)
        j += 1

        # line distance window
        # int32 rather than int16: int16 tops out at 32 767, so larger x values
        # (e.g. distances beyond ~32.7 km if x is in metres - TODO confirm units)
        # would silently wrap around.
        x_list.append(x_values[i:stop].astype('int32'))

        # easting window
        E_list.append(E_values[i:stop].astype('float32'))

        # northing window
        N_list.append(N_values[i:stop].astype('float32'))

        # line name window (every row in the window belongs to this line)
        line_list.append(line)

stack = np.array(data_list)
x_stack = np.array(x_list)
E_stack  = np.array(E_list)
N_stack = np.array(N_list)
line_stack = np.array(line_list)

# save these to pickles

pickle_targets = [
    ('{}/filenames_all_normdata_{}m-window.pickle', stack),
    ('{}/all_x_{}m-window_stack.pickle', x_stack),
    ('{}/all_line_{}m-window_stack.pickle', line_stack),
    ('{}/all_easting_{}m-window_stack.pickle', E_stack),
    ('{}/all_northing_{}m-window_stack.pickle', N_stack),
]

for name_template, payload in pickle_targets:
    with open(interim_dir + name_template.format(datecode, window_size_m), 'wb') as handle:
        pickle.dump(payload, handle, protocol=4)

100%|██████████| 159/159 [1:15:10<00:00, 28.37s/it]   
