In [None]:
#!/usr/bin/env python3

import argparse
import os
import numpy as np
from sklearn.cluster import Birch
from PIL import Image
import matplotlib.pyplot as plt
import shutil
import pandas as pd

from loader import *
from features import *
from visualisation import *
from merging import *

## Loading data ##
DATA_GLOB = "../data/level_1p0a/*.h5"
wav, soundings, shift_std, shift_mean, space, time = load_soundings(
    DATA_GLOB,
    smoothing="poly",
    remove_flat=False,
)
centred_soundings = centre(soundings)

## Creating features ##
FEATURE_NAMES = ("grad", "polyres", "optc", "poly_coeffs", "curv")
grad, polyres, optc, poly_coeffs, curv = create_features(
    wav, centred_soundings, standardize, *FEATURE_NAMES
)

# One column from each of poly_coeffs (col 0), polyres (col 1) and grad (col 4);
# slicing (rather than integer indexing) keeps every piece 2-D for hstack.
selected_columns = [poly_coeffs[:, :1], polyres[:, 1:2], grad[:, 4:5]]
features = np.hstack(selected_columns)

# Model input: standardize(centred_soundings) followed by the selected feature columns.
combined_features = np.hstack([standardize(centred_soundings), features])

## Fitting model ##
model = Birch(n_clusters=None, threshold=3, branching_factor=50)
model.fit(combined_features)

minorlabels = model.labels_

## Merging clusters ##
majorlabels = merge_labels(soundings, minorlabels, grad, polyres, poly_coeffs, curv)

# finallabels is the label array used by the plots and views further down.
finallabels = add_flat_labels(soundings, majorlabels)

In [None]:
# Map of the class-5 soundings: x = space column 2, y = space column 1,
# colour = space column 0, drawn over the Mars basemap.
marsImage = Image.open("../images/mars.png")
fig, ax = plt.subplots(figsize=(15,9))
ax.grid()
ax.set_title("Locations of highly periodic soundings")
ax.set_xticks(range(-180, 181, 30))
ax.set_yticks(range(-90, 91, 30))
ax.set_xlim(-180, 180)
ax.set_ylim(-90, 90)

# Build the class-5 selection once instead of re-evaluating the mask per argument.
periodic_space = space[finallabels==5]
im = ax.scatter(periodic_space[:,2], periodic_space[:,1], c=periodic_space[:,0])
fig.colorbar(im, ax=ax)
plt.tight_layout()
# NOTE(review): extent is (left, right, bottom, top) and lists 90 before -90 — confirm
# the basemap ends up the right way round relative to the latitude axis.
ax.imshow(marsImage, extent=[-180, 180, 90, -90]);

# Occultation view
For each occultation: which spectral classes are present and how many soundings fall in each, together with their altitudes and mean transmission levels.

In [None]:
def create_occultation_view(time, finallabels, out_path="../data/out/occ_view.csv"):
    """
    Count, for every occultation, how many soundings fall in each spectral class
    and write the resulting table to a CSV file.

    Parameters
    ----------
    time : np.ndarray
        2-D array with one row per sounding. Only the last column is read; its
        values identify the occultation and are written out as "file name".
    finallabels : np.ndarray
        Class label of every sounding, aligned with the rows of ``time``.
    out_path : str, optional
        Destination CSV file. Missing parent directories are created.

    Returns
    -------
    None
        The table is only written to ``out_path``: one row per occultation with
        the columns "file name", one count column per class label (ascending
        label order) and "total".
    """
    occ_names = time[:, -1]
    # Loop-invariant work is done once up front rather than on every iteration.
    unique_occs = np.unique(occ_names)
    class_masks = [(label, finallabels == label) for label in np.unique(finallabels)]

    occ_view = []
    for occ_name in unique_occs:
        occ_mask = occ_names == occ_name
        row = {"file name": occ_name}
        for spectralclass, class_mask in class_masks:
            row[spectralclass] = int((occ_mask & class_mask).sum())
        row["total"] = int(occ_mask.sum())
        occ_view.append(row)

    # Explicit columns keep the header stable (and present) even with no rows.
    columns = ["file name", *[label for label, _ in class_masks], "total"]

    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    pd.DataFrame(occ_view, columns=columns).to_csv(out_path, index=False)
    
# Writes the per-occultation class counts to ../data/out/occ_view.csv; nothing is returned for display.
create_occultation_view(time, finallabels)

In [None]:
# Start from an empty output directory. The removal is guarded so that a missing
# directory no longer causes the creation step to be skipped.
occ_view_dir = "../data/out/occ_view/"
if os.path.isdir(occ_view_dir):
    shutil.rmtree(occ_view_dir)
os.makedirs(occ_view_dir, exist_ok=True)

occ_view = {}
all_classes = np.unique(finallabels)  # does not depend on the occultation
for occ in np.unique(time[:,-1]):
    occ_mask = time[:,-1] == occ
    classes_present = np.unique(finallabels[occ_mask])
    # Boolean flag per known class: is it present in this occultation?
    tags = np.isin(all_classes, classes_present)
    if 5 in classes_present:
        periodic_mask = finallabels[occ_mask] == 5
        mean_transmissions = soundings[occ_mask][periodic_mask].mean(axis=1)
        altitudes = space[occ_mask][periodic_mask, 0]
        # NOTE(review): find_wavelengths is defined in a later cell of this notebook;
        # on a fresh kernel this call needs that cell (or an import) to have run first.
        wavelengths = find_wavelengths(wav, soundings[occ_mask][periodic_mask])

# NOTE(review): none of the values computed in the loop are stored, so this still
# displays an empty dict. The later cells build the populated view.
occ_view

In [None]:
# Start from an empty output directory. The removal is guarded so that a missing
# directory no longer causes the creation step to be skipped.
occ_view_dir = "../data/out/occ_view/"
if os.path.isdir(occ_view_dir):
    shutil.rmtree(occ_view_dir)
os.makedirs(occ_view_dir, exist_ok=True)

# occ_view[occultation][class] -> 2 x k array: row 0 holds space column 0 (altitude),
# row 1 the per-sounding mean transmission. Classes absent from an occultation get k = 0.
occ_view = {}
for occ in np.unique(time[:,-1]):
    occ_mask = time[:,-1] == occ
    occ_view[occ] = {}
    for spectralclass in set(finallabels):
        # Reuse occ_mask instead of rebuilding the occultation comparison twice.
        selection = occ_mask & (finallabels == spectralclass)
        transmission_levels = soundings[selection].mean(axis=1)
        altitudes = space[selection, 0]
        occ_view[occ][spectralclass] = np.vstack([altitudes, transmission_levels])

pd.DataFrame(occ_view).T

In [None]:
# occ_view[occultation][class] -> {"mean_transmissions": ..., "altitudes": ...},
# with entries only for the classes that actually occur in that occultation.
occ_view = {}
occ_names = time[:,-1]
for occ in np.unique(occ_names):
    occ_mask = occ_names == occ
    per_class = {}
    for spectralclass in set(finallabels[occ_mask]):
        selection = occ_mask & (finallabels == spectralclass)
        transmission_levels = soundings[selection].mean(axis=1)
        altitudes = space[selection, 0]
        per_class[spectralclass] = {
            "mean_transmissions": transmission_levels,
            "altitudes": altitudes,
        }
    occ_view[occ] = per_class

pd.DataFrame(occ_view).T

In [None]:
# Class-5 entry of the third-from-last occultation. The rows are keyed by the
# occultation identifiers, so the positional lookup is made explicit with .iloc
# rather than relying on Series[int] falling back to positions.
pd.DataFrame(occ_view).T[5].iloc[-3]

# Spectral class view
Associate each spectral class with the altitude, solar longitude, latitude, longitude, and mean transmission level of its soundings.

In [None]:
# Per-sounding attributes of one spectral class.
spectralClass = 5
# Select on finallabels so this matches create_class_view and every other
# class-5 selection in this notebook (the original used majorlabels here).
class_mask = finallabels == spectralClass
alt = space[class_mask, 0]
lat = space[class_mask, 1]
lon = space[class_mask, 2]
ls = time[class_mask, 3]
mean_transmission = soundings[class_mask].mean(axis=1)
occName = time[class_mask, -1]  # last column of `time` identifies the occultation

In [None]:
# Shape of the table with one row per selected sounding and one column per attribute.
np.column_stack([alt, lat, occName]).shape

In [None]:
def create_class_view(wav, soundings, space, time, finallabels, out_dir="../data/out/class_view"):
    """
    Build one table per spectral class describing where and when its soundings
    occur, write each table to ``<out_dir>/class <label>.csv`` and return them.

    Parameters
    ----------
    wav : np.ndarray
        Wavelength grid, passed to ``find_wavelengths`` for class 5.
    soundings : np.ndarray
        2-D array, one sounding per row.
    space : np.ndarray
        2-D array; columns 0, 1 and 2 are written out as "alt", "lat" and "lon".
    time : np.ndarray
        2-D array; column 3 is written out as "ls" and the last column as "filename".
    finallabels : np.ndarray
        Class label of every sounding.
    out_dir : str, optional
        Directory receiving the CSV files; created if it does not exist.

    Returns
    -------
    dict
        Maps each class label to a ``pandas.DataFrame`` with the columns
        "ls", "lat", "lon", "alt", "mean_transmission" and "filename". Class 5
        additionally has "primary wavelength" and "secondary wavelength".
        Named columns allow lookups such as ``class_view[5]["lon"]``.
    """
    os.makedirs(out_dir, exist_ok=True)

    class_view = {}
    for spectralclass in np.unique(finallabels):
        class_mask = finallabels == spectralclass
        class_soundings = soundings[class_mask]

        # Building the frame column by column keeps each column's own dtype
        # instead of coercing numbers and file names into one stacked array.
        frame = pd.DataFrame({
            "ls": time[class_mask, 3],
            "lat": space[class_mask, 1],
            "lon": space[class_mask, 2],
            "alt": space[class_mask, 0],
            "mean_transmission": class_soundings.mean(axis=1),
            "filename": time[class_mask, -1],
        })

        if spectralclass == 5:
            # NOTE(review): find_wavelengths is defined in a later cell of this notebook;
            # on a fresh kernel it must be defined (or imported) before this runs.
            wavs = find_wavelengths(wav, class_soundings)
            frame["primary wavelength"] = wavs[:, 0]
            frame["secondary wavelength"] = wavs[:, 1]

        frame.to_csv(os.path.join(out_dir, f"class {spectralclass}.csv"), index=False)
        class_view[spectralclass] = frame

    return class_view

# Keep the result: the following cells read `class_view`.
class_view = create_class_view(wav, soundings, space, time, finallabels)

In [None]:
# Inspect the class-5 entry of the class view.
# NOTE(review): requires a module-level `class_view`; confirm the cell above assigns one.
class_view[5]

In [None]:
# Same map as the earlier location plot, this time read from class_view[5] by column name.
# NOTE(review): the earlier map cell opens "../images/mars.png" — confirm which path is current.
marsImage = Image.open("../mars.png")
fig, ax = plt.subplots(figsize=(15,9))
ax.grid()
ax.set_title("Locations of highly periodic soundings")
ax.set_xticks(range(-180, 181, 30))
ax.set_yticks(range(-90, 91, 30))
ax.set_xlim(-180, 180)
ax.set_ylim(-90, 90)

periodic_view = class_view[5]
im = ax.scatter(periodic_view["lon"], periodic_view["lat"], c=periodic_view["alt"])
fig.colorbar(im, ax=ax)
plt.tight_layout()
# NOTE(review): extent is (left, right, bottom, top) and lists 90 before -90 — confirm
# the basemap ends up the right way round relative to the latitude axis.
ax.imshow(marsImage, extent=[-180, 180, 90, -90]);

# Wavelengths

In [None]:
def find_wavelengths(wav, soundings):
    """
    Estimate the first two wavelengths present in each sounding from the spacing
    of its turning points.

    The sign of the gradient along each sounding is examined; wherever it changes
    between neighbouring samples a turning point is recorded at the first of the
    two samples. The distance in ``wav`` between consecutive turning points is a
    peak-to-trough distance, so it is doubled to give a full wavelength. Samples
    with an exactly zero gradient have sign 0 and therefore also register as
    sign changes.

    Parameters
    ----------
    wav : np.ndarray
        1-D wavelength grid; indexed with the turning-point positions.
    soundings : np.ndarray
        2-D array, one sounding per row, sampled on ``wav``.

    Returns
    -------
    np.ndarray
        Float array of shape ``(n_soundings, 2)``: column 0 is the wavelength from
        the first pair of turning points, column 1 from the second pair. Entries
        are NaN where a sounding has too few turning points.
    """
    slope_sign = np.sign(np.gradient(soundings, axis=1, edge_order=2))

    # Pre-fill with NaN so rows with fewer than two spacings need no special casing.
    # (np.nan rather than the np.NAN alias, which NumPy 2.0 removed.)
    leading = np.full((soundings.shape[0], 2), np.nan)

    for row, signs in enumerate(slope_sign):
        # Non-zero difference between neighbours == the gradient changed sign there.
        turning_idx = np.flatnonzero(np.diff(signs))
        spacings = 2 * np.diff(wav[turning_idx])
        found = min(spacings.size, 2)
        leading[row, :found] = spacings[:found]

    return leading

In [None]:
# Sanity check: expect one row per class-5 sounding and two wavelength columns.
find_wavelengths(wav, soundings[finallabels==5]).shape

# Classify new occultation

In [None]:
# Same directory the soundings at the top of the notebook were loaded from.
# NOTE(review): not referenced again in the visible cells.
newpath = "../data/level_1p0a/"

In [None]:
# Pick one sounding and one BIRCH subcluster centre, then compute that sounding's features.
c = 0  # index into model.subcluster_centers_
new_sounding = soundings[-1]  # the last loaded sounding is used as the "new" one
centroidc = model.subcluster_centers_[c]
# NOTE(review): this rebinds grad/polyres/poly_coeffs/curv, replacing the full-dataset
# arrays created at the top of the notebook; cells that use those names will see the
# single-sounding versions until the feature cell is re-run.
# NOTE(review): the training features were built from centre(soundings), whereas the
# raw sounding is passed here — confirm that is intended.
grad, polyres, poly_coeffs, curv = create_features(wav, new_sounding.reshape(1,-1), standardize, "grad", "polyres", "poly_coeffs", "curv")