In [25]:
import os
import re

import django
import fs.path
import numpy as np
import pandas as pd
from django.core.exceptions import ObjectDoesNotExist
from fs.osfs import OSFS
from toolz import keyfilter

# Point Django at the multidex settings module unless the environment already
# names one (setdefault leaves an existing value untouched).
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "multidex.settings")
# Per the Django docs, this flag lifts the guard that blocks synchronous ORM
# calls when an event loop is running (as it is inside a Jupyter kernel).
os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"

# Initialise Django; the model imports that follow are placed after this call.
django.setup()

from plotter.models import *
from multidex_utils import modeldict

In [26]:
# Remove every existing MSpec row, one instance at a time -- presumably so that
# re-running the ingest further down does not create duplicate spectra.
# NOTE(review): a bulk `MSpec.objects.all().delete()` would issue fewer queries
# but does not call per-instance `delete()` overrides -- confirm before switching.
for spec in MSpec.objects.all():
    spec.delete()
# display the (now empty) queryset as confirmation
MSpec.objects.all()

<QuerySet []>

In [27]:
# Source data lives outside the repository. Set the MCAM_DATA_DIR environment
# variable to point somewhere else; when it is unset, the original author's
# local directory is used so existing behaviour is unchanged.
input_fs = OSFS(
    os.environ.get(
        "MCAM_DATA_DIR",
        "/home/michael/Desktop/mcam_spect_data_conversion/data/mcam",
    )
)
# Output paths are resolved against the directory the notebook is run from.
output_fs = OSFS('.')
output_image_dir = output_fs.getsyspath("plotter/application/assets/browse/mcam/")

In [28]:
# sanity check: show the absolute path the relative output directory resolved to
output_image_dir

'/home/michael/Desktop/multidex/multidex/plotter/application/assets/browse/mcam/'

In [29]:
# Load the per-observation metadata table and lower-case its column names.
metaframe = pd.read_csv(input_fs.getsyspath('Metadata-marslab.csv'))
metaframe.columns = [column.lower() for column in metaframe.columns]
# add NaNs back in so we can programmatically delete them
# (rebinding instead of inplace=True keeps the cell safe to re-run)
metaframe = metaframe.replace('-', np.nan)
# we're turning these to ints when we ingest them,
# but python doesn't like statements like int('3.0'),
# so turn to float as an intermediate step
numeric_columns = [
    'sol', 'site', 'drive', 'rover_elevation', 'target_elevation', 'tau',
    'focal_distance', 'incidence_angle', 'emission_angle',
    'phase_angle', 'l_s', 'lat', 'lon', 'odometry'
]
metaframe = metaframe.astype({column: 'float' for column in numeric_columns})
# pandas >= 2.0 rejects the unit-less 'datetime64' dtype with a TypeError;
# 'datetime64[ns]' is what the unit-less spelling meant in older versions,
# so this works on both.
metaframe['ltst'] = metaframe['ltst'].astype('datetime64[ns]')

In [30]:
# Sequence ids excluded from the ingest:
# lots of missing values and no spectra, skip for now
BAD_MCAMS = ['mcam13523']
# file names found in the two image directories of the input filesystem
overlay_images = list(input_fs.listdir('DCS_ROI_images'))
rgb_images = list(input_fs.listdir('RGB_images'))

In [31]:
def get_image_ordinal(mastcam_image_fn_no_ext):
    """
    Return the ordinal digit of an image file name, as a string.

    The ordinal is the single digit in a trailing `_R<digit>` or `_L<digit>`
    suffix. The name is expected without its extension: the suffix must sit
    at the very end of the string to be recognised. Names without such a
    suffix are treated as ordinal '1'.
    """
    suffix = re.search(r'_[RL](\d)$', mastcam_image_fn_no_ext)
    return suffix.group(1) if suffix else '1'

In [32]:
# make our temporary dict of 'shared' observation data


def _eye_for_image(basename, right_pattern, left_pattern):
    """
    Work out which eye an image belongs to from its file name.

    basename: image file name with the extension removed
    right_pattern / left_pattern: regexes (applied with re.search) that mark
        a right-eye or left-eye name; the right-eye pattern is tried first

    Returns 'righteye' or 'lefteye'. Raises ValueError when neither pattern
    matches, so an unrecognised name stops the run instead of being filed
    under whichever eye the previous image happened to have.
    """
    if re.search(right_pattern, basename):
        return 'righteye'
    if re.search(left_pattern, basename):
        return 'lefteye'
    raise ValueError("cannot determine eye for image " + repr(basename))


observations = {}
for ix, row in metaframe.iterrows():
    # skip observations we think are 'bad'
    if row['seq_id'] in BAD_MCAMS:
        continue
    # drop NaN-valued fields and populate observation SQL fields
    # from CSV fields
    row = row.dropna()
    obs = dict(zip(row.index, row.values))
    # this is the canonical prefix for image / spectra files:
    # 'sol' + zero-padded 4-digit sol + '_' + sequence id
    obs_identifier = f"sol{int(row['sol']):0>4d}_{row['seq_id']}"
    overlay_image_list = [
        image for image in overlay_images
        if image.startswith(obs_identifier)
    ]
    rgb_image_list = [
        image for image in rgb_images
        if image.startswith(obs_identifier)
    ]
    # associate observation with images using the convoluted decision tree
    # that appears to have been used to name the images (usually)
    for image in overlay_image_list:
        basename = fs.path.splitext(image)[0]
        # an unrecognised name raises here; we do not want to guess the eye
        image_eye = _eye_for_image(basename, r'_R\d.*?_ROIs', r'_L\d.*?_ROIs')
        obs[image_eye + '_roi_image_' + get_image_ordinal(basename)] = image

    # note subtle, delicious differences in RGB image naming conventions
    for image in rgb_image_list:
        basename = fs.path.splitext(image)[0]
        image_eye = _eye_for_image(basename, r'R(_R\d)?$', r'L(_[LR]\d)?$')
        obs[image_eye + '_rgb_image_' + get_image_ordinal(basename)] = image

    # keyed by sequence id; a repeated seq_id overwrites the earlier entry
    observations[row['seq_id']] = obs
# one row per observation, one column per metadata field / image slot
observations = pd.DataFrame(observations).T
observations

Unnamed: 0,sol,seq_id,name,rover_elevation,target_elevation,tau,ltst,focal_distance,incidence_angle,emission_angle,...,righteye_rgb_image_3,righteye_roi_image_8,righteye_roi_image_6,righteye_roi_image_7,righteye_rgb_image_5,righteye_rgb_image_7,righteye_rgb_image_6,righteye_rgb_image_8,righteye_roi_image_0,lefteye_rgb_image_4
mcam00012,13.0,mcam00012,Goulburn 2x1,-4500.97,-4502.2709,0.722,2021-07-10 13:21:56,6.882,24.7586,62.2825,...,,,,,,,,,,
mcam00014,13.0,mcam00014,Dunes+Mound 1x2,-4500.97,-4496.505,0.722,2021-07-10 13:30:30,42.572,26.5491,93.4542,...,,,,,,,,,,
mcam00119,24.0,mcam00119,Clast Survey,-4502.61,-4503.1598,0.722,2021-07-10 15:32:24,3.574,54.2805,46.7285,...,,,,,,,,,,
mcam00121,25.0,mcam00121,Fractures 2x2,-4502.61,-4500.6437,0.722,2021-07-10 12:32:08,3.574,13.8772,91.0629,...,,,,,,,,,,
mcam00126,25.0,mcam00126,Hepburn (distant),-4502.61,,0.722,2021-07-10 13:01:29,,19.0881,89.4421,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
mcam14177,2705.0,mcam14177,Edinburgh Stereo,,,,NaT,,,,...,,,,,,,,,,
mcam14191,2710.0,mcam14191,Eshaness Stereo,,,,NaT,,,,...,,,,,,,,,,
mcam14203,2712.0,mcam14203,Edinburgh Stereo,-4088.69,,,2021-07-10 12:55:20,,24.9819,,...,,,,,,,,,,
mcam14264,2726.0,mcam14264,Edinburgh Dump Pile,,,,NaT,,,,...,,,,,,,,,,


In [33]:
# list every metadata field and image slot that ended up as a column
observations.columns

Index(['sol', 'seq_id', 'name', 'rover_elevation', 'target_elevation', 'tau',
       'ltst', 'focal_distance', 'incidence_angle', 'emission_angle',
       'phase_angle', 'l_s', 'site', 'drive', 'lat', 'lon', 'odometry',
       'lefteye_roi_image_1', 'righteye_roi_image_2', 'righteye_roi_image_1',
       'lefteye_roi_image_2', 'lefteye_rgb_image_1', 'lefteye_rgb_image_2',
       'righteye_rgb_image_2', 'righteye_rgb_image_1', 'righteye_rgb_image_4',
       'lefteye_rgb_image_3', 'lefteye_roi_image_3', 'righteye_roi_image_3',
       'righteye_roi_image_4', 'lefteye_roi_image_4', 'righteye_rgb_image_3',
       'righteye_roi_image_8', 'righteye_roi_image_6', 'righteye_roi_image_7',
       'righteye_rgb_image_5', 'righteye_rgb_image_7', 'righteye_rgb_image_6',
       'righteye_rgb_image_8', 'righteye_roi_image_0', 'lefteye_rgb_image_4'],
      dtype='object')

In [34]:
# spot-check the row built for a single sequence id
observations.loc[observations['seq_id'] == 'mcam03273']

Unnamed: 0,sol,seq_id,name,rover_elevation,target_elevation,tau,ltst,focal_distance,incidence_angle,emission_angle,...,righteye_rgb_image_3,righteye_roi_image_8,righteye_roi_image_6,righteye_roi_image_7,righteye_rgb_image_5,righteye_rgb_image_7,righteye_rgb_image_6,righteye_rgb_image_8,righteye_roi_image_0,lefteye_rgb_image_4
mcam03273,762.0,mcam03273,Confidence Hills 2x1,-4460.6216,-4461.1002,0.820226,2021-07-10 13:20:12,3.342,20.6641,44.6238,...,,,,,,,,,,


In [35]:
from cytoolz.functoolz import curry

In [36]:
# sequence id as embedded in a spectra file name: "mcam" + digits, followed by "_"
SEQ_ID_PATTERN = r"mcam\d+(?=_)"
# spectra file names: solNNNN_mcamNNNNN_spectra<anything>marslab.csv
# The dot is escaped and the pattern is anchored at the end so that only real
# .csv files match (not e.g. "...marslab_csv" or "...marslab.csv.bak").
MARSLAB_PATTERN = r'sol\d{4}_mcam\d{5}_spectra.*marslab\.csv$'
mmatch = curry(re.match)(MARSLAB_PATTERN)

# every file in the root of the input filesystem whose name matches the pattern
spec_files = tuple(filter(mmatch, input_fs.listdir('')))
for spec_file in spec_files:
    # extract sequence id from filename and associate it with rows of the conglomerate
    # metadata df
    seq_id = re.search(SEQ_ID_PATTERN, spec_file).group()
    try:
        observation = observations.loc[seq_id].dropna().to_dict()
    except KeyError:
        # no metadata row for this sequence: report it and move on
        print("no observation for " + spec_file, seq_id)
        continue
    # split out observation fields that aren't image filenames...
    obs_metadata = keyfilter(lambda k: '_image_' not in k, observation)
    # ...and then also pick images probably associated with these ROIs
    # NOTE(review): spec_file still carries its ".csv" extension, while
    # get_image_ordinal only recognises an ordinal at the very end of the name,
    # so this always yields '1' here -- confirm whether that is intended.
    image_number = get_image_ordinal(spec_file)
    images = keyfilter(
            lambda k: ('_image_' in k) and (k.endswith(image_number)), observation
        )
    # read in ROI file itself and reformat it for multidex's needs
    frame = pd.read_csv(input_fs.getsyspath(spec_file)).drop("INSTRUMENT", axis=1)
    frame.columns = [column.lower() for column in frame.columns]
    for _, row in frame.iterrows():
        # treat '-', empty and single-space cells as missing and drop them
        row = row.replace(['-','',' '], np.nan).dropna()
        # double check that seq id and sol are the same in metadata df and marslab file
        # (if not it likely implies a malformatted or misnamed file)
        assert row['sol'] == obs_metadata['sol'], (
            f"sol mismatch in {spec_file}: "
            f"{row['sol']!r} != {obs_metadata['sol']!r}"
        )
        assert row['seq_id'] == obs_metadata['seq_id'], (
            f"seq_id mismatch in {spec_file}: "
            f"{row['seq_id']!r} != {obs_metadata['seq_id']!r}"
        )
        # if there are missing filters anywhere in the column, including for other
        # spectra, pandas will read the column
        # as object / string, which will cause confusion when we
        # compute averaged filters, so we do it in this awkward way
        for filt in MSpec.filters:
            if filt in row.index:
                row[filt] = float(row[filt])
        # the CSV marks float rocks with 'Y'; anything else is stored as False
        row['float'] = row['float'] == 'Y'
        # conglomerate all fields, add filename & stringified image dict
        # (on key clashes the observation metadata wins over the spectrum row)
        spectrum_dict = dict(row) | obs_metadata | {
            'filename': spec_file, 'images': str(images)
        }
        # put it in the database
        spectrum = MSpec(**spectrum_dict)
        spectrum.clean()
        spectrum.save()

no observation for sol0614_mcam02954_spectra-marslab.csv mcam02954
no observation for sol1333_mcam06935_spectra-marslab.csv mcam06935


In [37]:
# leftover loop variable: the last spectrum row handled by the ingest loop above
row

sol              2582
seq_id      mcam13564
color            teal
feature    dusty rock
float           False
l2            0.10128
l2_err       0.003397
r2           0.115623
r2_err       0.003521
l0b          0.116631
l0b_err      0.004643
r0b          0.130764
r0b_err      0.006099
l1           0.133024
l1_err       0.004008
r1           0.148366
r1_err       0.004895
r0g          0.169958
r0g_err      0.007155
l0g          0.156311
l0g_err      0.005638
r0r          0.240374
r0r_err        0.0112
l0r          0.223752
l0r_err      0.010048
l4           0.242518
l4_err       0.010576
l3           0.267347
l3_err       0.012603
r3           0.293067
r3_err       0.012717
l5           0.284269
l5_err       0.012587
r4           0.301306
r4_err       0.015303
r5           0.296348
r5_err        0.01503
l6           0.286051
l6_err       0.012158
r6           0.293765
r6_err       0.013848
Name: 3, dtype: object

In [38]:
# spot-check the stringified image dict stored on one of the sol-721 spectra
MSpec.objects.filter(sol__iexact=721)[1].images

"{'righteye_roi_image_1': 'sol0721_mcam03082_R263_ROIsOVERLAY.jpg', 'righteye_rgb_image_1': 'sol0721_mcam03082_RGB_R.jpg'}"

In [39]:
# show one ingested sol-721 spectrum via multidex_utils.modeldict to eyeball
# the stored field values
modeldict(
    MSpec.objects.filter(sol__iexact=721)[0]
)

{'id': 409,
 'name': 'Bonanza King',
 'sol': 721,
 'ltst': datetime.time(14, 8, 14),
 'seq_id': 'mcam03082',
 'rover_elevation': -4457.0234,
 'target_elevation': -4457.5473,
 'tau': 0.713057,
 'focal_distance': 2.992,
 'incidence_angle': 32.4426,
 'emission_angle': 35.8937,
 'phase_angle': 51.395541,
 'l_s': 178.98,
 'site': 40,
 'drive': 1378,
 'lat': -4.6599585,
 'lon': 137.38275,
 'odometry': 8825.7353,
 'filename': 'sol0721_mcam03082_spectra-marslab.csv',
 'sclk': None,
 'ingest_time': None,
 'color': 'dark red',
 'feature': 'dump piles',
 'images': "{'righteye_roi_image_1': 'sol0721_mcam03082_R263_ROIsOVERLAY.jpg', 'righteye_rgb_image_1': 'sol0721_mcam03082_RGB_R.jpg'}",
 'formation': 'Bradbury Group',
 'member': None,
 'notes': None,
 'float': False,
 'l2': None,
 'l2_err': None,
 'r2': 0.0582489,
 'r2_err': 0.00395696,
 'l0b': None,
 'l0b_err': None,
 'r0b': 0.0687777,
 'r0b_err': 0.00641975,
 'l1': None,
 'l1_err': None,
 'r1': 0.0807962,
 'r1_err': 0.00501365,
 'r0g': 0.091539

In [40]:
# distinct 'feature' values now in the database; values_list yields 1-tuples,
# which is why each entry is displayed wrapped in a tuple
set(
    MSpec.objects.values_list('feature')
)

{('DRT target',),
 ('broken rock',),
 ('disturbed soil',),
 ('drill tailings',),
 ('dump piles',),
 ('dusty rock',),
 ('nodule-rich rock',),
 ('other',),
 ('undisturbed soil',),
 ('veins',),
 (None,)}

In [41]:
# leftover loop variable: non-image metadata of the last spectra file handled above
obs_metadata

{'sol': 2582.0,
 'seq_id': 'mcam13564',
 'name': 'Slangpos Crater',
 'rover_elevation': -4119.31,
 'ltst': Timestamp('2021-07-10 11:36:09'),
 'incidence_angle': 29.4221,
 'site': 77.0,
 'drive': 1560.0}

In [42]:
# leftover loop variable: image slots selected for the last spectra file handled above
images

{'lefteye_roi_image_1': 'sol2582_mcam13564_L126_ROIsOVERLAY_R1.jpg',
 'righteye_roi_image_1': 'sol2582_mcam13564_R126_ROIsOVERLAY_R1.jpg',
 'lefteye_rgb_image_1': 'sol2582_mcam13564_RGB_L.jpg',
 'righteye_rgb_image_1': 'sol2582_mcam13564_RGB_R_R1.jpg'}