In [3]:
import os
import re

import django
import fs.path
import numpy as np
import pandas as pd
from django.core.exceptions import ObjectDoesNotExist
from fs.osfs import OSFS
from cytoolz.functoolz import curry
from toolz import keyfilter

# Point Django at the multidex settings module unless a value has already been
# chosen (setdefault leaves an existing entry untouched).
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "multidex.settings")
# NOTE(review): presumably needed because the notebook kernel runs inside an
# event loop and Django would otherwise refuse synchronous ORM calls -- confirm.
os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"

# Initialise Django before the project models are imported in the lines below.
django.setup()

from plotter.models import *
from multidex_utils import modeldict

In [17]:
# Empty the MSpec table by calling delete() on each stored instance in turn,
# then echo a fresh query so the (now empty) result is displayed.
all_spectra = MSpec.objects.all()
for stored_spectrum in all_spectra:
    stored_spectrum.delete()
MSpec.objects.all()

<QuerySet []>

In [13]:
# Take the first MSpec row returned for sol 2992 as a sample to inspect.
s = MSpec.objects.filter(sol=2992)[0]

In [16]:
# Show the image mapping stored on the sample spectrum. The ingest loop further
# down saves this field as ``str(images)``, so it comes back as a stringified
# dict rather than a dict.
s.images

"{'righteye_roi_image_1': 'sol2992_mcam15599_R50R2_do.jpg', 'lefteye_roi_image_1': 'sol2992_mcam15599_L50R2_do.jpg'}"

In [5]:
# Source and destination filesystems for the ingest.
# The input directory can be redirected with the MULTIDEX_INPUT_DIR environment
# variable; when it is unset the original hard-coded location is used, so the
# notebook behaves exactly as before on the machine it was written on.
input_fs = OSFS(
    os.environ.get("MULTIDEX_INPUT_DIR", "/home/michael/Downloads/multidex_new/")
)
# Output paths are resolved relative to the current working directory.
output_fs = OSFS('.')
output_image_dir = output_fs.getsyspath("plotter/application/assets/browse/mcam/")

In [7]:
# Names of the overlay JPEGs sitting at the top level of the input directory.
overlay_images = list(
    filter(lambda name: name.endswith('jpg'), input_fs.listdir('.'))
)

In [8]:
def get_image_ordinal(mastcam_image_fn_no_ext):
    """Return the ordinal suffix of an extension-less Mastcam file name.

    A name that ends in ``_R<digit>`` or ``_L<digit>`` yields that single
    digit as a string; any other name yields the default ``'1'``.
    """
    suffix_match = re.search(r'_[RL](\d)$', mastcam_image_fn_no_ext)
    return suffix_match.group(1) if suffix_match else '1'

In [None]:
# NOTE(review): this cell looks like an unfinished per-file variant of the
# metadata-driven loop further down. `file` is only used for the extension
# check and `row` is never assigned here, so the body either raises NameError
# on a fresh kernel or silently reuses whatever `row` an earlier run left in
# the namespace -- confirm the intent before running it.
observations = {}
for file in input_fs.listdir('.'):
    if not file.endswith('.csv'):
        continue
    
    # skip observations we think are 'bad'
#     if row['seq_id'] in BAD_MCAMS:
#         continue
    # drop NaN-valued fields and populate observation SQL fields
    # from CSV fields
    # NOTE(review): `row` is not defined anywhere in this cell (see above).
    row.dropna(inplace=True)
    obs = dict(zip(row.index,row.values))
    # this is the canonical prefix for image / spectra files 
    obs_identifier = 'sol' + format(
            int(row['sol']), "0>4d"
        ) + '_' + row['seq_id']
    overlay_image_list = [
        image for image in overlay_images 
        if image.startswith(obs_identifier)
    ]
#     rgb_image_list = [
#         image for image in rgb_images 
#         if image.startswith(obs_identifier)
#     ]
    # associate observation with images using the convoluted decision tree
    # that appears to have been used to name the images (usually)
    for image in overlay_image_list:
        image_eye = None
        basename = fs.path.splitext(image)[0]
        # numbered-eye names (_R<digit>..._do / _L<digit>..._do) are tried
        # before the bare _R_do / _L_do forms
        if re.search(r'_R\d.*?_do', basename):
            image_eye = 'righteye'
        elif re.search(r'_L\d.*?_do', basename):
            image_eye = 'lefteye'
        elif re.search(r'_R_do', basename):
            image_eye = 'righteye'
        elif re.search(r'_L_do', basename):
            image_eye = 'lefteye'
        if image_eye is None:
            raise ValueError('search failed')
        # key format: <eye>_roi_image_<ordinal>
        obs[image_eye + '_roi_image_' + get_image_ordinal(basename)] = image
    # note subtle, delicious differences in RGB image naming conventions
#     for image in rgb_image_list:
#         basename = fs.path.splitext(image)[0]
#         if re.search(r'R(_R\d)?$', basename):
#             image_eye = 'righteye'
#         elif re.search(r'L(_[LR]\d)?$', basename):
#             image_eye = 'lefteye'
#         obs[image_eye + '_rgb_image_' + get_image_ordinal(basename)] = image

    observations[row['seq_id']] = obs
# the dict of per-observation dicts becomes a frame with one row per seq_id
observations = pd.DataFrame(observations).T
observations

In [None]:
# Load the per-observation metadata table and normalise it for ingest.
metaframe = pd.read_csv(input_fs.getsyspath('Metadata-marslab.csv'))
metaframe.columns = [column.lower() for column in metaframe.columns]
# add NaNs back in so we can programmatically delete them
metaframe = metaframe.replace('-', np.nan)
# we're turning these to ints when we ingest them,
# but python doesn't like statements like int('3.0'),
# so turn to float as an intermediate step
# ('site' and 'drive' were listed twice; converting once is enough)
for column in [
    'sol', 'site', 'drive', 'rover_elevation', 'target_elevation', 'tau',
    'focal_distance', 'incidence_angle', 'emission_angle',
    'phase_angle', 'l_s', 'lat', 'lon', 'odometry'
]:
    metaframe[column] = metaframe[column].astype('float')
# Spell out the unit: pandas 2.x raises TypeError for the unit-less
# 'datetime64' dtype, while 'datetime64[ns]' is what older releases
# resolved that alias to anyway.
metaframe['ltst'] = metaframe['ltst'].astype('datetime64[ns]')

In [None]:
# lots of missing values and no spectra, skip for now
# BAD_MCAMS = ['mcam13523']
# Every entry of the DCS_ROI_images_clean directory counts as an overlay image;
# this rebinds the name used by the observation loop below.
overlay_images = list(input_fs.listdir('DCS_ROI_images_clean'))
# rgb_images = [
#     image for image in input_fs.listdir('RGB_images')
# ]

In [None]:
def get_image_ordinal(mastcam_image_fn_no_ext):
    """Return the ordinal suffix of an extension-less Mastcam file name.

    A name that ends in ``_R<digit>`` or ``_L<digit>`` yields that single
    digit as a string; any other name yields the default ``'1'``.
    """
    suffix_match = re.search(r'_[RL](\d)$', mastcam_image_fn_no_ext)
    return suffix_match.group(1) if suffix_match else '1'

In [None]:
# Build the temporary lookup of 'shared' observation data: one record per
# metadata row, holding its non-null fields plus the overlay images that
# belong to it.
eye_patterns = (
    # tried in order: numbered-eye names first, then the bare _R_do / _L_do
    (r'_R\d.*?_do', 'righteye'),
    (r'_L\d.*?_do', 'lefteye'),
    (r'_R_do', 'righteye'),
    (r'_L_do', 'lefteye'),
)
observations = {}
for ix, row in metaframe.iterrows():
    # skip observations we think are 'bad' (currently disabled)
#     if row['seq_id'] in BAD_MCAMS:
#         continue
    # discard NaN-valued fields; whatever survives becomes the record
    row = row.dropna()
    obs = {field: value for field, value in zip(row.index, row.values)}
    # image / spectra files for an observation share this prefix
    padded_sol = format(int(row['sol']), "0>4d")
    obs_identifier = 'sol' + padded_sol + '_' + row['seq_id']
    overlay_image_list = [
        name for name in overlay_images if name.startswith(obs_identifier)
    ]
#     rgb_image_list = [
#         image for image in rgb_images 
#         if image.startswith(obs_identifier)
#     ]
    # work out which eye each overlay came from and file it under
    # <eye>_roi_image_<ordinal>
    for image in overlay_image_list:
        basename = fs.path.splitext(image)[0]
        image_eye = next(
            (eye for pattern, eye in eye_patterns if re.search(pattern, basename)),
            None,
        )
        if image_eye is None:
            raise ValueError('search failed')
        obs[image_eye + '_roi_image_' + get_image_ordinal(basename)] = image
    # note subtle, delicious differences in RGB image naming conventions
#     for image in rgb_image_list:
#         basename = fs.path.splitext(image)[0]
#         if re.search(r'R(_R\d)?$', basename):
#             image_eye = 'righteye'
#         elif re.search(r'L(_[LR]\d)?$', basename):
#             image_eye = 'lefteye'
#         obs[image_eye + '_rgb_image_' + get_image_ordinal(basename)] = image

    observations[row['seq_id']] = obs
# one row per seq_id, one column per field / image key
observations = pd.DataFrame(observations).T
observations

In [None]:
def check_binocularity(row, images, spec_file, seq_id):
    """Print a warning for each eye that has filter values but no image.

    The labels in ``row.index`` whose upper-cased form is in ``MSpec.filters``
    are collected. If any of them starts with ``'r'`` and no key of ``images``
    contains ``'right'``, a message is printed; the same check is then made
    for ``'l'`` / ``'left'``. Nothing is returned or raised.
    """
    present_filters = [
        label for label in row.index if label.upper() in MSpec.filters
    ]
    eye_checks = (
        ('r', 'right', "no righteye image for "),
        ('l', 'left', "no lefteye image for "),
    )
    for prefix, eye_word, message in eye_checks:
        if not any(label.startswith(prefix) for label in present_filters):
            continue
        if not keyfilter(lambda key: eye_word in key, images):
            print(message + spec_file, seq_id)

In [None]:
# Ingest every per-observation spectra file found at the top level of the
# input directory, saving one MSpec row per spectrum.
SEQ_ID_PATTERN = r"mcam\d+(?=_)"
MARSLAB_PATTERN = r'sol\d{4}_mcam\d{5}_spectra.*marslab.csv'
mmatch = curry(re.match)(MARSLAB_PATTERN)

spec_files = tuple(filter(mmatch, input_fs.listdir('')))
for spec_file in spec_files:
    # extract sequence id from filename and associate it with rows of the conglomerate
    # metadata df
    seq_id = re.search(SEQ_ID_PATTERN, spec_file).group()
    try:
        observation = observations.loc[seq_id].dropna().to_dict()
    except KeyError:
        # spectra without a metadata row are reported and skipped
        print("no observation for " + spec_file, seq_id)
        continue
    # split out observation fields that aren't image filenames...
    obs_metadata = keyfilter(lambda k: '_image_' not in k, observation)
    # ...and then also pick images probably associated with these ROIs
    image_number = get_image_ordinal(spec_file.replace("-marslab.csv", ""))
    # unusual image sequencing
    if seq_id == 'mcam10141':
        images = keyfilter(lambda k: ('_image_' in k), observation)
    else:
        images = keyfilter(
            lambda k: ('_image_' in k) and (k.endswith(image_number)), observation
        )
    if len(images) == 0:
        print("no images for " + spec_file, seq_id)
        # sequences previously exempted from this check:
#         if seq_id not in [
#             'mcam01097', 'mcam04524', 'mcam00859', 'mcam00868', 'mcam01198',
#             'mcam08597'
#         ]:
        # name the offending file so the traceback is actionable on its own
        raise ValueError("no images for " + spec_file + " (" + seq_id + ")")
    # read in ROI file itself and reformat it for multidex's needs
    frame = pd.read_csv(input_fs.getsyspath(spec_file)).drop("INSTRUMENT", axis=1)
    frame.columns = [column.lower() for column in frame.columns]
    for _, row in frame.iterrows():
        # placeholder / blank cells count as missing and are dropped
        row = row.replace(['-','',' '], np.nan).dropna()
        # double check that seq id and sol are the same in metadata df and marslab file
        # (if not it likely implies a malformatted or misnamed file)
        assert row['sol'] == obs_metadata['sol'], (
            "sol mismatch between " + spec_file + " and the metadata table"
        )
        assert row['seq_id'] == obs_metadata['seq_id'], (
            "seq_id mismatch between " + spec_file + " and the metadata table"
        )
        check_binocularity(row, images, spec_file, seq_id)
        # if there are missing filters anywhere in the column, including for other
        # spectra, pandas will read the column
        # as object / string, which will cause confusion when we
        # compute averaged filters, so we do it in this awkward way
        for filt in MSpec.filters:
            filt_column = filt.lower()
            if filt_column in row.index:
                row[filt_column] = float(row[filt_column])
        # The dropna() above removes 'float' when its cell was blank or '-',
        # so read it with get(): a missing flag then takes the N/A label
        # instead of raising KeyError.
        if row.get('float') == 'Y':
            row['float'] = 'floating'
        else:
            row['float'] = 'in-place or N/A'
        # conglomerate all fields, add filename & stringified image dict
        spectrum_dict = dict(row) | obs_metadata | {
            'filename': spec_file, 'images': str(images)
        }
        # put it in the database
        spectrum = MSpec(**spectrum_dict)
        spectrum.clean()
        spectrum.save()

In [None]:
# Show only the image-related columns for observation mcam01097.
ic = [name for name in observations.columns if 'image' in name]
is_mcam01097 = observations['seq_id'] == 'mcam01097'
observations.loc[is_mcam01097, ic]


In [None]:
# Dump every column of the mcam01097 observation as a plain dict for inspection.
observations.loc[observations['seq_id'] == 'mcam01097'].to_dict()

In [None]:
# Spot-check the image mapping stored on the first MSpec row.
MSpec.objects.all()[0].images

In [None]:
# Spot-check the image mapping on the second MSpec row returned for sol 721.
MSpec.objects.filter(sol__iexact=721)[1].images

In [None]:
# Pass the first sol-721 spectrum through the project's modeldict helper.
# NOTE(review): presumably this returns the model's fields as a dict -- confirm
# against multidex_utils.
modeldict(
    MSpec.objects.filter(sol__iexact=721)[0]
)

In [None]:
# Collapse the 'feature' values of all stored spectra into a set to see which
# distinct ones exist.
set(
    MSpec.objects.values_list('feature')
)

In [None]:
# Echo the metadata dict left in the namespace by the spectra ingest loop; it
# is only defined once that cell has run.
obs_metadata

In [None]:
import sh