In [1]:
import os
import re

import django
import fs.path
import numpy as np
import pandas as pd
from django.core.exceptions import ObjectDoesNotExist
from fs.osfs import OSFS

# Tell Django which settings module to load, unless the environment
# already names one (setdefault never overwrites an existing value).
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "mastspec.settings")
# NOTE(review): presumably required because the notebook kernel runs cells
# inside an event loop, where Django otherwise refuses synchronous ORM
# calls -- confirm against the Django async-safety documentation.
os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"

# Initialise Django before the project's models are imported below.
django.setup()

# NOTE(review): the star import hides which names this notebook depends on
# (MObs and MSpec are used later) -- consider importing them explicitly.
from plotter.models import *
# from mastspec.views import *
# from mastspec.forms import *
from utils import modeldict

In [None]:
# for spec in MSpec.objects.all():
#     spec.delete()
# for obs in MObs.objects.all():
#     obs.delete()

In [2]:
# Root directory of the raw Mastcam inputs. Set the MASTCAM_DATA_DIR
# environment variable to run the notebook on another machine; the original
# hard-coded location is kept as the default.
input_fs = OSFS(
    os.environ.get("MASTCAM_DATA_DIR", "/home/michael/mastcam_data/")
)
# Outputs are resolved relative to the notebook's working directory.
output_fs = OSFS('.')
output_image_dir = output_fs.getsyspath("static_in_pro/our_static/img/")
# Display the top-level contents of the input directory as a sanity check.
input_fs.listdir('')

['images', 'spectra', 'Metadata-marslab.csv']

In [3]:
# Load the per-observation metadata table and lower-case its column names.
metaframe = pd.read_csv(input_fs.getsyspath('Metadata-marslab.csv'))
metaframe.columns = [column.lower() for column in metaframe.columns]
# add NaNs back in so we can programmatically delete them
# (rebinding instead of inplace=True keeps this cell safe to re-run)
metaframe = metaframe.replace('-', np.nan)
# we're turning these to ints when we ingest them,
# but python doesn't like statements like int('3.0'),
# so turn to float as an intermediate step
for column in ['sol', 'site', 'drive']:
    metaframe[column] = metaframe[column].astype('float')
# pandas >= 2.0 rejects the unit-less 'datetime64' dtype, so the unit is
# spelled out explicitly.
metaframe['ltst'] = metaframe['ltst'].astype('datetime64[ns]')

In [None]:
# def am_i_broken(putative_float):
#     try:
#         float(putative_float)
#         return False
#     except ValueError:
#         return True
#     raise
# metaframe.loc[
#     metaframe['lat'].map(am_i_broken)
# ]

In [None]:
# Sequence ids excluded from ingestion for now: lots of missing values
# and no spectra.
BAD_MCAMS = ['mcam13523']
# Filenames of the ROI-overlay and RGB images, listed once up front so the
# ingest loop can filter them per observation.
overlay_images = list(input_fs.listdir('images/roi_images'))
rgb_images = list(input_fs.listdir('images/rgb_images'))

In [9]:
def get_image_ordinal(mastcam_image_fn_no_ext):
    """Return the image ordinal encoded at the end of a filename stem.

    The stem is expected to have its extension already removed. If it ends
    with ``_R<digit>`` or ``_L<digit>``, that single digit is returned as a
    string; otherwise the ordinal defaults to ``'1'``.
    """
    suffix_match = re.search(r'_[RL](\d)$', mastcam_image_fn_no_ext)
    return suffix_match.group(1) if suffix_match else '1'

In [None]:
# Build and save one MObs record per metadata row, attaching the ROI-overlay
# and RGB image filenames that belong to it.
for ix, row in metaframe.iterrows():
    # skip observations we think are 'bad'
    if row['seq_id'] in BAD_MCAMS:
        continue
    # drop NaN-valued fields and populate observation SQL fields
    # from CSV fields
    row = row.dropna()
    obs = MObs(**dict(zip(row.index, row.values)))
    # this is the canonical prefix for image / spectra files:
    # 'sol' + zero-padded four-digit sol + '_' + sequence id
    obs_identifier = 'sol' + format(
            int(row['sol']), "0>4d"
        ) + '_' + row['seq_id']
    overlay_image_list = [
        image for image in overlay_images
        if image.startswith(obs_identifier)
    ]
    rgb_image_list = [
        image for image in rgb_images
        if image.startswith(obs_identifier)
    ]
    # associate observation with images using the convoluted decision tree
    # that appears to have been used to name the images (usually)
    for image in overlay_image_list:
        basename = fs.path.splitext(image)[0]
        if re.search(r'_R\d.*?_ROIs', basename):
            image_eye = 'righteye'
        elif re.search(r'_L\d.*?_ROIs', basename):
            image_eye = 'lefteye'
        else:
            # Fail loudly on an unrecognised name. Relying on a NameError
            # does not work here: image_eye stays bound from the previous
            # image, so the file would silently be filed under that eye.
            raise ValueError(
                "cannot determine eye for ROI image " + repr(image)
            )
        setattr(
            obs,
            image_eye + '_roi_image_' + get_image_ordinal(basename),
            image
        )
    # note subtle, delicious differences in RGB image naming conventions
    for image in rgb_image_list:
        basename = fs.path.splitext(image)[0]
        if re.search(r'R(_R\d)?$', basename):
            image_eye = 'righteye'
        elif re.search(r'L(_[LR]\d)?$', basename):
            image_eye = 'lefteye'
        else:
            # Same reasoning as above: never reuse a stale image_eye.
            raise ValueError(
                "cannot determine eye for RGB image " + repr(image)
            )
        setattr(
            obs,
            image_eye + '_rgb_image_' + get_image_ordinal(basename),
            image
        )
    obs.clean()
    obs.save()
# Display every stored observation as a table.
pd.DataFrame(map(modeldict, MObs.objects.all()))

In [None]:
# for spec in MSpec.objects.all():
#     spec.delete()

In [10]:
# Sequence id as embedded in spectra filenames: 'mcam' + digits, followed
# by an underscore (the lookahead keeps the underscore out of the match).
SEQ_ID_PATTERN = r"mcam\d+(?=_)"
# NOTE: only the first 20 spectra files are processed.
for spec_file in input_fs.listdir('spectra')[0:20]:
    seq_id_match = re.search(SEQ_ID_PATTERN, spec_file)
    if seq_id_match is None:
        # A stray file without a sequence id would otherwise crash the
        # whole run with an AttributeError on None.
        print("no sequence id in " + spec_file)
        continue
    seq_id = seq_id_match.group()
    try:
        observation = MObs.objects.get(seq_id__iexact=seq_id)
    except ObjectDoesNotExist:
        print("no observation for " + spec_file, seq_id)
        continue
    frame = pd.read_csv(input_fs.getsyspath('spectra/' + spec_file))
    frame.columns = [column.lower() for column in frame.columns]
    # get_image_ordinal anchors its pattern at the end of the name, so the
    # extension has to be stripped first; with it attached the ordinal
    # suffix can never match and the result is always '1'.
    image_number = get_image_ordinal(fs.path.splitext(spec_file)[0])
    for _, row in frame.iterrows():
        row = row.replace(['-','',' '], np.nan).dropna()
        # we would like these metadata to be carried on the parent
        # observation rather than on the spectrum (i.e., we don't need
        # an extra pivot because we already have a FOREIGN KEY.) but
        # we want to make sure they match!
        assert row['sol'] == observation.sol
        assert row['seq_id'] == observation.seq_id
        row = row.drop(['sol','seq_id', 'instrument'])
        # The CSV marks float rocks with 'Y'; anything else becomes False.
        row['float'] = row['float'] == 'Y'
        metadata = dict(row) | {
            'observation': observation,
            'image_number': image_number,
            'filename': spec_file
        }
#         spectrum = MSpec(**metadata)
#         spectrum.clean()
#         spectrum.save()


In [12]:
# Display the spectra table most recently read by the ingest loop; this
# relies on the loop variable `frame` still being bound in the kernel.
frame

Unnamed: 0,sol,seq_id,instrument,color,feature,formation,member,float,l2,l2_err,...,l5,l5_err,r4,r4_err,r5,r5_err,l6,l6_err,r6,r6_err
0,397,mcam01650,MCAM,dark blue,-,Bradbury Group,-,N,0.111288,0.0153,...,,,,,,,0.337403,0.031909,,
1,397,mcam01650,MCAM,light blue,-,Bradbury Group,-,N,0.168703,0.014909,...,,,,,,,0.548002,0.033366,,
2,397,mcam01650,MCAM,red,-,-,-,N,0.080209,0.013953,...,,,,,,,0.304274,0.027118,,
