In [1]:
from correlate import plot_dimensionality_matrices, explode_binary
from plotter.reduction import default_multidex_pipeline, \
    explained_variance_ratios, transform_df
import plotter.models
from plotter.spectrum_ops import filter_df_from_queryset
from multidex_utils import model_metadata_df
from marslab.compat.xcam import DERIVED_CAM_DICT
import marslab.spectops as ops
from fit import correlation_matrix
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as mcolor
from sklearn.linear_model import LogisticRegression

from marslab.imgops.pltutils import attach_axis
from marslab.imgops.pltutils import set_label
import matplotlib.font_manager as mplf


%matplotlib qt
%matplotlib qt

In [95]:
# Two-slope colour normalisation over [-1, 1] with its centre at 0; this is
# the default `norm` argument of make_corr_chart.
DEFAULT_CORR_NORM = mcolor.TwoSlopeNorm(vcenter=0, vmin=-1, vmax=1)

def make_corr_chart(
    matrix, explained_variance=None, norm=DEFAULT_CORR_NORM,
    cmap='orange_teal'
):
    """
    Draw `matrix` as a heatmap on a new figure and return that figure.

    matrix: object with `.index` and `.columns` (e.g. a DataFrame); the index
        entries label the y ticks and the column entries label the x ticks.
    explained_variance: optional iterable; the string form of its i-th value
        is annotated in white at data coordinates (0, i).
    norm, cmap: handed unchanged to `Axes.imshow`.
    """
    fig, ax = plt.subplots()
    image = ax.imshow(matrix, norm=norm, cmap=cmap)
    row_labels = list(matrix.index)
    column_labels = list(matrix.columns)
    ax.set_yticks(np.arange(len(row_labels)))
    ax.set_yticklabels(row_labels)
    ax.set_xticks(np.arange(len(column_labels)))
    ax.set_xticklabels(column_labels)
    # the colorbar is drawn on a separate axis obtained from attach_axis
    colorbar_axis = attach_axis(ax, "right", "8%")
    plt.colorbar(image, cax=colorbar_axis)
    if explained_variance is None:
        return fig
    for row, value in enumerate(explained_variance):
        ax.annotate(str(value), (0, row), color="white")
    return fig


def explode_sequential(df, column, remove_nan=True):
    """
    Encode the values of `df[column]` as sequential integer codes.

    Codes start at 0 and follow order of first appearance (the order
    returned by `Series.unique`).

    df: DataFrame holding the column to encode.
    column: label of the column to encode.
    remove_nan: if truthy (default), missing values get no code and their
        rows are NaN in the result. If falsy, missing values count as a
        level of their own and their rows receive that level's code.

    Returns a float64 Series aligned with `df.index`; the dtype is float so
    that NaN can mark rows without a code.
    """
    values = df[column]
    levels = values.dropna().unique() if remove_nan else values.unique()
    # An integer dtype cannot hold NaN, so allocate float64 explicitly
    # instead of requesting 'uint8' and relying on pandas to upcast.
    exploded = pd.Series(np.nan, index=df.index, dtype='float64')
    for code, level in enumerate(levels):
        if pd.api.types.is_scalar(level) and pd.isna(level):
            # NaN never compares equal to itself, so match missing values
            # with isna(). Only rows that are still uncoded are filled, so a
            # second missing-value marker (e.g. None next to NaN) cannot
            # overwrite the code given by the first one.
            mask = values.isna() & exploded.isna()
        else:
            mask = values == level
        exploded.loc[mask] = code
    return exploded

In [96]:
# Pick the instrument, then look up its spectrum model, the metadata
# dataframe for that model, and the names of its filters.
instrument = "ZCAM"
spec_model = plotter.models.INSTRUMENT_MODEL_MAPPING[instrument]
metadata_df = model_metadata_df(spec_model)
filter_info = DERIVED_CAM_DICT[instrument]["filters"]
# filter names only; the values of filter_info are not used here
filters = list(filter_info.keys())

In [98]:
# Build the filter dataframe from every object of the instrument model and
# join it column-wise to the metadata dataframe.
data_df = filter_df_from_queryset(
    spec_model.objects.all(), r_star=False, scale_to=None
)
corpus = pd.concat([metadata_df, data_df], axis=1).copy()
wave_mapping = pd.Series(DERIVED_CAM_DICT[instrument]['filters'])

# Band ratios.
corpus['r56'] = corpus['R5'] / corpus['R6']
corpus['r16'] = corpus['R1'] / corpus['R6']

# Band depths: parameter name -> the three filters handed to ops.band_depth,
# in the order the call receives them.
band_depth_filters = {
    'l645': ['L6', 'L4', 'L5'],
    'r153': ['R1', 'R5', 'R3'],
    'r465': ['R4', 'R6', 'R5'],
}
for depth_name, depth_bands in band_depth_filters.items():
    corpus[depth_name] = ops.band_depth(
        corpus[depth_bands].T, None, wave_mapping[depth_bands]
    )[0]

# Keep only rows for which every derived parameter could be computed.
params = ['r56', 'r16', 'l645', 'r153', 'r465']
corpus = corpus.dropna(subset=params)



In [218]:
# Restrict the corpus to rock features, then fit a class-balanced multinomial
# logistic regression of the sequentially encoded `dependent_name` column on
# the independent variables and chart its coefficients.
is_rock = corpus['feature'] == 'rock'
corpus = corpus.loc[is_rock]
# corpus = corpus.loc[~corpus['feature'].isin(['crater rim', 'delta'])]
corpus = corpus.reset_index(drop=True)
log_r = LogisticRegression(
    multi_class='multinomial', max_iter=5000, class_weight='balanced'
)
dependent_name = 'seq_id'
dependent = corpus[dependent_name].dropna().unique()
exploded = explode_sequential(corpus, dependent_name)
# independent = filters
independent = params
# scaling changes these models but does not appear
# to make them more predictive
log_r.fit(corpus[independent], exploded)
# rows are labelled with the dependent values, columns with the
# independent variable names
log_coefficients = pd.DataFrame(
    log_r.coef_, index=dependent, columns=independent
)
score = log_r.score(corpus[independent], exploded)
morph_chart = make_corr_chart(log_coefficients)
set_label(
    morph_chart,
    'zcam rocks, logit regression on params and morph classes',
    fontproperties=mplf.FontProperties(size=24),
    x_or_y='x'
)

Text(0.5, 0, 'zcam rocks, logit regression on params and morph classes')

In [81]:
from sklearn.preprocessing import StandardScaler

In [219]:
# Score on the same rows the model was fitted on (not a held-out estimate).
log_r.score(corpus[independent], exploded)

0.25308641975308643

In [8]:
# Correlation matrix of the independent variables with each other.
ind_coefficients = correlation_matrix(corpus[independent])

In [37]:
# Chart the correlation matrix of the independent variables, showing the
# incidence-angle label (if present) as the Greek "theta i".
plt.rcParams['font.size'] = 18
theta_label = {"incidence_angle": "\u03b8i"}
corrs = correlation_matrix(corpus[independent]).rename(
    columns=theta_label, index=theta_label
)
corr_chart = make_corr_chart(corrs)
set_label(
    corr_chart,
    'zcam rocks, correlation matrix',
    fontproperties=mplf.FontProperties(size=24),
    x_or_y='x'
)

Text(0.5, 0, 'zcam rocks, correlation matrix')

In [38]:
# Write the correlation chart to disk. The file name has no extension, so the
# output format falls back to matplotlib's configured default.
corr_chart.tight_layout()
corr_chart.savefig(
    'zcam_rocks_corr_matrix_big', dpi=220, bbox_inches="tight", pad_inches=0
)

Text(0.5, 0, 'zcam rocks, logit regression on params and morph classes')

In [24]:
# Write the logit-coefficient chart to disk. The file name has no extension,
# so the output format falls back to matplotlib's configured default.
morph_chart.tight_layout()
morph_chart.savefig(
    'zcam_morph_logit', dpi=220, bbox_inches="tight", pad_inches=0
)

In [None]:
# Cut one quadrant out of the correlation matrix: take the first block of
# rows, flip it top-to-bottom, then keep the first block of columns.
vsplit = np.array_split(ind_coefficients.values, 2)[0]
hsplit = np.array_split(np.flipud(vsplit), 2, axis=1)[0]
n_rows, n_cols = hsplit.shape
# Row labels are reversed to follow the vertical flip. Note that this
# rebinds `corrs`.
corrs = pd.DataFrame(
    hsplit,
    index=list(reversed(independent[:n_rows])),
    columns=independent[:n_cols],
)

In [None]:
# Columns handed to the reduction pipeline, and columns included in the
# correlation matrix.
pca_fields = filters
param_fields = ['r56', 'r16', 'l645', 'r153', 'r465']
# NOTE(review): in the cells above `exploded` is the Series returned by
# explode_sequential, which has no `.columns`; this line only works if
# `exploded` has been rebound to a DataFrame first -- confirm intent.
corr_fields = param_fields + list(exploded.columns)
corr_fields += ['incidence_angle']
corr_fields += filters

# keep only rows that have a value for every field used below
corpus = corpus.dropna(
    subset = set(pca_fields).union(corr_fields), axis=0
).reset_index(drop=True)
# NOTE(review): the row index was reset on the previous statement, so
# renaming the index label "incidence_angle" looks like a no-op here --
# confirm whether this rename was meant for `correlations` instead.
corpus = corpus.rename(
            index={"incidence_angle": "\u03b8i"}
        )
pca_data = corpus[pca_fields].copy()
corr_data = corpus[corr_fields].copy()


# run the filter columns through the default pipeline, then correlate the
# selected fields together with the columns of the transform output
pipeline = default_multidex_pipeline()
transform = transform_df(pca_data, pipeline)
corr_data = pd.concat([corr_data, transform], axis=1)
correlations = correlation_matrix(corr_data)
# explained variance ratios scaled by 100 and rounded to two decimals
evr = np.round(explained_variance_ratios(transform)*100, 2)

In [None]:
# keep only the requested fields that appear as columns of `correlations`
corr_fields = [f for f in corr_fields if f in correlations.columns]
# square sub-matrix of the correlations among those fields
feature_quadrant = correlations.loc[corr_fields, corr_fields].copy()

In [None]:
# NOTE(review): repeats the last statement of the preceding cell verbatim.
feature_quadrant = correlations.loc[corr_fields, corr_fields].copy()

In [160]:
# NOTE(review): `correlations` is rebuilt from itself, so every re-run of this
# cell appends the rows of `transform` again. The cell also needs `transform`
# and `evr` from the reduction cell to exist in the kernel.
correlations = pd.concat([correlations, transform], axis=0)

figs = plot_dimensionality_matrices(
    correlations, transform, corr_fields, evr,
    corr_cmap="orange_teal", fontsize=16
)

# NOTE(review): this closes every open pyplot figure immediately after the
# figures above are created -- confirm that `figs` are still usable afterwards.
plt.close('all')

NameError: name 'transform' is not defined

In [159]:
# Show the 'features' figure returned by plot_dimensionality_matrices.
fig = figs['features']
fig.tight_layout()
fig.show()

NameError: name 'figs' is not defined

In [None]:
# Show the 'parameters' figure returned by plot_dimensionality_matrices.
# This rebinds `fig`, which later cells reuse.
fig = figs['parameters']
fig.tight_layout()
fig.show()

In [None]:
# Re-apply tight layout to whichever figure `fig` is currently bound to.
fig.tight_layout()


In [None]:
# Display the whole `correlations` frame as the cell output.
correlations

In [None]:
# Save whichever figure `fig` is currently bound to.
fig.savefig('test.png', dpi=275, bbox_inches="tight", pad_inches=0)

In [None]:
# Display the ZCAM entry of DERIVED_CAM_DICT as the cell output.
DERIVED_CAM_DICT['ZCAM']