In [2]:
import os
from itertools import product
import re

import django
import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.cm import register_cmap
from matplotlib.colors import ListedColormap
from sklearn.decomposition import PCA, FactorAnalysis
import marslab.spectops as ops
from fit import correlation_matrix
from marslab.imgops.imgutils import normalize_range
from marslab.compat.xcam import DERIVED_CAM_DICT
from sklearn.preprocessing import StandardScaler, Normalizer
from sklearn.pipeline import Pipeline
from marslab.imgops.pltutils import attach_axis, set_colorbar_font


from correlate import (
    s_from_midnight, explode_binary, translate_fields_for_corr_graphs,
    preprocess_for_corrs, plot_dimensionality_matrices,
    plot_mdex_pca
)

# os.chdir("..")

os.environ.setdefault("DJANGO_SETTINGS_MODULE", "multidex.settings")
os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"

django.setup()
import plotter.models
from plotter.spectrum_ops import filter_df_from_queryset
from multidex_utils import model_metadata_df

%matplotlib qt
%matplotlib qt

In [3]:
def explained_variance_ratios(array):
    variances = array.var(axis=0)
    total = sum(variances)
    return variances / total

In [5]:
instrument = "ZCAM"
filter_info = DERIVED_CAM_DICT[instrument]["filters"]
filters = list(filter_info.keys())
narrowband = [filt for filt in filters if not re.match(r"[LR]0[RGB]", filt)]


In [None]:
plt.rcParams["figure.figsize"] = (11, 11)
corrs = []
transforms = []
figlist = []
for r_star, scale_to, search_terms, norm_values, filts in product(
    [False, True],
#     [None, ("L1", "R1")],
    [None],
    [None],
#     [None, [("feature", "rock")]],
    [False, 'R6'],
#     [PCA, FactorAnalysis]
    [filters, narrowband]
    
):
#     if search_terms is not None:
#         explode_field = "morphology"
#     else:
#         explode_field = None
    if (scale_to is not False) and (r_star is True):
        continue     # pointless, move on
    explode_field=None
    method=PCA
    correlations, transform, figs = plot_mdex_pca(
        scale_to = scale_to,
        instrument=instrument,
        explode_field = explode_field,
        r_star = r_star,
        search_terms = search_terms,
        corr_fields = filts + [
            'incidence_angle'
        ],
        pca_fields=filts,
        corr_cmap="orange_teal",
        fontsize=18,
        method=method,
        norm_values=norm_values
        
    )
    corrs.append(correlations)
    transforms.append(transform)
    figlist.append(figs)

In [None]:
plt.close('all')

In [None]:
for figs in figlist:
    figs['pc correlations'].show()

In [24]:
from fit import correlation_matrix

In [34]:
correlation_matrix(corpus[['zoom', 'sol']])

Unnamed: 0,zoom,sol
zoom,1.0,0.51
sol,0.51,1.0


In [32]:
corpus['zoom']

1      110
2      110
3      110
4      110
5      110
      ... 
317     34
318     63
319     63
320     63
321     63
Name: zoom, Length: 321, dtype: object

In [186]:
instrument='ZCAM'
spec_model = plotter.models.INSTRUMENT_MODEL_MAPPING[instrument]
wave_mapping = pd.Series(DERIVED_CAM_DICT[instrument]['filters'])
narrowband = [filt for filt in filters if not re.match(r"[LR]0[RGB]", filt)]
filter_info = DERIVED_CAM_DICT[instrument]["filters"]
filters = list(filter_info.keys())
metadata_df = model_metadata_df(spec_model)
search_terms=None
norm_values = False
pca_fields = filters
# corr_fields = filters + ["incidence_angle", 'r16', 'r56', 'l645']
# corr_fields = filters + ["incidence_angle", "zoom", 'sol']
# corr_fields = filters + ["incidence_angle", 'ltst', 'sclk']
corr_fields = filters + ['feature']
explode_field = None
r_star=True
# scale_to=('L1', 'R1')
scale_to=None
# scale_to=('L6', 'R6')

data_df = filter_df_from_queryset(
    spec_model.objects.all(), r_star=r_star, scale_to=scale_to
)
corpus = pd.concat([metadata_df, data_df], axis=1)
# corpus["ltst"] = corpus["ltst"].map(s_from_midnight)
# corpus["avg"] = corpus[filters].mean(axis=1)
# corpus['r56'] = corpus['R5'] / corpus['R6']
# corpus['r16'] = corpus['R1'] / corpus['R6']
# corpus['l645'] = ops.band_depth(
#     corpus[['L6', 'L4', 'L5']].T,
#     None,
#     wave_mapping[['L6', 'L4', 'L5']]
# )[0]
# if 'zoom' in corpus.columns:
#     corpus['zoom'] = corpus['zoom'].astype('float16')
if (explode_field is not None) and (explode_field in corpus.columns):
    exploded = explode_binary(corpus, explode_field)
    search = pd.concat([corpus.copy(), exploded], axis=1)
else:
    exploded = None
    search = corpus.copy()

# fields to do pca on
pca_fields = translate_fields_for_corr_graphs(filters, pca_fields)

# fields to compare with the PCs
corr_fields = translate_fields_for_corr_graphs(filters, corr_fields)
if exploded is not None:
    corr_fields += list(exploded.columns)
# corr_fields += [band + "_err" for band in filters]
# pca_data, corr_data = preprocess_for_corrs(
#     corr_fields, norm_values, pca_fields, search, search_terms
# )
# pca_data = search[pca_fields].dropna(axis=0)
corr_data = search[corr_fields].dropna(axis=0)

In [187]:
corr_data

Unnamed: 0,L6,L0B,R0B,L5,L0G,R0G,L4,L0R,R0R,L3,L2,L1,R1,R2,R3,R4,R5,R6,feature
1,0.072222,0.080999,0.082412,0.099139,0.116740,0.119189,0.180036,0.200325,0.194455,0.235365,0.243744,0.233042,0.243213,0.226327,0.222173,0.226166,0.228479,0.235168,rock
2,0.088154,0.095664,0.086872,0.111945,0.128799,0.126195,0.177291,0.194865,0.202656,0.221459,0.243827,0.242504,0.240988,0.238397,0.240533,0.236284,0.245700,0.242829,rock
3,0.065467,0.079607,0.083772,0.109189,0.128300,0.129443,0.200457,0.218309,0.232699,0.256118,0.281261,0.281770,0.287497,0.272505,0.279499,0.276099,0.279329,0.283034,rock
4,0.096357,0.102721,0.106567,0.113464,0.131139,0.137448,0.176554,0.198964,0.206228,0.223834,0.239356,0.237405,0.246907,0.230447,0.241505,0.242392,0.240957,0.243196,rock
5,0.063126,0.073812,0.070695,0.095568,0.109957,0.106322,0.168386,0.180775,0.185341,0.208744,0.238193,0.234631,0.230979,0.218020,0.219707,0.217391,0.221618,0.227225,rock
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
317,0.090819,0.104868,0.105656,0.132851,0.144694,0.146213,0.207485,0.217992,0.223417,0.244759,0.257619,0.250467,0.260452,0.245925,0.242965,0.241346,0.241438,0.237125,pebble
318,0.091603,0.105510,0.106637,0.133381,0.150925,0.155868,0.216670,0.229963,0.234644,0.253368,0.268797,0.261489,0.271283,0.258299,0.257945,0.256944,0.259069,0.257204,rock
319,0.082933,0.102542,0.105261,0.137470,0.157088,0.163465,0.233388,0.251349,0.256410,0.276584,0.295920,0.294129,0.299777,0.287696,0.286022,0.285333,0.286627,0.285842,rock
320,0.077842,0.099373,0.103351,0.138536,0.164123,0.173747,0.266175,0.290157,0.304784,0.332353,0.366180,0.364294,0.384236,0.367852,0.368574,0.368751,0.372142,0.369796,rock


In [143]:
pca_data = corr_data[pca_fields]

In [118]:
from sklearn.preprocessing import StandardScaler, Normalizer
from sklearn.pipeline import Pipeline

In [135]:
def explained_variance_ratios(array):
    variances = array.var(axis=0)
    total = sum(variances)
    return variances / total

In [179]:
pca_data['L2'].mean()

0.08399890072858918

In [160]:
pca_data = pca_data.sort_values(by="L2")

In [164]:
vectors = pca_data.T.to_dict("list")
vectarray = np.array(tuple(vectors.values()))
transform = pipe.fit_transform(vectarray)

In [175]:
pca_data['L2'].mean()

0.08399890072858918

In [180]:
from plotter.reduction import default_multidex_pipeline

In [182]:
pipe = default_multidex_pipeline()

In [178]:
pca_data['R2'].mean()

0.09292413975327307

In [176]:
pipe

Pipeline(steps=[('norm', Normalizer()), ('scale', StandardScaler()),
                ('reduce', PCA(n_components=8))])

In [165]:

transform = pd.DataFrame(transform)
transform.columns = [
    "P" + str(column + 1) for column in transform.columns
]
# explained_variance_ratios(transform)
# if "explained_variance_ratio_" in dir(pipe.named_steps['reduce']):
#     explained_variance = np.round(
#         pipe.named_steps['reduce'].explained_variance_ratio_ * 100, 2
#     )
# else:
#     explained_variance = None
# corr_frame = pd.concat([corr_data, transform], axis=1)
# correlations = correlation_matrix(corr_frame)

plot_dimensionality_matrices(
    correlations, 
    transform, 
    corr_fields, 
    explained_variance=explained_variance,
    corr_cmap='orange_teal', 
    fontsize=20,
    which='parameters',
    title=" ".join(pipe.named_steps.keys())

)

{'parameters': <Figure size 640x480 with 2 Axes>}

In [147]:
explained_variance_ratios(transform)

P1    0.661730
P2    0.206868
P3    0.090439
P4    0.014780
P5    0.011854
P6    0.005445
P7    0.004849
P8    0.004037
dtype: float64

In [185]:
PCA??

In [107]:
from sklearn.decomposition import FastICA
vectors = pca_data.T.to_dict("list")
vectarray = np.array(tuple(vectors.values()))
pipe = Pipeline(steps=[
    ('norm', Normalizer()),
    ('scale', StandardScaler()),
    ('ica', FastICA(n_components=8, whiten=True))
])
transform = pipe.fit_transform(vectarray)
transform = pd.DataFrame(transform)
transform.columns = ["P" + str(column + 1) for column in transform.columns]

corr_frame = pd.concat([corr_data, transform], axis=1)
correlations = correlation_matrix(corr_frame)

plot_dimensionality_matrices(
    correlations, 
    transform, 
    corr_fields, 
    corr_cmap='orange_teal', 
    fontsize=20,
    which='parameters',
    title=" ".join(pipe.named_steps.keys())

)

{'parameters': <Figure size 640x480 with 2 Axes>}

In [120]:
from sklearn.decomposition import NMF
vectors = pca_data.T.to_dict("list")
vectarray = np.array(tuple(vectors.values()))
pipe = Pipeline(steps=[
    ('norm', Normalizer()),
#     ('scale', StandardScaler()),
    ('nmf', NMF(n_components=16, init='random', max_iter=5000))
])
transform = pipe.fit_transform(vectarray)
transform = pd.DataFrame(transform)
transform.columns = ["P" + str(column + 1) for column in transform.columns]

corr_frame = pd.concat([corr_data, transform], axis=1)
correlations = correlation_matrix(corr_frame)

plot_dimensionality_matrices(
    correlations, 
    transform, 
    corr_fields, 
    corr_cmap='orange_teal', 
    fontsize=20,
    which='parameters',
    title=" ".join(pipe.named_steps.keys())

)

{'parameters': <Figure size 640x480 with 2 Axes>}

In [117]:
explained_variance_ratios(transform)

P1     0.194671
P2     0.227756
P3     0.028433
P4     0.037464
P5     0.032736
P6     0.035942
P7     0.019016
P8     0.019965
P9     0.025239
P10    0.023616
P11    0.051665
P12    0.025683
P13    0.086045
P14    0.001573
P15    0.036302
P16    0.153895
dtype: float64

In [103]:
from sklearn.decomposition import KernelPCA
vectors = pca_data.T.to_dict("list")
vectarray = np.array(tuple(vectors.values()))
pipe = Pipeline(steps=[
    ('norm', Normalizer()),
    ('scale', StandardScaler()),
    ('kernel', KernelPCA(n_components=16))
])
transform = pipe.fit_transform(vectarray)
transform = pd.DataFrame(transform)
transform.columns = ["P" + str(column + 1) for column in transform.columns]

corr_frame = pd.concat([corr_data, transform], axis=1)
correlations = correlation_matrix(corr_frame)

plot_dimensionality_matrices(
    correlations, 
    transform, 
    corr_fields, 
    corr_cmap='orange_teal', 
    fontsize=20,
    which='parameters',
    title=" ".join(pipe.named_steps.keys())

)

{'parameters': <Figure size 640x480 with 2 Axes>}

In [99]:
np.array([1,1,2,3,3]).var(axis=0)

0.8

P1     0.672857
P2     0.190888
P3     0.044724
P4     0.035118
P5     0.016631
P6     0.012657
P7     0.007040
P8     0.006082
P9     0.003611
P10    0.002769
P11    0.002035
P12    0.001861
P13    0.001277
P14    0.001068
P15    0.000794
P16    0.000587
dtype: float64

In [66]:
from sklearn.metrics import explained_variance_score

In [71]:
transform

Unnamed: 0,P1,P2,P3,P4,P5,P6,P7,P8
0,-2.267093,-1.724095,0.434542,-0.182625,-0.187267,-0.933902,-1.156069,0.546855
1,-2.526464,1.224259,0.746468,0.138954,0.066854,-0.306094,0.651362,-0.030773
2,-5.165960,-0.126977,-0.271500,0.345461,0.035312,-0.422036,0.446803,-0.135420
3,-1.344621,1.834564,0.297798,0.390662,0.517248,-0.801794,0.171038,0.062085
4,-3.907494,-0.984149,1.127406,0.517246,0.036090,-0.646001,0.476109,-0.586003
...,...,...,...,...,...,...,...,...
308,0.505675,-0.630974,-0.267743,0.622051,0.131681,0.234267,-0.224845,-0.177420
309,-0.012375,-0.093916,-0.510644,0.184119,-0.180987,0.051594,-0.162309,-0.410684
310,-1.503885,-0.517673,-0.687886,-0.109053,-0.038638,0.236416,0.069289,-0.567178
311,-5.692225,0.172917,-0.866286,0.416134,0.062320,0.049577,-0.089146,-0.441941


In [73]:
vectarray.shape

(313, 18)

In [77]:
from fit import coef_det

In [None]:
plot_dimensionality_matrices?