## 注意 : このファイルは以下の環境下で行う

In [1]:
# Python 3.8.18
# Numpy 1.24.3
# Pandas 2.0.3
# Matplotlib 3.7.2
# Scipy 1.10.1
# Scikit-learn 1.3.0
# Astropy 5.1
# Datashader 0.15.2
# Tqdm 4.65.0
# umap-learn 0.5.3


import os, sys, glob
import pandas as pd
import numpy as np
import matplotlib as mpl
# mpl.use('TKAgg',warn=False, force=True) #set MPL backend.
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import pickle #save/load python data objects (dictionaries/arrays)
import multiprocessing
import itertools
from sklearn import preprocessing
from sklearn.neighbors import NearestNeighbors
import datetime
from mpl_toolkits.axes_grid1.axes_divider import make_axes_locatable
from matplotlib.colorbar import Colorbar

#from sklearn.manifold import TSNE #single core TSNE, sklearn.
#from MulticoreTSNE import MulticoreTSNE as multiTSNE #multicore TSNE, not sklearn implementation. Must be installed separately. # not used here since UMAP is far better.
import umap
import datashader as ds
import datashader.transfer_functions as tf
from datashader.transfer_functions import shade, stack
from functools import partial
from datashader.utils import export_image
from datashader.colors import *

  @numba.jit()
  @numba.jit()
  @numba.jit()
  @numba.jit()


## 使用する関数

In [2]:
# ファイルのLoading/saving
def save_obj(obj, name ):
    """Pickle ``obj`` to the file ``name + '.pkl'``.

    Parameters
    ----------
    obj : object
        Any picklable Python object (dictionary, array, fitted model, ...).
    name : str
        Target path without the ``.pkl`` extension.
    """
    target_path = name + '.pkl'
    with open(target_path, 'wb') as handle:
        # Highest protocol: most compact/fastest format this interpreter offers.
        pickle.dump(obj, handle, protocol=pickle.HIGHEST_PROTOCOL)

def load_obj(name ):
    """Load and return the object pickled in ``name + '.pkl'``.

    Parameters
    ----------
    name : str
        Source path without the ``.pkl`` extension.

    Returns
    -------
    object
        Whatever object was stored in the pickle file.
    """
    source_path = name + '.pkl'
    with open(source_path, 'rb') as handle:
        loaded = pickle.load(handle)
    return loaded
#------------------------------------------------------------------------------------------------------------ 





#------------------------------------------------------------------------------------------------------------ 

# プロット ・ データ準備の関数

#------------------------------------------------------------------------------------------------------------ 


# Customised histogram function to help plotting on log scale and other aspects (courtesy of Justin Bray)
def histvals(a, logmin=0.0, cumulative=False, **kwargs):
    """Return x/y values for plotting a histogram as a step outline.

    Every bin contributes two points (its left and right edge at the bin
    height), so the result can be drawn with a plain line plot, including on
    a logarithmic axis.

    Parameters
    ----------
    a : numpy.ndarray
        Values to histogram.
    logmin : float, optional
        If truthy, every non-positive height is replaced by this value so
        the outline can be drawn on a log axis.
    cumulative : bool, optional
        If true, build a cumulative count over the unique values of ``a``
        instead of calling ``np.histogram``.
    **kwargs
        Non-cumulative: forwarded unchanged to ``np.histogram``.
        Cumulative: only ``bins`` (its first and last entries are used as the
        outer x limits) and ``density`` (normalise so the maximum is 1) are
        accepted; anything else trips the assertion.

    Returns
    -------
    x, y : numpy.ndarray
        Coordinates of the outline.
    """
    if cumulative:
        outer_limits = kwargs.pop('bins', [a.min(), a.max()])
        normalise = kwargs.pop('density', False)
        # Alert the caller if any keyword argument was left unused.
        assert not kwargs, 'Unprocessed kwargs in histvals.'

        # Collapse duplicate values: each unique value becomes an edge.
        unique_vals, multiplicity = np.unique(a, return_counts=True)

        edges = np.concatenate(( [outer_limits[0]], unique_vals, [outer_limits[-1]] ))
        # Running total of the counts, starting from zero.
        heights = np.concatenate(( [0], np.cumsum(multiplicity) ))

        if normalise:
            heights = heights*1./heights.max()
    else:
        # heights: count per bin; edges: the bin boundaries.
        heights, edges = np.histogram(a, **kwargs)

    # Duplicate every bin into (left edge, right edge) at the same height.
    x = np.concatenate([ (left, right) for left, right in zip(edges[:-1], edges[1:]) ])
    y = np.concatenate([ (height, height) for height in heights ])

    if not cumulative:
        # Close the outline by dropping to zero at both ends.
        x = np.concatenate(( [x[0]], x, [x[-1]] ))
        y = np.concatenate(( [0],    y,  [0]    ))

    # Swap non-positive heights for logmin (a small positive value) so the
    # outline still plots on a log scale.
    if logmin:
        y = (y > 0)*y + (y <= 0)*logmin
    return x, y
#------------------------------------------------------------------------------------------------------------



    
def make_cmap(colors, position=None, bit=False):
    """Build a linear-segmented matplotlib colormap from a list of RGB colours.

    Parameters
    ----------
    colors : sequence of (r, g, b)
        With ``bit=False`` the components are floats in 0..1, e.g.
        ``[(1, 1, 1), (101/255, 236/255, 101/255)]``.
        With ``bit=True`` they are integers in 0..255, e.g.
        ``[(255, 255, 255), (101, 236, 101)]``.
        The sequence is never modified.
    position : sequence of float, optional
        Location of each colour along the map. Must have the same length as
        ``colors``, start at 0 and end at 1. Defaults to evenly spaced
        positions. Lists and NumPy arrays are both accepted.
    bit : bool, optional
        Whether ``colors`` uses 8-bit integer components.

    Returns
    -------
    matplotlib.colors.LinearSegmentedColormap
        A 256-level colormap named ``'my_colormap'``.

    Notes
    -----
    An invalid ``position`` terminates via ``sys.exit`` (i.e. ``SystemExit``),
    as in the original implementation.
    """
    bit_rgb = np.linspace(0,1,256) # lookup table: 8-bit value -> 0..1 float

    # ``is None`` rather than ``== None``: the equality test is element-wise
    # for NumPy arrays and made the ``if`` raise for array-valued positions.
    if position is None:
        position = np.linspace(0,1,len(colors))
    else:
        if len(position) != len(colors):
            sys.exit("position length must be the same as colors")
        elif position[0] != 0 or position[-1] != 1:
            sys.exit("position must start with 0 and end with 1")

    if bit:
        # Convert 8-bit components to 0..1 floats in a NEW list so the
        # caller's ``colors`` is left untouched (the original overwrote it,
        # which broke a second call with the same list).
        colors = [(bit_rgb[c[0]], bit_rgb[c[1]], bit_rgb[c[2]]) for c in colors]

    cdict = {'red':[], 'green':[], 'blue':[]}
    for pos, color in zip(position, colors):
        # (x, y_left, y_right): same value on both sides -> no discontinuity.
        cdict['red'].append((pos, color[0], color[0]))
        cdict['green'].append((pos, color[1], color[1]))
        cdict['blue'].append((pos, color[2], color[2]))
    cmap = mpl.colors.LinearSegmentedColormap('my_colormap',cdict,256)
    return cmap
#------------------------------------------------------------------------------------------------------------





def metrics(df, classes_pred_all):
    """Build the classification report and confusion matrix as DataFrames.

    Parameters
    ----------
    df : mapping
        Must provide ``'classes_test'`` (true labels) and ``'class_names'``
        (presumably the dictionary returned by ``prepare_data`` - confirm
        against the caller).
    classes_pred_all : array-like
        Predicted labels, compared against ``df['classes_test']``.

    Returns
    -------
    report_df : pandas.DataFrame
        ``classification_report`` output (4 digits), transposed.
    cm_df : pandas.DataFrame
        Confusion matrix with ``df['class_names']`` as both index and columns.
    """
    true_classes = df['classes_test']
    names = df['class_names']

    report_dict = classification_report(
        true_classes,
        classes_pred_all,
        target_names=np.unique(names),
        digits=4,
        output_dict=True,
    )
    report_df = pd.DataFrame(report_dict).transpose()

    # Fix the label order so rows/columns line up with ``names``.
    matrix = confusion_matrix(true_classes, classes_pred_all, labels=names)
    cm_df = pd.DataFrame(matrix, index=names, columns=names)
    return report_df, cm_df
#------------------------------------------------------------------------------------------------------------


def prepare_data(df, feature_columns, train_percent=0.5):
    """Split ``df`` into stratified train/test features and classes.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the ``feature_columns`` and a ``'class'`` column.
    feature_columns : iterable of str
        Columns used as features.
    train_percent : float, optional
        Fraction of rows assigned to the training set; the remainder
        (``1 - train_percent``) becomes the test set.

    Returns
    -------
    dict
        Keys ``'features_train'``, ``'features_test'``, ``'classes_train'``,
        ``'classes_test'`` (the four outputs of ``train_test_split``),
        ``'class_names'`` (``numpy.ndarray`` of unique classes) and
        ``'feature_names'`` (``list`` of feature column labels).
    """
    features = df[[*feature_columns]]
    classes = df['class']

    # Fixed seed + stratification: reproducible split with class balance kept.
    split_parts = train_test_split(
        features,
        classes,
        train_size=train_percent,
        test_size=(1-train_percent),
        random_state=0,
        stratify=classes,
    )

    split_keys = ('features_train', 'features_test', 'classes_train', 'classes_test')
    prepared = dict(zip(split_keys, split_parts))
    prepared['class_names'] = np.unique(classes)
    prepared['feature_names'] = list(features)
    return prepared
#------------------------------------------------------------------------------------------------------------





#------------------------------------------------------------------------------------------------------------ 

# UMAPに関する関数

#------------------------------------------------------------------------------------------------------------ 


def run_umap_spec(df, feature_columns, label='unknown', n_neighbors=15, supervised=True, sample_train=True):
    """Fit UMAP on the training rows and project both training and test rows.

    Rows whose ``'class_pred'`` is null are treated as the training set
    (per the original author's note they are the rows used to train the
    random forest and therefore have no prediction); all other rows form the
    test set.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs ``'class_pred'``, ``'class'``, the ``feature_columns`` and, for
        the supervised mode, ``'class_i'``.
    feature_columns : list of str
        Columns passed to UMAP.
    label : str, optional
        Suffix appended to the name of the pickled model.
    n_neighbors : int, optional
        Forwarded to ``umap.UMAP``.
    supervised : bool, optional
        If true, ``df_train['class_i']`` is passed as the UMAP target.
    sample_train : bool, optional
        If true, a random 50 % of the training rows is used for the fit
        (unseeded, so the sample differs between runs).

    Returns
    -------
    pandas.DataFrame
        ``df`` left-joined with the embedding columns ``'x'`` and ``'y'``
        plus a categorical ``'class_cat'`` column. Training rows dropped by
        the sub-sampling have no embedding and so get missing ``x``/``y``.

    Side effects
    ------------
    Pickles the fitted model under ``HSC_umap_save/supervised/`` or
    ``HSC_umap_save/unsupervised/`` (created if missing).
    """
    df_train = df.loc[df['class_pred'].isnull()]
    df_test = df.loc[df['class_pred'].notnull()]

    # Training on too much data can make UMAP lose global structure.
    if sample_train:
        print('Selecting half the training data...')
        df_train = df_train.sample(frac=0.5)

    print('Clustering with UMAP')
    # Explicit if/else: the original pair of ``== False`` / ``== True`` tests
    # left ``u_model`` unbound for any other value of ``supervised``.
    if supervised:
        # Create the output directory BEFORE the fit so that a missing
        # directory cannot throw away the (potentially long) fit at save time.
        os.makedirs('HSC_umap_save/supervised', exist_ok=True)
        print('Doing supervised UMAP')
        print('Fitting to {0} data points...'.format(len(df_train)))
        u_model = umap.UMAP(random_state=42, n_neighbors=n_neighbors).fit(df_train[feature_columns], y=df_train['class_i'])
        save_obj(u_model, 'HSC_umap_save/supervised/umap_model_sup'+label) # save for use on photometric objects
    else:
        os.makedirs('HSC_umap_save/unsupervised', exist_ok=True)
        print('Doing unsupervised UMAP')
        print('Fitting to {0} data points...'.format(len(df_train)))
        u_model = umap.UMAP(random_state=42, n_neighbors=n_neighbors).fit(df_train[feature_columns])
        save_obj(u_model, 'HSC_umap_save/unsupervised/umap_model_unsup'+label) # save for use on photometric objects

    u_train = u_model.transform(df_train[feature_columns])
    u_test = u_model.transform(df_test[feature_columns])

    # The index must match the original df, particularly when sub-sampled,
    # so the join below attaches each embedding to the right row.
    u_train_df = pd.DataFrame(u_train, columns=['x', 'y'], index=df_train.index)
    u_test_df = pd.DataFrame(u_test, columns=['x', 'y'], index=df_test.index)
    u = pd.concat([u_train_df, u_test_df])
    df = df.join(u, how='left') # returns a new frame; the caller's df is not modified
    df['class_cat'] = df['class'].astype('category') # datashader requires a categorical type for colour labels

    return df
#------------------------------------------------------------------------------------------------------------



def plot_umap_ds_SpecObjs_classes(df, sup, label='unknown'):
    """Plot the UMAP embedding coloured by class, once for train and once for test.

    For each subset a datashader image is exported as PNG, read back into a
    matplotlib figure, given a text-only class legend and saved as PDF next
    to the PNG.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs ``'class_pred'``, ``'x'``, ``'y'`` and the categorical
        ``'class_cat'``. Rows with a null ``'class_pred'`` form the "train"
        subset, the others the "test" subset.
    sup : {'unsup', 'sup'}
        Selects the output directory ``HSC_umap_save/unsupervised/`` or
        ``HSC_umap_save/supervised/``.
    label : str, optional
        Inserted into the output file names.

    Raises
    ------
    ValueError
        If ``sup`` is neither ``'unsup'`` nor ``'sup'``. (The original
        rendered everything and then silently saved nothing.)
    """
    out_dirs = {'unsup': 'HSC_umap_save/unsupervised/', 'sup': 'HSC_umap_save/supervised/'}
    if sup not in out_dirs:
        raise ValueError("sup must be 'unsup' or 'sup', got {!r}".format(sup))
    out_dir = out_dirs[sup]

    df_train = df.loc[df['class_pred'].isnull()]
    df_test = df.loc[df['class_pred'].notnull()]

    ckey = dict(GALAXY=(101,236,101), QSO='hotpink', STAR='dodgerblue')
    # (legend text, legend colour) in display order.
    legend_entries = (('Galaxies', 'lightgreen'), ('Quasars', 'hotpink'), ('Stars', 'dodgerblue'))

    for dfs, label2 in zip([df_train, df_test], ['train', 'test']):
        stem = out_dir + 'UMAP-'+label+'-'+label2+'-RFclasslabels'

        # Rasterise with datashader; 500x500 keeps the resolution low.
        cvs = ds.Canvas(plot_width=500, plot_height=500)
        agg = cvs.points(dfs, 'x', 'y', ds.count_cat('class_cat'))
        img = tf.shade(agg, color_key=ckey, how='log')
        export_image(img, stem, fmt='.png', background='white')

        # Re-read the PNG so matplotlib can add the legend on top of it.
        fig = plt.figure(figsize=(10,10))
        plt.imshow(mpimg.imread(stem + '.png'))
        plt.gca().set_axis_off()
        plt.tick_params(axis='both', which='both', right=False, left=False, top=False, bottom=False)

        # Handles have neither marker nor line: only the coloured text shows.
        handles = [plt.Line2D((0,0),(0,0), color=colour, marker='', linestyle='', label=text)
                   for text, colour in legend_entries]
        leg = plt.legend(handles, [text for text, _ in legend_entries], frameon=False, fontsize=22)
        for leg_text, (_, colour) in zip(leg.get_texts(), legend_entries):
            leg_text.set_color(colour)

        fig.tight_layout()
        fig.savefig(stem + '.pdf', bbox_inches='tight')
        plt.close(fig)
#------------------------------------------------------------------------------------------------------------


_NO_DYNSPREAD = object()  # sentinel meaning "skip tf.dynspread" (None might be a real threshold)


def _render_prob_map(points, reduction, cmap, stem, *, spread_threshold=_NO_DYNSPREAD,
                     cax_size="3%", drop_zeros=True, echo_limits=False,
                     vmin_override=None, cbar_label=None, ticks=None,
                     labelbottom_off=False, labelsize=None, decimal_ticks=False):
    """Rasterise one probability map and save it as PNG plus PDF-with-colourbar.

    Reads the module-level names ``fheight`` (figure height) and, when
    spreading is requested, ``max_px``; neither is defined in this block.

    Parameters
    ----------
    points : pandas.DataFrame
        Rows to draw; needs ``'x'``, ``'y'`` and the reduced column.
    reduction : datashader reduction
        e.g. ``ds.mean('prob_best')`` or ``ds.std('prob_best')``.
    cmap : colormap
        Used for both the datashader shading and the colourbar.
    stem : str
        Output path without extension; ``.png`` and ``.pdf`` are written.
    spread_threshold : optional
        If given, ``tf.dynspread`` is applied with this threshold.
    cax_size : str
        Height of the colourbar axis relative to the image axis.
    drop_zeros : bool
        Exclude zero pixels when computing the colourbar limits (zeros break
        the log colour scale).
    echo_limits : bool
        Print the colourbar limits.
    vmin_override : float, optional
        Replace the data-derived lower colourbar limit.
    cbar_label : str, optional
        Colourbar label; omitted when ``None``.
    ticks : array-like, optional
        Explicit colourbar tick positions.
    labelbottom_off : bool
        Issue the legacy ``tick_params(labelbottom='off')`` call (see note
        in the body).
    labelsize : int, optional
        Colourbar tick label size.
    decimal_ticks : bool
        Format colourbar tick labels as plain decimals instead of exponents.
    """
    cvs = ds.Canvas(plot_width=500, plot_height=500)
    agg = cvs.points(points, 'x', 'y', reduction)
    img = tf.shade(agg, cmap=cmap, how='log')
    if spread_threshold is not _NO_DYNSPREAD:
        img = tf.dynspread(img, threshold=spread_threshold, max_px=max_px, shape='square', how='over')
    export_image(img, stem, fmt='.png', background='black')

    # Re-read the PNG into a figure and append a colourbar axis below it.
    fig = plt.figure(figsize=(10,fheight))  # y axis larger to fit the colourbar in
    plt.imshow(mpimg.imread(stem + '.png'))
    plt.gca().set_axis_off()
    plt.tick_params(axis='both', which='both', right=False, left=False, top=False, bottom=False)
    ax_divider = make_axes_locatable(plt.gca())
    cax = ax_divider.append_axes("bottom", size=cax_size, pad="1%")

    # Colourbar limits come from the values datashader actually binned.
    binned = agg.data
    if drop_zeros:
        binned = binned[np.nonzero(binned)]
    finite = binned[np.isfinite(binned)]  # ignore NaNs (empty pixels)
    vmin = finite.min()
    vmax = finite.max()
    if vmin_override is not None:
        vmin = vmin_override
    if echo_limits:
        print(vmin, vmax)

    cbar_kwargs = dict(orientation='horizontal', pad=0.01, cax=cax)
    if cbar_label is not None:
        cbar_kwargs['label'] = cbar_label
    cbar = mpl.pyplot.colorbar(
        mpl.cm.ScalarMappable(norm=mpl.colors.LogNorm(vmin=vmin, vmax=vmax), cmap=cmap),
        **cbar_kwargs)

    if ticks is not None:
        cbar.set_ticks(ticks)
    if labelbottom_off:
        # NOTE(review): kept verbatim from the original. Recent Matplotlib
        # expects a bool here, so the string 'off' is presumably treated as
        # truthy and no longer hides the labels - confirm the intent before
        # changing it to False.
        cax.tick_params(which='both', labelbottom='off')
    if labelsize is not None:
        cbar.ax.tick_params(labelsize=labelsize)
    if decimal_ticks:
        formatter = mpl.ticker.FuncFormatter(lambda y, _: '{:g}'.format(y))
        cax.xaxis.set_major_formatter(formatter)  # apply to major and minor ticks
        cax.xaxis.set_minor_formatter(formatter)

    fig.tight_layout()
    fig.savefig(stem + '.pdf', bbox_inches='tight')
    plt.close(fig)


def plot_umap_ds_SpecObjs_probs(df, sup, label='unknown', GQSsplit=False):
    """Plot mean and standard deviation of classification probability on the UMAP plane.

    Only rows with a non-null ``'class_pred'`` (the test rows) are drawn.
    Two figures are always produced from ``'prob_best'``; with ``GQSsplit``
    a mean and a std figure are additionally produced for each predicted
    class (GALAXY/``'prob_g'``, QSO/``'prob_q'``, STAR/``'prob_s'``).

    Reads module-level names that are not defined in this block:
    ``prob_mean_c`` and ``prob_std_c`` (colormaps), ``fheight``, and - only
    when ``GQSsplit`` is set - ``threshold_g``, ``threshold_q``,
    ``threshold_s`` and ``max_px``.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs ``'class_pred'``, ``'x'``, ``'y'``, ``'prob_best'`` and, for
        ``GQSsplit``, ``'prob_g'``, ``'prob_q'``, ``'prob_s'``.
    sup : {'unsup', 'sup'}
        Selects the output directory ``HSC_umap_save/unsupervised/`` or
        ``HSC_umap_save/supervised/``.
    label : str, optional
        Inserted into the output file names.
    GQSsplit : bool, optional
        Also produce the per-class figures.

    Raises
    ------
    ValueError
        If ``sup`` is neither ``'unsup'`` nor ``'sup'``. (The original
        rendered everything and then silently saved nothing.)
    """
    out_dirs = {'unsup': 'HSC_umap_save/unsupervised/', 'sup': 'HSC_umap_save/supervised/'}
    if sup not in out_dirs:
        raise ValueError("sup must be 'unsup' or 'sup', got {!r}".format(sup))
    prefix = out_dirs[sup] + 'UMAP-' + label

    df_test = df.loc[df['class_pred'].notnull()]

    # Mean of the best probability: taller colourbar, fixed decimal ticks,
    # limits taken from all finite pixels (zeros are not dropped here).
    _render_prob_map(df_test, ds.mean('prob_best'), prob_mean_c, prefix + '-probs-mean',
                     cax_size="5%", drop_zeros=False, ticks=np.linspace(0.4, 1, 7),
                     labelsize=30, decimal_ticks=True)

    # Standard deviation of the best probability.
    _render_prob_map(df_test, ds.std('prob_best'), prob_std_c, prefix + '-probs-std',
                     echo_limits=True, labelbottom_off=True, labelsize=30)

    if not GQSsplit:
        return

    # (file suffix / label noun, predicted class, probability column,
    #  dynspread threshold, fixed lower colourbar limit for the std map)
    per_class = (
        ('galaxies', 'GALAXY', 'prob_g', threshold_g, None),
        ('quasars', 'QSO', 'prob_q', threshold_q, None),
        ('stars', 'STAR', 'prob_s', threshold_s, 1.01e-3),
    )
    for plural, class_value, column, threshold, std_vmin in per_class:
        # Selecting from df_test equals selecting from df: a row whose
        # class_pred matches a class name is necessarily non-null.
        subset = df_test[df_test.class_pred == class_value]

        _render_prob_map(subset, ds.mean(column), prob_mean_c,
                         prefix + '-probs-mean-' + plural,
                         spread_threshold=threshold,
                         cbar_label='Mean probability for predicted ' + plural + ' (test dataset)',
                         decimal_ticks=True)

        _render_prob_map(subset, ds.std(column), prob_std_c,
                         prefix + '-probs-std-' + plural,
                         spread_threshold=threshold,
                         vmin_override=std_vmin,
                         cbar_label='Standard deviation of probabilities for predicted ' + plural + ' (test dataset)',
                         labelbottom_off=True)
#------------------------------------------------------------------------------------------------------------



def plot_umap_ds_PhotoObjs_classes(df, sup, label='unknown'):
    """Plot the UMAP embedding of ``df`` coloured by class and save it as PNG and PDF.

    The points are binned on a 500x500 datashader canvas (one count per
    category of ``class_cat``), shaded on a log scale and exported as a PNG.
    That PNG is then read back into a matplotlib figure, a text-only legend
    is added, and the figure is saved as a PDF with the same file stem.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``'x_' + sup``, ``'y_' + sup`` and a
        categorical ``'class_cat'`` column.
    sup : {'sup', 'unsup'}
        Selects both the coordinate columns and the output directory
        (``HSC_umap_save/supervised/`` or ``HSC_umap_save/unsupervised/``).
    label : str, optional
        Inserted into the output file name.

    Raises
    ------
    ValueError
        If ``sup`` is neither ``'sup'`` nor ``'unsup'``.
    """
    # Output location depends on the `sup` argument only. (The previous
    # version tested a global named `suplab` here, which only worked when the
    # caller's loop variable happened to have that name and value.)
    out_dirs = {
        'unsup': 'HSC_umap_save/unsupervised/',
        'sup': 'HSC_umap_save/supervised/',
    }
    if sup not in out_dirs:
        raise ValueError("sup must be 'sup' or 'unsup', got {!r}".format(sup))
    stem = out_dirs[sup] + 'UMAP-photo-' + label + '-RFclasslabels'

    # Plotting: use datashader, one colour per class
    cvs = ds.Canvas(plot_width=500, plot_height=500)
    agg = cvs.points(df, 'x_'+sup, 'y_'+sup, ds.count_cat('class_cat'))
    ckey = dict(GALAXY=(101,236,101), QSO='hotpink', STAR='dodgerblue')
    img = tf.shade(agg, color_key=ckey, how='log')
    export_image(img, stem, fmt='.png', background='white')

    # generate figure from the png just created
    fig = plt.figure(figsize=(10,10))
    img = mpimg.imread(stem + '.png')
    plt.imshow(img)
    plt.gca().set_axis_off()
    plt.tick_params(axis='both', which='both', right=False, left=False, top=False, bottom=False)

    # legend for class labels: the handles have no marker and no line, so only
    # the coloured text identifies each class
    g_leg = plt.Line2D((0,0),(0,0), color='lightgreen', marker='', linestyle='', label='Galaxies')
    q_leg = plt.Line2D((0,0),(0,0), color='hotpink', marker='', linestyle='', label='Quasars')
    s_leg = plt.Line2D((0,0),(0,0), color='dodgerblue', marker='', linestyle='', label='Stars')
    leg = plt.legend([g_leg, q_leg, s_leg], ['Galaxies', 'Quasars', 'Stars'], frameon=False, fontsize=22)
    for text, colour in zip(leg.get_texts(), ('lightgreen', 'hotpink', 'dodgerblue')):
        text.set_color(colour)

    fig.tight_layout()
    fig.savefig(stem + '.pdf', bbox_inches='tight')
    plt.close(fig)
#------------------------------------------------------------------------------------------------------------



def _photoprobs_begin(points, sup, reduction, cmap, stem):
    """Shade ``points`` with datashader, export the PNG and open a figure showing it.

    Returns ``(fig, cax, agg)``: the new matplotlib figure, an axis appended
    below the image for the colourbar, and the datashader aggregate.
    """
    cvs = ds.Canvas(plot_width=1000, plot_height=1000)
    agg = cvs.points(points, 'x_'+sup, 'y_'+sup, reduction)
    img = tf.shade(agg, cmap=cmap, how='log')
    export_image(img, stem, fmt='.png', background='black')

    # generate figure with png created and append colourbar axis
    fig = plt.figure(figsize=(10,fheight))  # y axis larger to fit cbar in
    plt.imshow(mpimg.imread(stem + '.png'))
    plt.gca().set_axis_off()
    plt.tick_params(axis='both', which='both', right=False, left=False, top=False, bottom=False)
    # create new axis below main axis for colourbar
    ax_divider = make_axes_locatable(plt.gca())
    cax = ax_divider.append_axes("bottom", size="3%", pad="1%")
    return fig, cax, agg


def _photoprobs_limits(values, drop_zeros=True):
    """Return ``(min, max)`` of the finite entries of ``values``.

    With ``drop_zeros`` the exact zeros are removed first, because a log
    colour scale cannot start at zero.
    """
    if drop_zeros:
        values = values[np.nonzero(values)]
    finite = values[np.isfinite(values)]  # isfinite to ignore nans
    return finite.min(), finite.max()


def _photoprobs_colorbar(cax, cmap, vmin, vmax, label=None):
    """Draw a horizontal log-scaled colourbar in ``cax`` and return it."""
    kwargs = dict(orientation='horizontal', pad=0.01, cax=cax)
    if label is not None:
        kwargs['label'] = label
    mappable = mpl.cm.ScalarMappable(norm=mpl.colors.LogNorm(vmin=vmin, vmax=vmax), cmap=cmap)
    return mpl.pyplot.colorbar(mappable, **kwargs)


def _photoprobs_decimal_ticks(cax):
    """Format major and minor colourbar tick labels as decimals, not exponents."""
    formatter = mpl.ticker.FuncFormatter(lambda y, _: '{:g}'.format(y))
    cax.xaxis.set_major_formatter(formatter)
    cax.xaxis.set_minor_formatter(formatter)


def _photoprobs_finish(fig, stem):
    """Lay out the figure, save it as ``stem + '.pdf'`` and close it."""
    fig.tight_layout()
    fig.savefig(stem + '.pdf', bbox_inches='tight')
    plt.close(fig)


def plot_umap_ds_PhotoObjs_probs(df, sup, label='unknown', GQSsplit=False):
    """Plot mean and standard deviation of class probabilities over the UMAP embedding.

    For every panel the points are binned on a 1000x1000 datashader canvas,
    shaded on a log scale and exported as a PNG; the PNG is then read back
    into a matplotlib figure, a horizontal log colourbar is appended below
    it, and the figure is saved as a PDF with the same file stem.

    Two panels are always produced (mean and std of ``prob_best``). The
    colourbar limits of the std panel are also printed. With ``GQSsplit``
    six more panels are produced: mean and std of ``prob_g`` / ``prob_q`` /
    ``prob_s`` restricted to rows whose ``class_pred`` is ``'GALAXY'`` /
    ``'QSO'`` / ``'STAR'`` respectively.

    Reads the module-level settings ``fheight``, ``prob_mean_c`` and
    ``prob_std_c``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``'x_' + sup``, ``'y_' + sup`` and ``'prob_best'``; with
        ``GQSsplit`` also ``'class_pred'``, ``'prob_g'``, ``'prob_q'`` and
        ``'prob_s'``.
    sup : {'sup', 'unsup'}
        Selects both the coordinate columns and the output directory
        (``HSC_umap_save/supervised/`` or ``HSC_umap_save/unsupervised/``).
    label : str, optional
        Inserted into the output file names.
    GQSsplit : bool, optional
        Also produce the per-class panels.

    Raises
    ------
    ValueError
        If ``sup`` is neither ``'sup'`` nor ``'unsup'``.
    """
    # Output location depends on the `sup` argument only. (The previous
    # version tested a global named `suplab` here, which only worked when the
    # caller's loop variable happened to have that name and value.)
    out_dirs = {
        'unsup': 'HSC_umap_save/unsupervised/',
        'sup': 'HSC_umap_save/supervised/',
    }
    if sup not in out_dirs:
        raise ValueError("sup must be 'sup' or 'unsup', got {!r}".format(sup))
    base = out_dirs[sup] + 'UMAP-photo-' + label + '-probs-'

    # ------ ALL SOURCES: mean of the best probability ------
    stem = base + 'mean'
    fig, cax, agg = _photoprobs_begin(df, sup, ds.mean('prob_best'), prob_mean_c, stem)
    # zeros are not removed for this panel (matches the original behaviour)
    vmin, vmax = _photoprobs_limits(agg.data, drop_zeros=False)
    cbar = _photoprobs_colorbar(cax, prob_mean_c, vmin, vmax)
    _photoprobs_decimal_ticks(cax)
    cbar.set_ticks(np.linspace(0.4, 1, 7))
    cbar.ax.tick_params(labelsize=30)
    _photoprobs_finish(fig, stem)

    # ------ ALL SOURCES: std of the best probability ------
    stem = base + 'std'
    fig, cax, agg = _photoprobs_begin(df, sup, ds.std('prob_best'), prob_std_c, stem)
    vmin, vmax = _photoprobs_limits(agg.data)
    print(vmin, vmax)
    cbar = _photoprobs_colorbar(cax, prob_std_c, vmin, vmax)
    # NOTE(review): 'off' is a non-empty string; recent matplotlib expects a
    # bool here, so this probably does not hide the labels - confirm intent.
    cax.tick_params(which='both', labelbottom='off')
    cbar.ax.tick_params(labelsize=30)
    _photoprobs_finish(fig, stem)

    if not GQSsplit:
        return

    # (class_pred value, probability column, name used in files and labels,
    #  fixed lower colourbar limit for the std panel or None to use the data).
    # The fixed limits are hand-picked values carried over unchanged.
    per_class = (
        ('GALAXY', 'prob_g', 'galaxies', 8e-4 if sup == 'sup' else 1.01e-3),
        ('QSO', 'prob_q', 'quasars', None),
        ('STAR', 'prob_s', 'stars', 1.01e-3),
    )
    for class_name, prob_col, plural, std_floor in per_class:
        subset = df[df.class_pred == class_name]

        # mean probability for this predicted class
        stem = base + 'mean-' + plural
        fig, cax, agg = _photoprobs_begin(subset, sup, ds.mean(prob_col), prob_mean_c, stem)
        vmin, vmax = _photoprobs_limits(agg.data)
        _photoprobs_colorbar(
            cax, prob_mean_c, vmin, vmax,
            label='Mean probability for predicted ' + plural + ' (unclassified dataset)')
        _photoprobs_decimal_ticks(cax)
        _photoprobs_finish(fig, stem)

        # std of the probability for this predicted class
        stem = base + 'std-' + plural
        fig, cax, agg = _photoprobs_begin(subset, sup, ds.std(prob_col), prob_std_c, stem)
        vmin, vmax = _photoprobs_limits(agg.data)
        if std_floor is not None:
            vmin = std_floor
        _photoprobs_colorbar(
            cax, prob_std_c, vmin, vmax,
            label='Standard deviation of probabilities for predicted ' + plural + ' (unclassified dataset)')
        # NOTE(review): see the remark on labelbottom='off' above.
        cax.tick_params(which='both', labelbottom='off')
        _photoprobs_finish(fig, stem)
#------------------------------------------------------------------------------------------------------------

## Main Code

## 色やパラメーターの設定

In [4]:
# Global matplotlib defaults for every figure produced below
mpl.rcParams.update({'font.size': 10, 'figure.dpi': 100})

# Figure height used as plt.figure(figsize=(10, fheight)) by the plotting functions
fheight = 10

# Settings referenced by the (currently commented-out) tf.dynspread calls
threshold_g, threshold_q, threshold_s = 0.92, 0.90, 0.85
max_px = 2 # more than enough, 3 makes pixels too big

# Colour maps for the mean / standard-deviation probability maps
prob_mean_c = prob_std_c = mpl.cm.YlOrBr


# Per-class colour maps running from white to the class colour
colors_g = [
    (1,1,1),
    (101/255,236/255,101/255), # alexgreen
]
colors_q = [
    (1,1,1),
    (255/255,105/255,180/255), # hotpink
]
colors_s = [
    (1,1,1),
    (30/255,144/255,255/255), # dodgerblue
]
cmap_g = make_cmap(colors_g)
cmap_q = make_cmap(colors_q)
cmap_s = make_cmap(colors_s)

## 天体種族が既知なデータ(spec data)の読み込み & 準備

In [9]:
# Feature sets. The cmodel magnitudes are kept for reference but not used here.
# cmodel = ['g_cmodel_mag', 'r_cmodel_mag', 'i_cmodel_mag', 'z_cmodel_mag', 'y_cmodel_mag']
psf = [
    'g_psfflux_mag',
    'r_psfflux_mag',
    'i_psfflux_mag',
    'z_psfflux_mag',
    'y_psfflux_mag',
]
color = [
    'g-r',
    'r-i',
    'i-z',
    'z-y',
]

feature_columns = [*psf, 'ishape', *color]

place = "psf_ishape_color"
label = 'spec-halftrain'

# If this load fails, re-run HSC_ML to regenerate df_spec_classprobs.
dfspec = load_obj('HSC_ML_save/'+place+'/df_spec_classprobs')

#dfphoto = load_obj(sys.argv[1])
print(dfspec.shape)

# UMAP needs numeric class labels; -1 marks a source as belonging to no class (semi-supervised).
dfspec['class_i'] = -1
for _class_name, _class_id in (('GALAXY', 1), ('QSO', 2), ('STAR', 3)):
    dfspec.loc[dfspec['class']==_class_name, 'class_i'] = _class_id
# Categorical copy of the class column, used by ds.count_cat for colouring
dfspec['class_cat'] = dfspec['class'].astype('category')

(201493, 57)


## モデルの作成 & プロット

In [11]:
# Fit a supervised and an unsupervised UMAP on the spec data and save each embedding
for supervised, suplab in ((True, 'sup'), (False, 'unsup')):
    dfspec_umap = run_umap_spec(dfspec, feature_columns, label=label, supervised=supervised, sample_train=True)
    # dfspec_umap = run_umap_spec(dfspec, feature_columns, label=label, supervised=supervised, sample_train=False)
    # highest of the three class probabilities for each source
    dfspec_umap['prob_best'] = dfspec_umap[['prob_g', 'prob_q', 'prob_s']].max(axis=1)
    # save for plotting later
    if supervised:
        save_obj(dfspec_umap, 'HSC_umap_save/supervised/dfspec-'+label+'-'+suplab)
    else:
        save_obj(dfspec_umap, 'HSC_umap_save/unsupervised/dfspec-'+label+'-'+suplab)
    #dfspec_umap = load_obj('dfspec'+label)

# plot classes
for suplab in ('sup', 'unsup'):
    if suplab == 'sup':
        dfspec_umap = load_obj('HSC_umap_save/supervised/dfspec-'+label+'-'+suplab)
    else:
        dfspec_umap = load_obj('HSC_umap_save/unsupervised/dfspec-'+label+'-'+suplab)
    plot_umap_ds_SpecObjs_classes(dfspec_umap, sup=suplab, label=label+'-'+suplab)

# plot probabilities
for suplab in ('sup', 'unsup'):
    if suplab == 'sup':
        dfspec_umap = load_obj('HSC_umap_save/supervised/dfspec-'+label+'-'+suplab)
    else:
        dfspec_umap = load_obj('HSC_umap_save/unsupervised/dfspec-'+label+'-'+suplab)
    plot_umap_ds_SpecObjs_probs(dfspec_umap, sup=suplab, label=label+'-'+suplab, GQSsplit=False)

Selecting half the training data...
Clustering with UMAP
Doing supervised UMAP
Fitting to 50373 data points...
Selecting half the training data...
Clustering with UMAP
Doing unsupervised UMAP
Fitting to 50373 data points...
0.001287696884094271 0.29000000000000004
0.0015713484026367627 0.28874248426967275


## 未分類データにUMAPを使用する

## データの読み込み & 準備

In [13]:
# Previously unclassified sources, now carrying predicted classes
dfphoto = load_obj('HSC_classifynew/cmodel_ishape_color/mag_lim/classified_source')
# Supervised UMAP model fitted on half of the training data
umap_model_sup = load_obj('HSC_umap_save/supervised/umap_model_supspec-halftrain' )
# Unsupervised UMAP model fitted on half of the training data
umap_model_unsup = load_obj('HSC_umap_save/unsupervised/umap_model_unsupspec-halftrain')

# Split the data into 5 parts to reduce the load of each transform call
dfphoto_split = np.array_split(dfphoto, 5)
u_photo_sup, u_photo_unsup = [], []

# Save / reload the split if needed
# save_obj(dfphoto_split, 'df_photo_split')
# dfphoto_split = load_obj('df_photo_split')

In [14]:
# Very slow. On a weak machine, use the part-by-part procedure in the
# commented block below instead.
for idx, d in zip(range(1, 6), dfphoto_split):
    print('Transforming dfphoto part {0}/5...'.format(idx))
    features = d[list(feature_columns)]
    # project this part with the supervised model
    u_sup = umap_model_sup.transform(features)
    u_photo_sup.append(u_sup)
    # project this part with the unsupervised model
    u_unsup = umap_model_unsup.transform(features)
    u_photo_unsup.append(u_unsup)


# #------------------------------------------------------------------------------------------------------------
# Alternative: transform one part at a time and save each result to disk
# #------------------------------------------------------------------------------------------------------------

# # dfphoto_split1
# features = dfphoto_split[0][[*feature_columns]]
# u_sup = umap_model_sup.transform(features)
# u_unsup = umap_model_unsup.transform(features)
# save_obj(u_sup, 'u_photo_sup_1')
# save_obj(u_unsup, 'u_photo_unsup_1')

# # dfphoto_split2
# features = dfphoto_split[1][[*feature_columns]]
# u_sup = umap_model_sup.transform(features)
# u_unsup = umap_model_unsup.transform(features)
# save_obj(u_sup, 'u_photo_sup_2')
# save_obj(u_unsup, 'u_photo_unsup_2')

# # dfphoto_split3
# features = dfphoto_split[2][[*feature_columns]]
# u_sup = umap_model_sup.transform(features)
# u_unsup = umap_model_unsup.transform(features)
# save_obj(u_sup, 'u_photo_sup_3')
# save_obj(u_unsup, 'u_photo_unsup_3')

# # dfphoto_split4
# features = dfphoto_split[3][[*feature_columns]]
# u_sup = umap_model_sup.transform(features)
# u_unsup = umap_model_unsup.transform(features)
# save_obj(u_sup, 'u_photo_sup_4')
# save_obj(u_unsup, 'u_photo_unsup_4')

# # dfphoto_split5
# features = dfphoto_split[4][[*feature_columns]]
# u_sup = umap_model_sup.transform(features)
# u_unsup = umap_model_unsup.transform(features)
# save_obj(u_sup, 'u_photo_sup_5')
# save_obj(u_unsup, 'u_photo_unsup_5')

# # reload the saved parts into the two lists
# for idx in range(1,6):
#     u_sup = load_obj('u_photo_sup_' + str(idx))
#     u_photo_sup.append(u_sup)
#     u_unsup = load_obj('u_photo_unsup_' + str(idx))
#     u_photo_unsup.append(u_unsup)
# #------------------------------------------------------------------------------------------------------------

Transforming dfphoto part 1/5...


KeyboardInterrupt: 

In [15]:
# Tidy up: attach the UMAP coordinates of both models to dfphoto
print('Cleaning up dataframes...')
for u_photo, suplab in ((u_photo_sup, 'sup'), (u_photo_unsup, 'unsup')):
    # join the 5 transformed parts back into one array
    u_photo = np.concatenate([u_photo[part] for part in range(5)])
    # label the two coordinate columns for this model (sup/unsup)
    u_photo = pd.DataFrame(u_photo, columns=['x_'+suplab, 'y_'+suplab], index=dfphoto.index)
    # join umap projection to original df
    dfphoto = dfphoto.join(u_photo, how='left')

# categorical class column for datashader colours (it won't accept other formats)
dfphoto['class_cat'] = dfphoto['class_pred'].astype('category')
# highest of the three class probabilities for each source
dfphoto['prob_best'] = dfphoto[['prob_g', 'prob_q', 'prob_s']].max(axis=1)

# Save / reload if needed
# save_obj(dfphoto, 'HSC_umap_save/df_photo_umapfromspec')
# dfphoto = load_obj('HSC_umap_save/df_photo_umapfromspec')

Cleaning up dataframes...


IndexError: list index out of range

## UMAPモデルの適用

In [17]:
# Reload the photometric dataframe with the UMAP coordinates attached; this is
# the file written by the (commented-out) save_obj call after the join step.
dfphoto = load_obj('HSC_umap_save/df_photo_umapfromspec')

In [18]:
# Class maps for the supervised and the unsupervised embedding
for suplab in ('sup', 'unsup'):
    plot_umap_ds_PhotoObjs_classes(dfphoto, sup=suplab, label=label+'-'+suplab)

# Probability maps (mean and std) for both embeddings, without the per-class split
for suplab in ('sup', 'unsup'):
    plot_umap_ds_PhotoObjs_probs(dfphoto, sup=suplab, label=label+'-'+suplab, GQSsplit=False)

0.0006249212598421871 0.3175
0.0005391265523477402 0.32999999999999996
