## Probability for each class vs. time for one object at a time

Selects a subset of objects and classifiers.  Defines `deltat` as the time of the detection minus the "peak time" of the object, so detections before the peak have negative `deltat`.  Draws a heatmap for each object/classifier combination that shows the probability assigned to each class as a function of `deltat`.

In [None]:
%matplotlib inline
import pandas
import matplotlib
import matplotlib.pyplot
import matplotlib.pyplot as plt
import seaborn

In [None]:
# Run the setup notebook.  It connects to the database, defines the function `run_query`, and defines the dictionaries `classifier_info` and `classname`.
%run query_for_probabilistic_metrics_setup.ipynb

In [None]:
# Display the `classname` dictionary defined by the setup notebook (used below to turn class ids into labels).
classname

In [None]:
# Restrict the analysis to a couple of classifiers so that the queries below
# finish in a reasonable amount of time.  The classifier ids kept are:
#   40 = Alerce 3.1.0 balto
#   44 = Antares 2.0.0 LiCuParsnip_filter
use_cfers = { cferid: info for cferid, info in classifier_info.items() if cferid in ( 40, 44 ) }
use_cfers

In [None]:
# Fetch classification rows for the selected classifiers, restricted to rows
# whose trueClassId is 111, along with deltat = midPointTai - peakmjd.  The
# result is ordered by object and alert-sent time and capped at 10000 rows, so
# the final object in the result may be incomplete.  Start and end timestamps
# are written to stderr to show how long the query took.
# NOTE(review): `sys` and `datetime` are not imported in the cells visible
# here; presumably the setup notebook brings them into scope -- confirm.
sys.stderr.write( f"Query start at {datetime.datetime.now().isoformat()}...\n" )
query = "".join( [
    'SELECT ',
    '  v."classifierId",v."diaObjectId",v."classId",v."alertId",v."trueClassId",v."probability",v."alertSentTimestamp", ',
    '  s."midPointTai"-ot.peakmjd AS deltat ',
    'FROM elasticc_view_sourceclassifications v ',
    'INNER JOIN elasticc_diasource s ON v."diaSourceId"=s."diaSourceId" ',
    'INNER JOIN elasticc_diaobjecttruth ot ON v."diaObjectId"=ot."diaObjectId" ',
    'WHERE "classifierId" IN %(cfers)s AND "trueClassId"=111 ',
    'ORDER BY "diaObjectId","alertSentTimestamp" ',
    'LIMIT 10000',
] )
# The classifier ids are passed as a tuple for the %(cfers)s placeholder.
rows = run_query( query, { 'cfers': tuple( use_cfers ) } )
sys.stderr.write( f"Query done at {datetime.datetime.now().isoformat()}\n" )
# Index by (object, classifier, class) so later cells can slice with .xs().
data = pandas.DataFrame( rows )
data = data.set_index( ['diaObjectId', 'classifierId', 'classId'] )
data

In [None]:
# Count the distinct classifiers that produced classifications for each object,
# then collect the ids of the objects that more than one classifier reported on.
numcfersperobj = data.reset_index().groupby( 'diaObjectId' )['classifierId'].nunique()
objswithmultiplecfers = numcfersperobj.loc[ numcfersperobj > 1 ].index.values
print( f"There are {len(numcfersperobj)} objects, {len(objswithmultiplecfers)} of which are classified by >1 classifier." )

In [None]:
# For each of the first 10 objects classified by more than one classifier, draw
# one figure with one heatmap panel per classifier.  In each panel the rows are
# class names, the columns are deltat values (rounded to 0.01), and the colour
# is the probability the classifier assigned to that class.
for objid in objswithmultiplecfers[0:10]:
    objdf = data.xs( objid, level='diaObjectId' )
    cfers = objdf.index.get_level_values( 'classifierId' ).unique().values
    # 6 inches of figure width per classifier panel
    wid = 6 * len(cfers)
    fig = matplotlib.pyplot.figure( figsize=(wid,6), tight_layout=True )
    fig.suptitle( f"diaObjectId = {objid}, true class {classname[objdf['trueClassId'].iloc[0]]}" )
    for i, cfer in enumerate( cfers ):
        objcferdf = objdf.xs( cfer, level='classifierId' ).reset_index()
        # Round deltat so that it makes a readable set of heatmap columns;
        # rows that share a rounded deltat are combined by pivot_table's
        # default aggregation (mean).
        objcferdf['deltat'] = objcferdf['deltat'].round(2)
        objcferdf['class'] = objcferdf['classId'].apply( lambda classid : classname[classid] )
        pt = pandas.pivot_table( objcferdf, values='probability', columns='deltat', index='class' )
        ax = fig.add_subplot( 1, len(cfers), i+1 )
        seaborn.heatmap( pt, ax=ax )
        # Label the axes *after* drawing: seaborn.heatmap sets the x/y labels
        # from the pivot table's column/index names ('deltat' / 'class'), so
        # labels set before the call are overwritten and never shown.
        ax.set_title( f"{classifier_info[cfer]['brokerName']} {classifier_info[cfer]['brokerVersion']} "
                      f"{classifier_info[cfer]['classifierName']} {classifier_info[cfer]['classifierParams']}" )
        ax.set_xlabel( 'Δt (days)' )
        ax.set_ylabel( 'classId' )
    fig.show()