In [None]:
import numpy as np
import pandas as pd

In [None]:
# Locations of the metadata tables, relative to the notebook's working directory.
TRAIN_METADATA_PATH = '../input/training_set_metadata.csv'
TEST_METADATA_PATH = '../input/test_set_metadata.csv'

# One row per object; the training table also carries the 'target' label column
# that the following cells read.
meta_data = pd.read_csv(TRAIN_METADATA_PATH)
test_meta_data = pd.read_csv(TEST_METADATA_PATH)

In [None]:
# Distinct target labels found in the training metadata, followed by the extra
# label 99, which is appended unconditionally as the last entry.
classes = np.unique(meta_data['target'])
classes_all = np.append(classes, 99)

# Lookup table {class label: position in classes_all}. The position doubles as
# the column index of that class in the submission (0, 1, 2, ...).
target_map = {label: position for position, label in enumerate(classes_all)}

# Attach the position of each row's label as a new 'target_id' column.
target_ids = [target_map[label] for label in meta_data['target']]
meta_data['target_id'] = target_ids

In [None]:
# Split the training metadata into two groups: rows whose 'hostgal_specz' is
# exactly 0 are treated as galactic, every other row as extragalactic.
# NOTE(review): do_prediction() splits on 'hostgal_photoz' instead - confirm
# that the two columns agree on which training objects are galactic.
galactic_cut = meta_data['hostgal_specz'].eq(0)
galactic_data = meta_data.loc[galactic_cut]
extragalactic_data = meta_data.loc[~galactic_cut]

# target_id values observed in each group, with 14 appended to both lists.
# 14 is presumably the target_id of the extra class 99 (the last entry of
# classes_all) - TODO confirm against target_map.
galactic_classes = np.append(np.unique(galactic_data['target_id']), 14)
extragalactic_classes = np.append(np.unique(extragalactic_data['target_id']), 14)

The class weights used below are taken from this kernel: https://www.kaggle.com/ganfear/calculate-exact-class-weights

That kernel is in turn based on this discussion: https://www.kaggle.com/c/PLAsTiCC-2018/discussion/67194

In [None]:
# Weighted class probabilities assigned to every galactic (Milky Way) object.
# Classes never seen in the galactic group keep probability 0.
# target_ids 14 and 5 carry their own weights; all other galactic classes share
# the weight 1.00104. The divisor 7.00657 is presumably the sum of the weights
# of all galactic classes, so that the vector sums to 1 - TODO confirm.
galactic_special_weights = {14: 2.00241, 5: 1.00000}

galactic_probabilities = np.zeros(15)
for class_id in galactic_classes:
    weight = galactic_special_weights.get(class_id, 1.00104)
    galactic_probabilities[class_id] = weight / 7.00657

In [None]:
# Weighted class probabilities assigned to every extragalactic object.
# Classes never seen in the extragalactic group keep probability 0.
# target_ids 14, 1 and 7 carry their own weights; all other extragalactic
# classes share the weight 1.00104. The divisor 13.01868 is presumably the sum
# of the weights of all extragalactic classes, so that the vector sums to 1 -
# TODO confirm.
extragalactic_special_weights = {14: 2.00241, 1: 2.00189, 7: 2.00710}

extragalactic_probabilities = np.zeros(15)
for class_id in extragalactic_classes:
    weight = extragalactic_special_weights.get(class_id, 1.00104)
    extragalactic_probabilities[class_id] = weight / 13.01868

In [None]:
# Apply this prediction to a table
import tqdm
def do_prediction(table):
    """Return one class-probability row per row of ``table``.

    Rows whose ``hostgal_photoz`` equals 0 receive the module-level
    ``galactic_probabilities`` vector; every other row (including rows where
    the value is NaN, since ``NaN == 0`` is False) receives
    ``extragalactic_probabilities``.

    The selection is done in a single vectorised ``np.where`` call rather than
    a Python-level loop over ``table.iterrows()``, so no progress bar is needed
    even for very large tables.

    Parameters
    ----------
    table : pandas.DataFrame
        Must contain a ``hostgal_photoz`` column.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(table), n_classes)`` where ``n_classes`` is the
        length of the two probability vectors. An empty table yields an array
        of shape ``(0, n_classes)``.

    Raises
    ------
    KeyError
        If ``table`` has no ``hostgal_photoz`` column.
    """
    is_galactic = np.asarray(table['hostgal_photoz']) == 0
    # The (n, 1) mask broadcasts against the two (n_classes,) vectors, picking
    # a whole probability vector per row.
    return np.where(
        is_galactic[:, np.newaxis],
        np.asarray(galactic_probabilities),
        np.asarray(extragalactic_probabilities),
    )

# Probability rows for the training metadata. `pred` is not read by any other
# cell visible here - presumably kept for offline evaluation.
pred = do_prediction(meta_data)
# Probability rows for the test metadata; these are written to the submission file.
test_pred = do_prediction(test_meta_data)

In [None]:
# One submission column per entry of classes_all, labelled 'class_<label>'.
submission_columns = ['class_%d' % class_label for class_label in classes_all]

# Index the test predictions by object_id and write them out as the submission CSV.
test_df = pd.DataFrame(
    data=test_pred,
    index=test_meta_data['object_id'],
    columns=submission_columns,
)
test_df.to_csv('./naive_benchmark3.csv')