In [1]:
import math
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [45]:
#Calculate Weight for each parameter in the similarity index
def calculate_weight(ref_val, upper_lim, lower_lim, threshold=0.8):
  """Return the ESI weight exponent for one parameter.

  Two one-sided exponents are computed, one per limit. Each is the exponent
  for which a value lying exactly on that limit scores ``threshold`` in the
  similarity expression ``(1 - |x - ref| / (x + ref)) ** w``. The weight is
  the geometric mean of the two.

  Args:
    ref_val: Reference value of the parameter.
    upper_lim: Upper limit of the parameter.
    lower_lim: Lower limit of the parameter.
    threshold: Score assigned to a value lying on a limit.

  Returns:
    The weight, rounded to two decimal places.
  """
  log_threshold = math.log(threshold)

  # Exponent derived from the lower limit.
  gap_below = (ref_val - lower_lim)/(ref_val + lower_lim)
  exponent_below = log_threshold/math.log(1-gap_below)

  # Exponent derived from the upper limit.
  gap_above = (upper_lim - ref_val)/(upper_lim + ref_val)
  exponent_above = log_threshold/math.log(1-gap_above)

  return round(math.sqrt(exponent_below*exponent_above), 2)

In [242]:
#Calculate Earth Similarity Index for Individual Params

def calc_ESI_param(param, upper_lim, lower_lim,ref_val, threshold = 0.8):
  """Compute the Earth Similarity Index of every row for one parameter.

  Each value ``x`` is scored as ``(1 - |x - ref| / (ref + x)) ** weight`` and
  rounded to six decimal places.

  Args:
    param: Single-column DataFrame; the column name identifies the parameter.
    upper_lim: Upper limit used to derive the weight, or NaN/None to use the
      built-in weight of a known parameter.
    lower_lim: Lower limit used to derive the weight, or NaN/None to use the
      built-in weight of a known parameter.
    ref_val: Reference value, or NaN/None to use the built-in reference of a
      known parameter.
    threshold: Score assigned to a value lying on a limit; only used when the
      weight is derived from the limits.

  Returns:
    A list with one score per row of ``param``.

  Raises:
    ValueError: If the parameter has no built-in default and the reference
      value or the limits are missing. Without this check the NaN inputs
      would propagate and every score would silently become NaN.
  """
  default_weights = {'radius': 0.57, 'density': 1.07, 'escape_velocity': 0.70, 'revolution': 0.70, 'surface_gravity': 0.13, 'surface_temperature': 5.58}

  ref_values = {'radius': 1, 'density': 1, 'escape_velocity': 1, 'revolution': 1, 'surface_gravity': 1, 'surface_temperature': 288}

  name = param.columns[0]

  # Fall back to the built-in reference value only when none was supplied.
  if pd.isna(ref_val):
    if name not in ref_values:
      raise ValueError("No reference value given for parameter '{}' and no default is known".format(name))
    ref_val = ref_values[name]

  # Limits are optional for known parameters: without them the built-in weight is used.
  if pd.isna(upper_lim) or pd.isna(lower_lim):
    if name not in default_weights:
      raise ValueError("No upper/lower limits given for parameter '{}' and no default weight is known".format(name))
    weight = default_weights[name]
  else:
    weight = calculate_weight(ref_val, upper_lim, lower_lim, threshold)

  return [
    round(math.pow(1-abs((param.iat[i,0] - ref_val)/(ref_val + param.iat[i,0])), weight), 6)
    for i in range(len(param))
  ]

In [243]:
#Pass a DataFrame of Params (one column per parameter) to calculate ESI for
#Pass an array of upper lims for the respective Params
#Pass an array of lower lims for the respective Params

def calc_ESI(params, upper_lims=None, lower_lims=None,ref_val=None):
    """Compute per-parameter ESI scores for every row of ``params``.

    Args:
        params: DataFrame with one column per parameter.
        upper_lims: Upper limits, one per column of ``params``. Defaults to
            NaN for every column.
        lower_lims: Lower limits, one per column of ``params``. Defaults to
            NaN for every column.
        ref_val: Reference values, one per column of ``params``. Defaults to
            NaN for every column.

    Returns:
        A DataFrame with one ``ESI_<column>`` column per input column, or
        None when a ValueError is raised during validation or scoring (the
        message is printed).
    """
    colnames = list(params.columns)
    n_params = len(colnames)

    #Default Upper Lims
    if upper_lims is None:
        upper_lims = [float("NaN")]*n_params

    #Default Lower Lims
    if lower_lims is None:
        lower_lims = [float("NaN")]*n_params

    if ref_val is None:
        ref_val = [float("NaN")]*n_params

    try:
        #Perform sanity checks
        if not (n_params == len(upper_lims) == len(lower_lims) == len(ref_val)):
            raise ValueError(
                "Expected {} upper lims, lower lims and ref values (one per column); got {}, {} and {}".format(
                    n_params, len(upper_lims), len(lower_lims), len(ref_val)))

        for name, upper, lower in zip(colnames, upper_lims, lower_lims):
            # NaN marks "no limit supplied", so there is nothing to compare.
            if pd.isna(upper) or pd.isna(lower):
                continue
            if upper < lower:
                raise ValueError(
                    "Upper lim {} is below lower lim {} for column '{}'".format(upper, lower, name))

        #Calculate ESI for each column
        ESI_df = pd.DataFrame()
        for i, name in enumerate(colnames):
            ESI_param = calc_ESI_param(params.iloc[:,[i]], upper_lims[i], lower_lims[i],ref_val[i])
            ESI_df["ESI_{}".format(name)] = ESI_param

        return ESI_df

    except ValueError as e:
        print(e)
        return None

    


In [244]:
# Limits and reference values, listed in the same order as the columns of new_df.
params = ['radius', 'density']
upper_lims = [1.9, 1.5]
lower_lims = [0.5, 0.7]
ref_val = [1, 1]
# NOTE: new_df is created by the readData cell further down; run that cell first.
calc_ESI(new_df, upper_lims, lower_lims, ref_val)

Unnamed: 0,ESI_radius,ESI_density
0,0.795160,0.968867
1,0.849972,0.949134
2,0.975223,0.937854
3,0.760925,0.797841
4,0.807548,0.890053
...,...,...
1562,0.957080,0.955483
1563,0.822139,0.790986
1564,0.962169,0.949666
1565,0.862991,0.963860


In [49]:
### LET THE USER SUPPLY A CSV WITH VARIOUS COLUMNS AND CALCULATE ESI FOR EACH ENTRY 

#1. Read the column names 
#2. Browse the column names and select the columns that appear in the ESI formula - data-cleaning features still need to be added 
#3. Give the option to add custom parameters - FUTURE WORK 
#4. Calculate weights for each parameter 
#5. Decide how to take upper and lower lims - FUTURE WORK 
#6. Calculate ESI 


In [237]:
def readData(csv):
    """Load a CSV and keep only the columns that feed the ESI formula.

    A CSV column is selected when its lower-cased name contains one of the ESI
    parameter names; the selected column is renamed to that parameter name.
    Only the first matching column is used per parameter, so the result never
    contains duplicate column labels.

    Args:
        csv: Path or buffer passed to ``pd.read_csv``; its first column
            becomes the index.

    Returns:
        A new DataFrame holding the matched columns, named after the ESI
        parameters and ordered as in the parameter list below.
    """
    #1. Read Column Names
    df = pd.read_csv(csv, index_col=0)
    cols = list(df.columns)
    print(cols)
    cols_lower = [col.lower() for col in cols]
    print(cols_lower)

    #2. Browse column names and select columns which are in the ESI Formula
    columns = ['radius', 'density', 'escape_velocity', 'revolution']

    # Maps original CSV column name -> ESI parameter name.
    rename_map = {}
    for esi_name in columns:
        for original, lowered in zip(cols, cols_lower):
            if esi_name in lowered and original not in rename_map:
                rename_map[original] = esi_name
                break  # first match wins; later matches would create duplicate labels

    new_df = df[list(rename_map)].rename(columns=rename_map)

    return new_df




# Input CSV location; this is a machine-specific absolute path.
csv_path = r"D:/Downloads/Rock NESI.csv"
new_df = readData(csv_path)

['P. Name Kepler', 'P. Name KOI', 'P. Composition Class', 'P. Mass (EU)', 'P. Radius (EU)', 'P. Density (EU)', 'P. Gravity (EU)', 'P. Esc Vel (EU)', 'P. Teq Mean (K)', 'P. Ts Mean (K)', 'P. Surf Press (EU)', 'P. Period (days)', 'P. Mean Distance (AU)', 'Distance (Km)', 'S. Mass (SU)', 'S.Mass (Kg)', 'Rev (Days)', 'Rev (EU)', 'ESI(R)', 'ESI(D)', 'ESI (Ve)', 'ESI(T)', 'ESI(Rev)', 'ESI(g)', 'ESI_I', 'ESI_S', 'ESI']
['p. name kepler', 'p. name koi', 'p. composition class', 'p. mass (eu)', 'p. radius (eu)', 'p. density (eu)', 'p. gravity (eu)', 'p. esc vel (eu)', 'p. teq mean (k)', 'p. ts mean (k)', 'p. surf press (eu)', 'p. period (days)', 'p. mean distance (au)', 'distance (km)', 's. mass (su)', 's.mass (kg)', 'rev (days)', 'rev (eu)', 'esi(r)', 'esi(d)', 'esi (ve)', 'esi(t)', 'esi(rev)', 'esi(g)', 'esi_i', 'esi_s', 'esi']


In [238]:
#4. Calculate weights for each parameters based on default limits
new_cols = new_df.columns.tolist()
print(new_df)
#OPTIONAL - Pass upper and lower lims; when omitted the default weights are used
#calc_ESI(new_df, upper_lims, lower_lims)

              radius  density
P. Name                      
55 Cnc e        1.99     1.06
61 Vir b        1.66     1.10
alf Cen B b     1.09     0.89
BD+20 594 b     2.23     1.47
BD-06 1339 b    1.91     1.23
...              ...      ...
TRAPPIST-1 d    1.16     0.92
WASP-47 e       1.82     1.49
Wolf 1061 b     1.14     0.91
Wolf 1061 c     1.59     1.07
Wolf 1061 d     1.68     1.11

[1567 rows x 2 columns]


In [239]:
# Show which ESI parameter columns readData kept.
new_df.columns

Index(['radius', 'density'], dtype='object')