David Rice, Amber Malpas, Armaan Goyal<br>
Architectures ML Project<br>
Manage Tables

In [35]:
import re
import numpy as np

In [36]:
#Get all the data
#Read the catalogue inside a context manager so the file handle is closed even if reading fails
with open('KOIData_Lissauer2023.csv','r') as inf:
    lines=inf.readlines()

#The first line holds the sheet column names; the line ending is stripped so the
#last column name (and the last value of every row) carries no trailing newline
colname=re.split(',',lines[0].rstrip('\r\n')) #Sheet column names

pltable=[] #List of all values for a planet
for line in lines[1:]:
    if line.strip(): #skip blank lines (e.g. an empty line at the end of the file)
        pltable.append(re.split(',',line.rstrip('\r\n')))

In [37]:
#Combine planets into system data
multis=[] #system & planet info for each system with 3+ planets
planetdata=['TTVperiod','radius','radius_em','radius_ep','nTT','SNR','MES','chisqwttv']
            #planet period, radius, radius minus error, radius plus error, number of transits with transit time measured, 
            #SNR, multiple event statistic, chi-squared with ttv (descriptions https://arxiv.org/pdf/2311.00238)
stardata=['kepmag','rhostar','teff','rstar','mstar','logg','[M/H]']
            #Kepler magnitude, stellar density, effective temperature, stellar radius, stellar mass, log gravity, metallicity
            #(descriptions https://arxiv.org/pdf/2311.00238)

#Look the column positions up once instead of searching the header for every row
koi_col=colname.index('KOI')
status_col=colname.index('StatusFlag')
period_col=colname.index('TTVperiod')
planet_cols=[colname.index(j) for j in planetdata]
star_cols=[colname.index(j) for j in stardata]

def _store_system(sysname,starrow,planets):
    """Append one finished system to multis if it holds 3+ planets.

    sysname: system name stored as the first entry of the row
    starrow: table row the stellar values (stardata columns) are read from
    planets: one list of values per entry of planetdata; every list is
             reordered by increasing period (the first list) before storing
    """
    if len(planets[0])>2:
        sort_ind=np.argsort(planets[0]) #sort by period
        multis.append([sysname]+[float(starrow[c]) for c in star_cols]+[np.array(k)[sort_ind] for k in planets])

currentsys='x' #placeholder intended to match no system name, so the first accepted row starts a new system
currentstar=None #first accepted row of the current system; stellar values are taken from this row
dumbplanets=[[]] #one list of values per entry of planetdata for the current system
for row in pltable:
    #Planet candidate in most recent survey and less than 1000 days
    if row[status_col][0]=='P' and 0<float(row[period_col])<1000:
        sysname=row[koi_col][0:-3] #KOI entry without its last three characters, used as the system name
        if sysname!=currentsys:
            #A new system starts: store the previous one (if large enough) and reset the accumulators.
            #The stellar values come from a row of the finished system itself, not from whatever
            #row happens to precede the new system in the table (that row may have been rejected
            #by the filter above and belong to a different star).
            _store_system(currentsys,currentstar,dumbplanets)
            dumbplanets=[[float(row[c])] for c in planet_cols]
            currentsys=sysname
            currentstar=row
        else:
            for vals,c in zip(dumbplanets,planet_cols):
                vals.append(float(row[c]))

#The loop only stores a system when the next one begins, so the final system must be stored here
_store_system(currentsys,currentstar,dumbplanets)

newcolumns=['KOI']+stardata+planetdata #save column data

In [38]:
#Gap Complexity
def gapcomp(periods):
    """Return the gap complexity of a sequence of orbital periods.

    For every adjacent pair the normalised log-spacing
    p* = log(P_next/P_prev) / log(P_last/P_first) is formed. The result is
    -(1/cmax) * sum(p* log p*) * sum((p* - 1/ngaps)**2), where ngaps is
    len(periods)-1 and cmax is read from a fixed table at index len(periods)-3.

    periods: indexable, sliceable sequence of periods (callers in this notebook
             pass them sorted by increasing period)
    """
    cmax=[0.106,0.212,0.291,0.350,0.398,0.437,0.469,0.497]
    entropy_terms=[]
    spread_terms=[]
    for inner,outer in zip(periods[:-1],periods[1:]):
        pstar=np.log(outer/inner)/np.log(periods[-1]/periods[0])
        entropy_terms.append(pstar*np.log(pstar))
        spread_terms.append((pstar-(1/(len(periods)-1)))**2)
    scale=-(1/cmax[len(periods)-3]) #normalisation picked by planet count
    return scale*np.sum(entropy_terms)*np.sum(spread_terms)

#Gini Index
def gini(ptemp):
    """Return the Gini index of the adjacent period ratios of ptemp.

    The ratios ptemp[j+1]/ptemp[j] are formed first. The value returned is
    sum_{a,b}|r_a - r_b| / (2 n^2 mean(r)) multiplied by n/(n-1), with n the
    number of ratios.
    """
    prats=[outer/inner for inner,outer in zip(ptemp[:-1],ptemp[1:])]
    nrat=len(prats)
    #for each ratio, the summed absolute difference to every ratio (itself included)
    rowsums=[np.sum([np.abs(a-b) for b in prats]) for a in prats]
    prefactor=1/(2*nrat**2*np.mean(prats))
    return prefactor*nrat/(nrat-1)*np.sum(rowsums)

#Intra-system dispersion Weiss 2022 Equation 3
def qdis(ptemp):
    """Return the dispersion of the adjacent period ratios of ptemp.

    Each ratio ptemp[j+1]/ptemp[j] is divided by the mean ratio, the log10 of
    that quotient is squared, and the square root of the summed squares
    divided by (len(ptemp)-1) is returned.
    """
    prats=[outer/inner for inner,outer in zip(ptemp[:-1],ptemp[1:])]
    #the mean ratio is the same for every term; it is only needed when ratios exist
    meanrat=np.mean(prats) if prats else None
    sqlogs=[np.log10(ratio/meanrat)**2 for ratio in prats]
    return np.sqrt(1/(len(ptemp)-1)*np.sum(sqlogs))


#dp/p_i
def dpp(ptemp):
    """Return the fractional spacing of adjacent entries and its mean.

    For each adjacent pair the value (ptemp[j+1]-ptemp[j])/ptemp[j] is computed.
    Returns a tuple (list of those values, their mean).
    """
    dpptemp=[(outer-inner)/inner for inner,outer in zip(ptemp[:-1],ptemp[1:])]
    return dpptemp, np.mean(dpptemp)
        
#zeta
def zeta(ptemp):
    """Return the zeta value of each adjacent pair of ptemp and the mean.

    With prat = ptemp[j+1]/ptemp[j], each value is
    |3*(1/(prat-1) - round(1/(prat-1)))|.
    Returns a tuple (list of values, their mean).
    """
    zetatemp=[]
    for inner,outer in zip(ptemp[:-1],ptemp[1:]):
        offset=1/(outer/inner-1)
        zetatemp.append(np.abs(3*(offset-round(offset)))) #distance from the nearest integer, times 3
    return zetatemp, np.mean(zetatemp)

In [39]:
#Add additional period data
newcols=newcolumns+['n','minper','maxper','meanper','perrange','lograngep','normstdp','dpp','meandpp','gapc','gini','qdisp','zeta','meanzeta']
nperbase=len(newcolumns) #length of a system row before any derived columns are added
for i in range(len(multis)):
    periods=multis[i][newcolumns.index('TTVperiod')] #stored sorted by increasing period
    dpparr,meandpp=dpp(periods) #call once, use both return values
    zetaarr,meanzeta=zeta(periods)
    #Rebuild the row from its base columns instead of appending in place, so that
    #re-running this cell does not add the period columns a second time
    multis[i]=multis[i][:nperbase]+[
        len(periods), #number of planets
        periods[0], #shortest (innermost) period
        periods[-1], #longest (outermost) period
        np.mean(periods), #mean period (never seen this used but threw it in there)
        periods[-1]-periods[0], #period range
        np.log10(periods[-1])-np.log10(periods[0]), #range in log period
        np.std(periods)/np.mean(periods), #normalized standard deviation
        np.array(dpparr), #dp/p_inner array
        meandpp, #mean dp/p_inner
        gapcomp(periods), #gap complexity
        gini(periods), #Gini Index
        qdis(periods), #intra-system dispersion, q
        np.array(zetaarr), #zeta array
        meanzeta, #mean zeta
    ]

In [40]:
#intrasystem dispersion in log-radius
def qdisprad(rtemp):
    """Return the dispersion of the values in rtemp about their mean, in log10.

    Each value is divided by the mean of rtemp, the log10 of that quotient is
    squared, and the square root of the mean of the squares is returned.
    """
    #the mean is identical for every term; it is only needed when values exist
    centre=np.mean(rtemp) if len(rtemp) else None
    sqlogs=[np.log10(radius/centre)**2 for radius in rtemp]
    return np.sqrt(1/(len(rtemp))*np.sum(sqlogs))

#count planet types
def typecounter(rtemp):
    """Count how many values of rtemp fall in each of four size classes.

    Returns [subearths, super-earths, sub-neptunes, neptune+], where a value r
    is counted as: r<=1, then r<1.9, then r<4, otherwise the last class.
    """
    subearth=supearth=subnep=nepplus=0
    for radius in rtemp:
        if radius<=1:
            subearth+=1
        elif radius<1.9: #Using 1.9 Earth-radii for radius valley
            supearth+=1
        elif radius<4: #4 for neptune radius approximately
            subnep+=1
        else:
            nepplus+=1
    return [subearth,supearth,subnep,nepplus]

#Ratio of adjacent radii 
def rrat(rtemp):
    """Return the ratios rtemp[i+1]/rtemp[i] of adjacent entries and their mean.

    Returns a tuple (list of ratios, mean ratio).
    """
    rrattemp=[following/current for current,following in zip(rtemp[:-1],rtemp[1:])]
    return rrattemp, np.mean(rrattemp)

In [41]:
##Add additional radius data
radcols=['minrad','maxrad','meanrad','radrange','stdr','normstdr','qdispr','rrat','meanrrat','subearth','supearth','subnep','nepplus']
#Extend the header only once, so re-running this cell does not duplicate the radius column names
if newcols[-len(radcols):]!=radcols:
    newcols=newcols+radcols
nradbase=len(newcols)-len(radcols) #length of a system row before the radius columns are added

for i in range(len(multis)):
    radiis=multis[i][newcolumns.index('radius')]
    rratarr,meanrrat=rrat(radiis) #call once, use both return values
    #Rebuild the row from the columns that precede the radius data instead of appending
    #in place, so re-running this cell does not add the radius columns a second time
    multis[i]=multis[i][:nradbase]+[
        min(radiis), #min radius
        max(radiis), #max radius
        np.mean(radiis), #mean radius
        max(radiis)-min(radiis), #range radius
        np.std(radiis), #radius standard deviation
        np.std(radiis)/np.mean(radiis), #normalized standard deviation
        qdisprad(radiis), #dispersion of log radius
        np.array(rratarr), #ratio of adjacent radii, next/previous in period order
        meanrrat, #mean ratio of adjacent radii
    ]+typecounter(radiis) #counts of subearth, supearth, subnep, nepplus

In [42]:
#Write to file
#The context manager closes (and flushes) the file even if writing a row raises.
#The layout is unchanged: every entry, including the last one on a line, is followed by a comma.
with open('threeplanetsystemsinfo.csv','w') as outf:
    for i in newcols:
        outf.write(i+',')
    outf.write('\n')
    for i in multis:
        for j in i:
            if isinstance(j, np.ndarray):
                #arrays are written as one bracketed, space-separated field kept on a single line
                outf.write(np.array2string(j, max_line_width=1000)+',')
            else:
                outf.write(str(j)+',')
        outf.write('\n')

In [9]:
#Sanity check of the period statistics on four hand-entered periods
solar=[88,225,365,687]
solar_q=qdis(solar) #dispersion is reused by several of the prints below
print(solar_q)
print(gapcomp(solar))
solarprat=[]
for i,(inner,outer) in enumerate(zip(solar[:-1],solar[1:])):
    solarprat.append(outer/inner) #adjacent period ratio
print(solarprat)
solar_meanrat=np.mean(solarprat)
print(solar_meanrat)
print(solar_q/solar_meanrat)
print(np.log10(solar_q))

0.08264779296797711
0.12757698141193483
[2.5568181818181817, 1.6222222222222222, 1.8821917808219177]
2.0204107282874406
0.04090643145516843
-1.0827687393656946


In [10]:
#Kepler 286
#Same statistics as above for a second hand-entered set of four periods
k286=[1.796,3.468,5.914,29.22]
print(qdis(k286))
print(gapcomp(k286))
prat=[]
for i,(inner,outer) in enumerate(zip(k286[:-1],k286[1:])):
    prat.append(outer/inner) #adjacent period ratio
print(prat)

0.21280569434521746
0.4005081512395987
[1.930957683741648, 1.7053056516724336, 4.940818397024011]


In [21]:
#Display the complete row stored for the first system in multis
#NOTE(review): this cell's execution count is lower than that of the cells that build multis,
#so the output shown below may come from an earlier version of the table - re-run to confirm
multis[0]

['41',
 11.197,
 0.43884,
 5857.0,
 1.53,
 1.1,
 4.103,
 0.098,
 array([ 6.88707653, 12.81590514, 35.333574  ]),
 array([1.27 , 2.269, 1.528]),
 array([-0.054, -0.068, -0.064]),
 array([0.081, 0.128, 0.106]),
 array([198., 101.,  39.]),
 array([ 38.2, 106.7,  28.8]),
 array([27.5, 68.6, 18. ]),
 array([6359.2, 5592.3, 1934.7]),
 3,
 6.88707653,
 35.333574,
 18.345518556666665,
 28.446497469999997,
 0.710152659710835,
 0.6679449216830274,
 array([0.86086289, 1.75700964]),
 1.3089362647074716,
 0.18100667823453048,
 0.1940605207987439,
 0.08576797635833895,
 0.8887144900456984,
 1.27,
 2.269,
 1.689,
 0.9990000000000001,
 0.42343122227818775,
 0.25069936191722186,
 0.10592787003304842,
 0,
 2,
 1,
 0]