In [None]:
# Make Jupyter Notebook full screen: inject a CSS rule that widens the classic
# notebook's ".container" element to 90% of the browser window.
# `display` and `HTML` are imported from the public `IPython.display` module;
# importing them from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))

In [None]:
import treecorr
import numpy
import matplotlib.pyplot as plt
import pandas as pd
import sqlite3
import os
import datetime

In [None]:
# Directory that holds the SQLite databases read by this notebook; derived
# data products are also written back here.
DATA_PATH = '/Users/megantabbutt/CosmologyDataProducts/'
#DATA_PATH = '/afs/hep.wisc.edu/home/tabbutt/private/CosmologyDataProducts/'

# Parent directory under which every run gets its own time-stamped folder.
# Kept in a separate name so that re-running this cell does not keep appending
# the date to an already-extended path.
TESTING_PRODUCTS_ROOT = "/Users/megantabbutt/Cosmology/Cosmology/SNe CrossCorrelations/VerificationTestingProducts/"
#TESTING_PRODUCTS_ROOT = "/afs/hep.wisc.edu/home/tabbutt/public/Cosmology/SNe CrossCorrelations/VerificationTestingProducts/"

# Create the directory to save to and a file with info about this run:
DATE = datetime.datetime.now().strftime("%d_%m_%Y_%H_%M")
CURRENT_DIRECTORY = DATE
TESTING_PRODUCTS_PATH = os.path.join(TESTING_PRODUCTS_ROOT, CURRENT_DIRECTORY)

# exist_ok=True: the timestamp only has minute resolution, so re-running the
# cell within the same minute must not fail on the already-created folder.
os.makedirs(TESTING_PRODUCTS_PATH, exist_ok=True)

# NOTES_NAME deliberately starts with "/" because it is appended to
# TESTING_PRODUCTS_PATH by plain string concatenation.
NOTES_NAME = "/RUNNING_NOTES_" + DATE + ".txt"
NOTES_PATH = TESTING_PRODUCTS_PATH + NOTES_NAME

In [None]:
def NotesToWrite(notes, path=None):
    ''' Appends a string to the running-notes file, followed by a "\\n \\n" separator.

    By default the file is NOTES_PATH (directory TESTING_PRODUCTS_PATH, file name
    NOTES_NAME), which is defined at the beginning of the program.

    @param str notes: A single string to be written.
    @param str path: Optional file to append to instead of NOTES_PATH.
    '''
    if path is None:
        # Looked up at call time so the current run's notes file is always used.
        path = NOTES_PATH
    # The context manager closes the file even if a write raises.
    with open(path, "a") as notes_file:
        notes_file.write(notes)
        notes_file.write("\n \n")


def AutoCorrelationFunction(DataCatalog, RandCatalog, min_sep=0.01, max_sep=10,
                            bin_size=0.2, sep_units='degrees'):
    ''' Creates a simple 2D count-count correlation function using TreeCorr.

    Three pair counts are accumulated with identical binning: data-data,
    random-random and data-random. They are combined by
    NNCorrelation.calculateXi (with both rr and dr supplied).

    @param object DataCatalog: TreeCorr Catalog object for the data
    @param object RandCatalog: TreeCorr Catalog object for the Randoms
    @param float min_sep: Smallest separation to bin, in sep_units.
    @param float max_sep: Largest separation to bin, in sep_units.
    @param float bin_size: Bin width passed to TreeCorr.
    @param str sep_units: Angular units of min_sep / max_sep.

    @return tuple: (r, xi, varxi, sig) where r = exp(meanlogr) of the data-data
        counts, xi and varxi come from calculateXi, and sig = sqrt(varxi).
    '''
    # One shared configuration guarantees the three counts use the same bins.
    binning = dict(min_sep=min_sep, max_sep=max_sep, bin_size=bin_size, sep_units=sep_units)

    nn = treecorr.NNCorrelation(**binning)
    nn.process(DataCatalog)

    rr = treecorr.NNCorrelation(**binning)
    rr.process(RandCatalog)

    dr = treecorr.NNCorrelation(**binning)
    dr.process(DataCatalog, RandCatalog)

    r = numpy.exp(nn.meanlogr)
    # Keyword arguments: accepted by older TreeCorr releases and required by
    # releases where these parameters are keyword-only.
    xi, varxi = nn.calculateXi(rr=rr, dr=dr)
    sig = numpy.sqrt(varxi)

    return r, xi, varxi, sig

In [None]:
# Start the running-notes file with a header line and a free-form description of this run.
for _note in (
    "Created Running notes file for tracking details about this run and products produced/saved",
    "Notes about this specific run: 1st python test worked, trying again with larger data set. ",
):
    NotesToWrite(_note)

In [None]:
NotesToWrite("0. Define the Queries you want to run and write and randoms length:")

# NOTE(review): the LIMIT 50000000 values in the randoms queries below are
# hard-coded and are not derived from randsLength — confirm they should agree.
randsLength = 10**8
NotesToWrite("randsLength for PanSTARRS: " + str(randsLength))

# Pull in All PanSTARRS Data (with a good redshift):
# The two redshift conditions are combined with OR. SQLite's `||` operator is
# string concatenation, not logical OR, so it must not be used here (it yields
# NULL, i.e. drops the row, whenever either redshift is NULL).
qry_PanSTARRS_Data_All = "SELECT ID, DEC, RA, zSN, zHost FROM PanSTARRSNEW WHERE (zSN > -999) OR (zHost > -999)"
NotesToWrite("qry_PanSTARRS_Data_All" + " \n" + qry_PanSTARRS_Data_All)

# Same selection, restricted to DEC > -20.
qry_PanSTARRS_Data_Overlap = """SELECT ID, DEC, RA, zSN, zHost FROM PanSTARRSNEW WHERE (DEC > -20) 
    AND ((zSN > -999) OR (zHost > -999))"""
NotesToWrite("qry_PanSTARRS_Data_Overlap" + " \n" + qry_PanSTARRS_Data_Overlap)

# All rows of the South and North CMASSLOWZTOT tables combined.
qry_BOSS_Data_SouthAndNorthALL = "SELECT * FROM CMASSLOWZTOT_South UNION SELECT * FROM CMASSLOWZTOT_North"
NotesToWrite("qry_BOSS_Data_SouthAndNorthALL" + " \n" + qry_BOSS_Data_SouthAndNorthALL)

# Randoms: each query takes a random subset (ORDER BY RANDOM() LIMIT ...) of
# the South table and of the North table and combines them with UNION.
qry_BOSS_Rands_SouthAndNorthLimit = """SELECT * FROM CMASSLOWZTOT_South_rands 
    WHERE `index` IN (SELECT `index`FROM CMASSLOWZTOT_South_rands ORDER BY RANDOM() LIMIT 50000000) UNION 
    SELECT * FROM CMASSLOWZTOT_North_rands 
    WHERE `index`  IN (SELECT `index` FROM CMASSLOWZTOT_North_rands ORDER BY RANDOM() LIMIT 50000000)"""
NotesToWrite("qry_BOSS_Rands_SouthAndNorthLimit" + " \n" + qry_BOSS_Rands_SouthAndNorthLimit)

qry_CMASS_Rands_SampleLimit = """SELECT * FROM CMASS_South_rands 
    WHERE `index` IN (SELECT `index` FROM CMASS_South_rands ORDER BY RANDOM() LIMIT 50000000) UNION 
    SELECT * FROM CMASS_North_rands WHERE 
    `index` IN (SELECT `index` FROM CMASS_North_rands ORDER BY RANDOM() LIMIT 50000000)"""
NotesToWrite("qry_CMASS_Rands_SampleLimit" + " \n" + qry_CMASS_Rands_SampleLimit)

qry_LOWZ_Rands_SampleLimit = """SELECT * FROM LOWZ_South_rands 
    WHERE `index` IN (SELECT `index` FROM LOWZ_South_rands ORDER BY RANDOM() LIMIT 50000000) UNION 
    SELECT * FROM LOWZ_North_rands WHERE 
    `index` IN (SELECT `index` FROM LOWZ_North_rands ORDER BY RANDOM() LIMIT 50000000)"""
NotesToWrite("qry_LOWZ_Rands_SampleLimit" + " \n" + qry_LOWZ_Rands_SampleLimit)

In [None]:
NotesToWrite("1. Pull in and parse data")

# Each database is opened, queried into a DataFrame and closed again. The
# try/finally guarantees the connection is released even if the query fails.

# PanSTARRS
connPAN = sqlite3.connect(DATA_PATH + 'PanSTARRS.db')
try:
    PanSTARRSNEW_GoodZ = pd.read_sql(qry_PanSTARRS_Data_All, con=connPAN)
finally:
    connPAN.close()
NotesToWrite("PanSTARRSNEW_GoodZ Database (with 10 pointings) objects: " + str(len(PanSTARRSNEW_GoodZ)))
print("PanSTARRSNEW_GoodZ: \n" + str(PanSTARRSNEW_GoodZ.head(3)))

# CMASS/LOWZ:
connBOSS = sqlite3.connect(DATA_PATH + 'CMASS_and_LOWZ.db')
try:
    CMASSLOWZTOT_DF = pd.read_sql(qry_BOSS_Data_SouthAndNorthALL, con=connBOSS)
finally:
    connBOSS.close()
NotesToWrite("CMASSLOWZTOT_DF Database objects: " + str(len(CMASSLOWZTOT_DF)))
print("CMASSLOWZTOT_DF: \n" + str(CMASSLOWZTOT_DF.head(3)))


#Pull in the Randoms provided by CMASS:
connBOSSRands = sqlite3.connect(DATA_PATH + 'CMASS_and_LOWZ_rands.db')
try:
    CMASSLOWZTOT_DF_rands = pd.read_sql(qry_BOSS_Rands_SouthAndNorthLimit, con=connBOSSRands)
finally:
    connBOSSRands.close()
# Persist the sampled randoms: the query uses ORDER BY RANDOM(), so a later
# run would otherwise draw a different subset.
CMASSLOWZTOT_DF_rands.to_json(DATA_PATH + "CMASSLOWZTOT_DF_rands")
NotesToWrite("CMASSLOWZTOT_DF_rands Database objects: " + str(len(CMASSLOWZTOT_DF_rands)))
print("CMASSLOWZTOT_DF_rands: \n" + str(CMASSLOWZTOT_DF_rands.head(3)))

In [None]:
NotesToWrite("5. Make CMASS&LOWZ Count-Count Auto Correlation Functions:")

# 5.1 BOSS total AutoCorrelation Function
NotesToWrite("5.1 BOSS total AutoCorrelation Function")

# Both catalogs are built from the RA / DEC columns with the same unit settings.
_boss_units = {'ra_units': 'degrees', 'dec_units': 'degrees'}

# Data catalog (BOSS galaxies).
cat_BOSS = treecorr.Catalog(
    ra=CMASSLOWZTOT_DF['RA'],
    dec=CMASSLOWZTOT_DF['DEC'],
    **_boss_units
)

# Matching catalog built from the sampled randoms.
cat_rand_BOSS = treecorr.Catalog(
    ra=CMASSLOWZTOT_DF_rands['RA'],
    dec=CMASSLOWZTOT_DF_rands['DEC'],
    **_boss_units
)

NotesToWrite("Created cat_BOSS & cat_rand_BOSS.")

In [None]:
# Run the count-count correlation on the BOSS data and randoms catalogs;
# unpack separation, xi, its variance and its standard deviation.
r_BOSS, xi_BOSS, varxi_BOSS, sig_BOSS = AutoCorrelationFunction(
    DataCatalog=cat_BOSS,
    RandCatalog=cat_rand_BOSS,
)

In [None]:
# Plot the autocorrelation function:
# xi is drawn solid and -xi dotted, so on the log y-axis the solid curve shows
# the bins where xi is positive and the dotted curve the bins where it is negative.
fig, ax = plt.subplots()
ax.plot(r_BOSS, xi_BOSS, color='blue')
ax.plot(r_BOSS, -xi_BOSS, color='blue', ls=':')
ax.set_xscale('log')
# `nonpositive` is the current name of this option; the old `nonposy` keyword
# was removed from Matplotlib and raises a TypeError on recent versions.
ax.set_yscale('log', nonpositive='clip')
ax.set_xlabel(r'$\theta$ (degrees)')
ax.set_xlim([0.01, 10])
ax.set_title("BOSS Auto Corr with BOSS randoms")
# Save before show(): showing may release the figure, leaving nothing to save.
fig.savefig(TESTING_PRODUCTS_PATH + "/BOSS Auto Corr with BOSS randoms")
plt.show()

NotesToWrite("Plotted: BOSS Auto Corr with BOSS randoms")

In [None]:
# Collect the four result arrays, one column each, keyed by variable name.
BOSS_AutoCorr_Plot_Results_dict = dict(
    r_BOSS=r_BOSS,
    xi_BOSS=xi_BOSS,
    varxi_BOSS=varxi_BOSS,
    sig_BOSS=sig_BOSS,
)

BOSS_AutoCorr_Plot_Results_df = pd.DataFrame(BOSS_AutoCorr_Plot_Results_dict)

# Write the table as CSV into the data directory.
_results_csv = DATA_PATH + 'BOSS_AutoCorr_Plot_Results_df_9_28' + '.csv'
BOSS_AutoCorr_Plot_Results_df.to_csv(_results_csv)