In [1]:
from bs4 import BeautifulSoup
import collections

#BELOW CODE FROM: https://stackoverflow.com/questions/22604564/create-pandas-dataframe-from-a-string
import sys
if sys.version_info[0] < 3: 
    from StringIO import StringIO
else:
    from io import StringIO

import pandas as pd

# Band labels used throughout the notebook. Their order matters:
# get_dataset_strs picks a band's polyline with bands.index(band), and the
# labels are also the prefixes of the "<band> BJD" / "<band> Flux" columns.
bands = ["g", "r", "i", "z"]

In [2]:
"""
GOAL FOR TODAY: Understand supernova light curve fitting and get a set routine
for at least 10-15 candidates
"""

'\nGOAL FOR TODAY: Understand supernova light curve fitting and get a set routine\nfor at least 10-15 candidates\n'

In [3]:
def get_tf(txt, band):
    """Turn a space-separated string of "a,b" pairs into a two-column DataFrame.

    Parameters
    ----------
    txt : str
        Pairs separated by single spaces, each pair being two comma-separated
        values (e.g. ``"1,2 3,4"``).
    band : str
        Label used to name the columns ``"<band> BJD"`` and ``"<band> Flux"``.

    Returns
    -------
    pandas.DataFrame
        One row per pair, parsed by ``pd.read_csv``.
    """
    # One pair per line, so the text reads as CSV rows.
    rows = "\n".join(txt.split(' '))
    header = band + " BJD," + band + " Flux\n"

    # String -> DataFrame approach from:
    # https://stackoverflow.com/questions/22604564/create-pandas-dataframe-from-a-string
    return pd.read_csv(StringIO(header + rows), sep=",")

def save_csv(candidate, txts):
    """Parse one points string per band and write them side by side to a CSV.

    Parameters
    ----------
    candidate : object
        Identifier used for the output file name, ``data/<candidate>.csv``.
    txts : sequence of str
        Points strings in the same order as the module-level ``bands`` list;
        ``txts[k]`` is parsed with ``bands[k]`` as its column label. Entries
        beyond the number of known bands are ignored.

    Raises
    ------
    ValueError
        From ``pd.concat`` when ``txts`` is empty.
    """
    # get_tf needs the band label to build its column headers, and it already
    # returns the DataFrame itself (there is nothing to index with [0]).
    frames = [get_tf(txt, band) for txt, band in zip(txts, bands)]
    result = pd.concat(frames, axis=1)

    result.to_csv("data/"+str(candidate)+".csv")

In [6]:
# Read the saved candidate page from disk.
with open('htmls/DC21badcl.html', 'r') as file:  # r to open file in READ mode
    html = file.read()
# Name the parser explicitly: without it BeautifulSoup picks whichever parser
# happens to be installed (and warns), so the parse tree can differ between
# machines. 'html.parser' is the one the helper functions in this notebook use.
soup = BeautifulSoup(html, 'html.parser')

In [7]:
def find_ticks(soup):
    """Collect the tick-label elements of every plot div on the page, by band.

    The ``<div class="svgplotdiv">`` elements under ``soup.body`` are matched
    to the bands g, r, i, z by position; bands without a div keep an empty
    list.

    Returns
    -------
    dict
        ``{"g": ..., "r": ..., "i": ..., "z": ...}`` where each value is the
        result of ``find_a_tick`` for that band's div, or ``[]``.
    """
    band_order = ["g", "r", "i", "z"]
    ticks_by_band = {name: [] for name in band_order}

    plot_divs = soup.body.find_all('div',  attrs={'class':'svgplotdiv'})
    for position, plot_div in enumerate(plot_divs):
        ticks_by_band[band_order[position]] = find_a_tick(plot_div)

    return ticks_by_band
#    return {"g": find_a_tick(divs[0]), "r": find_a_tick(divs[1]), "i": find_a_tick(divs[2]), "z": find_a_tick(divs[3])}

def find_a_tick(div):
    """Return every ``<text class="svgplotticklabel">`` element found in `div`.

    The div is serialised with ``str`` and parsed again with 'html.parser'
    before it is searched.
    """
    fragment = BeautifulSoup(str(div), 'html.parser')
    tick_labels = fragment.find_all('text', attrs={'class':'svgplotticklabel'})
    return tick_labels

def find_scalings(band_ticks):
    """Derive the linear pixel-to-value conversions for one plot's two axes.

    Tick labels that share a ``y`` attribute are treated as one axis: their
    ``x`` attribute is the pixel position and their text is the value. Tick
    labels that share an ``x`` attribute are treated as the other axis, with
    ``y`` as the pixel position. For each axis, the first such label and the
    label stored immediately after it define ``value = m * pixel + b``.

    NOTE(review): this assumes the label following the first match belongs to
    the same axis -- confirm against the page markup.

    Parameters
    ----------
    band_ticks : sequence
        Tick-label elements exposing ``.attrs["x"]``, ``.attrs["y"]`` and
        ``.string`` (for example the result of ``find_a_tick``).

    Returns
    -------
    tuple
        ``((m, b), (m, b))``: first the conversion for ``x`` pixel positions,
        then the conversion for ``y`` pixel positions. ``main`` applies the
        first pair to the BJD column and the second to the Flux column.
    """

    def repeated_values(attr_name):
        # Attribute values (as strings) carried by more than one label,
        # in order of first appearance.
        seen = collections.Counter([str(tick.attrs[attr_name]) for tick in band_ticks])
        return [value for value, hits in seen.items() if hits > 1]

    def first_pair(shared_attr, pixel_attr, repeated):
        # (pixel, value) of the first label whose `shared_attr` equals the
        # first repeated value, and of the label right after it. Everything
        # stays 0 when no label matches.
        pixel_a = value_a = pixel_b = value_b = 0
        for position, tick in enumerate(band_ticks):
            if tick.attrs[shared_attr] == repeated[0]:
                pixel_a = float(tick.attrs[pixel_attr])
                value_a = float(tick.string)

                follower = band_ticks[position + 1]
                pixel_b = float(follower.attrs[pixel_attr])
                value_b = float(follower.string)
                break
        return pixel_a, value_a, pixel_b, value_b

    shared_x = repeated_values("x")
    shared_y = repeated_values("y")

    # Labels sharing an "x" attribute differ in "y": they calibrate y pixels.
    ypix_a, yval_a, ypix_b, yval_b = first_pair("x", "y", shared_x)
    # Labels sharing a "y" attribute differ in "x": they calibrate x pixels.
    xpix_a, xval_a, xpix_b, xval_b = first_pair("y", "x", shared_y)

    ypix_slope = (yval_a - yval_b) / (ypix_a - ypix_b)
    ypix_offset = yval_b - ypix_slope * ypix_b

    xpix_slope = (xval_a - xval_b) / (xpix_a - xpix_b)
    xpix_offset = xval_b - xpix_slope * xpix_b

    # Return order: conversion for x pixels first, then for y pixels.
    return (xpix_slope, xpix_offset), (ypix_slope, ypix_offset)

In [8]:
#FIND DYS

def find_dys(soup):
    """Collect the error-bar elements of every plot div on the page, by band.

    The ``<div class="svgplotdiv">`` elements under ``soup.body`` are matched
    to the bands g, r, i, z by position; bands without a div keep an empty
    list.

    Returns
    -------
    dict
        ``{"g": ..., "r": ..., "i": ..., "z": ...}`` where each value is the
        result of ``find_a_dy`` for that band's div, or ``[]``.
    """
    band_order = ["g", "r", "i", "z"]
    bars_by_band = {name: [] for name in band_order}

    plot_divs = soup.body.find_all('div',  attrs={'class':'svgplotdiv'})
    for position, plot_div in enumerate(plot_divs):
        bars_by_band[band_order[position]] = find_a_dy(plot_div)

    return bars_by_band
    
#    return {"g": find_a_dy(divs[0]), "r": find_a_dy(divs[1]), "i": find_a_dy(divs[2]), "z": find_a_dy(divs[3])}

def find_a_dy(div):
    """Return every ``<line class="errorbar0">`` element found in `div`.

    The div is serialised with ``str`` and parsed again with 'html.parser'
    before it is searched.
    """
    fragment = BeautifulSoup(str(div), 'html.parser')
    error_bars = fragment.find_all('line', attrs={'class':'errorbar0'})
    return error_bars

def dy_to_np(dys):
    """Return ``y1 - y2`` for each error-bar element as a NumPy array.

    Parameters
    ----------
    dys : sequence
        Elements whose ``.attrs`` mapping holds ``"y1"`` and ``"y2"`` values
        convertible with ``float``.

    Returns
    -------
    numpy.ndarray
        One signed difference per element, in input order.
    """
    # Read both endpoints of each bar in one pass.
    endpoints = [(float(bar.attrs["y1"]), float(bar.attrs["y2"])) for bar in dys]

    firsts = np.array([pair[0] for pair in endpoints])
    seconds = np.array([pair[1] for pair in endpoints])
    return firsts - seconds

In [9]:
def get_dataset_strs(soup, band):
    """Return the raw plotted points of one band as a DataFrame.

    The ``<polyline class="dataset0">`` elements under ``soup.body`` are
    indexed by the band's position in the module-level ``bands`` list, and
    the chosen element's ``points`` attribute is parsed with ``get_tf``.

    Returns
    -------
    pandas.DataFrame
        Columns ``"<band> BJD"`` and ``"<band> Flux"``, as read from the
        ``points`` attribute without any rescaling.
    """
    polylines = soup.body.find_all('polyline',  attrs={'class':'dataset0'})
    band_position = bands.index(band)
    raw_points = polylines[band_position].attrs["points"]

    return get_tf(raw_points, band)
    
def scale_data(ts, band, xscale, yscale):
    """Apply a linear conversion to the BJD and Flux columns of `ts`.

    Parameters
    ----------
    ts : pandas.DataFrame
        Must contain ``"<band> BJD"`` and ``"<band> Flux"``; both columns are
        overwritten in place.
    band : str
        Band label selecting the two columns.
    xscale, yscale : indexable
        ``(slope, offset)`` pairs; BJD becomes ``BJD*xscale[0] + xscale[1]``
        and Flux becomes ``Flux*yscale[0] + yscale[1]``.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in as `ts`.
    """
    time_col = band + " BJD"
    flux_col = band + " Flux"

    ts[time_col] = ts[time_col]*xscale[0] + xscale[1]
    ts[flux_col] = ts[flux_col]*yscale[0] + yscale[1]

    return ts


In [10]:
import numpy as np

In [11]:
def main(Soup):
    """Extract every band's light curve from a parsed page and save it as CSV.

    For each band that has error bars on the page, the plotted points are
    converted with the scalings from ``find_scalings``, a ``"<band> dy"``
    column holding the scaled absolute error-bar lengths is added, and the
    per-band frames are joined column-wise.

    Parameters
    ----------
    Soup : bs4.BeautifulSoup
        Parsed candidate page. Every lookup uses this argument; the function
        does not read the notebook-level ``soup`` variable.

    Returns
    -------
    pandas.DataFrame
        The combined table, also written to ``data/<id>.csv`` where ``<id>``
        is the ``value`` of the ``<input id="showcand_candid">`` element.

    Raises
    ------
    KeyError
        If a band has error bars but no tick labels to derive a scaling from.
    """
    page = Soup

    # Keep only the bands that actually have tick labels / error bars.
    band_ticks = {band: ticks for band, ticks in find_ticks(page).items() if ticks}
    dys = {band: bars for band, bars in find_dys(page).items() if bars}

    band_frames = []
    # Walk the bands by name rather than by position in the filtered dict, so
    # a band without error bars is skipped instead of shifting the lookup.
    for band in bands:
        if band not in dys:
            continue

        xs, ys = find_scalings(band_ticks[band])
        df_band = scale_data(get_dataset_strs(page, band), band, xs, ys)

        # Error-bar lengths are in pixels; only the slope of the flux
        # conversion applies to a length.
        dy = dy_to_np(dys[band])
        dy = pd.DataFrame({band + " dy": abs(dy*ys[0])})

        band_frames.append(pd.concat([df_band, dy], axis=1))

    # Join once at the end; an empty page yields an empty frame.
    df = pd.concat(band_frames, axis=1) if band_frames else pd.DataFrame()

    candidate_id = page.find('input',  attrs={'id':'showcand_candid'}).attrs["value"]
    df.to_csv("data/" + candidate_id + ".csv")
    return df

# Run the extraction on the page parsed into `soup`; the DataFrame returned by
# main is shown as this cell's output.
main(soup)

Unnamed: 0,g BJD,g Flux,g dy,r BJD,r Flux,r dy,i BJD,i Flux,i dy,z BJD,z Flux,z dy
0,59328.277188,11681.163813,409.017357,59360.326323,9701.410515,338.302512,59359.853703,4299.843905,324.947512,59345.931119,6244.093387,345.163666
1,59328.345431,8335.202303,380.638472,59360.332527,9019.967027,334.976965,59363.931728,7597.417341,502.760287,59355.885401,2087.655529,292.166416
2,59331.341895,12020.003031,383.550059,59360.332527,9164.682176,335.276313,59375.848827,1757.201646,194.535206,59357.836026,7292.324899,345.977376
3,59331.348099,13074.102137,375.826997,59360.338731,8296.39128,340.211266,59375.854922,2396.764581,238.822873,59357.836026,7416.468615,343.322917
4,59331.348099,13604.485528,374.870909,59360.338731,7032.88148,325.179718,59384.797318,6654.888605,330.001259,59357.842121,6706.55669,344.712365
5,59331.354302,13892.104864,378.99294,59360.338731,6026.744825,378.070412,59384.797318,5495.955726,311.381361,59357.848217,6488.186775,328.10925
6,59331.354302,14807.39506,368.680104,59360.338731,5527.569152,362.981237,59384.803414,4280.403008,314.510877,59357.848217,6051.586747,360.530179
7,59331.360506,15007.425367,379.563097,59388.218872,24017.219271,573.633155,59387.753734,7363.417057,505.814096,59357.854313,5171.816021,365.962187
8,59358.303865,18122.139718,501.411665,59388.225076,24216.431581,536.300364,59387.753734,6903.505038,481.703784,59363.931728,5806.654551,533.679595
9,59358.303865,18050.007577,473.412635,59388.225076,23770.83715,493.876763,59387.759829,7334.326664,496.657972,59363.931728,4539.074514,492.903101
