In [1]:
import numpy as np
from astropy.io import ascii, fits
from astropy.table import Table, Column
import pandas as pd

# Reading HTML table files

In [2]:
file='GRB161219B-2016jca/GRB161219B-2016jca_Optical1.html'

In [3]:
def check_float(potential_float):
    try:
        float(potential_float) #Try to convert argument into a float
        return True
    except ValueError:
        return False

n=0
k=-1
for i in range(0, 6):
    #Read in the file changing where the data starts
    data = ascii.read(file, format='html', data_start=i)
    
    
    #find the first row that has a column value that works as a float
    for j in range(len(data.columns)):
        if check_float(data[0][j])==True:
            print(i)
            print(data[0][j])
            k = i
            break
    if k==i:
        break
    n=i
    
#Get the actual data with the correct data start value    
data = ascii.read(file, format='html', data_start=k, encoding='utf-8')
data

1
57744.08


MJD  .,B  .,V  .,R  .,I  .,g  .,r  .,i  .,Telescope  .
float64,str12,str12,str12,str12,str12,str13,str12,str4
57744.08,19.83 ± 0.01,19.66 ± 0.01,19.39 ± 0.01,18.89 ± 0.00,–,–,–,VLT
57746.08,20.04 ± 0.01,19.84 ± 0.01,19.74 ± 0.01,19.33 ± 0.01,–,–,–,VLT
57748.12,20.24 ± 0.01,19.82 ± 0.01,19.73 ± 0.01,19.48 ± 0.01,–,–,–,VLT
57749.03,20.21 ± 0.03,19.65 ± 0.10,19.53 ± 0.02,19.35 ± 0.05,–,–,–,TNG
57749.99,20.18 ± 0.13,19.8 ± 0.2,–,–,19.81 ± 0.05,19.63 ± 0.07,19.80 ± 0.03,LT
57750.0,–,19.8 ± 0.2,–,–,19.80 ± 0.05,19.63 ± 0.07,19.80 ± 0.03,LT
57751.09,20.46 ± 0.01,19.79 ± 0.01,19.64 ± 0.01,19.44 ± 0.01,–,–,–,VLT
57751.98,–,19.63 ± 0.05,–,–,19.92 ± 0.05,19.54 ± 0.09,19.76 ± 0.08,LT
57751.99,–,19.63 ± 0.05,–,–,19.92 ± 0.05,19.54 ± 0.09,19.75 ± 0.08,LT
57753.99,20.52 ± 0.21,19.77 ± 0.06,19.43 ± 0.06,19.35 ± 0.12,–,–,–,TNG


# Cleaning up the data

* I want the pm_remover() to:

    * Take a table as input
    * Check if there are pm values in the rows
    * If there are then split that row into data and errors
    * Append the new error row to the table whilst overwriting the old row with the float data
    * Return the table

In [4]:
# Need a way to make the columns that should be floats into floats
def pm_remover(table): #Should separate columns containing ± into data and error
    num_cols = len(table.columns) # Get the number of columns
    
    to_delete = []
    for i in range(num_cols): #Iterate over those columns
        print(num_cols)
        data_a = []
        data_b = []
        
        if isinstance(table[0][i], str)==True: #Check if theres a string in column i row 0
            for l in table.columns[i]:
                print(l)
                string = l.replace(u'\xa0', u'') #Unicode space characters are popping up when parsing the html
                
                if '–' in string:
                    sep_data_a, sep_data_b = 'NaN', 'NaN'
                    sep_data_a, sep_data_b = float(sep_data_a), float(sep_data_b) #Convert to float (nan)
                    data_a.append(sep_data_a)
                    data_b.append(sep_data_b)
                    
                if '±' in string:
                    sep_data_a, sep_data_b = l.split('±') #Separate the data
                    sep_data_a, sep_data_b = float(sep_data_a), float(sep_data_b) #Convert to float

                    data_a.append(sep_data_a)
                    data_b.append(sep_data_b)
                        
            col_a = Column(data_a, name=str(table.colnames[i])+'band') #Make a new column for the data
            col_b = Column(data_b, name='Δ'+str(table.colnames[i])) #Make a new column for the errors
            
            table.add_column(col_a) #Add the data into its own column
            table.add_column(col_b) #Add the error to its own column
            
            to_delete.append(table.colnames[i])#Record that this column should be deleted.
    
    #Drop the appropriate columns one at a time
    for k in to_delete:
        print(k)
        table.remove_column(k) #Remove the columns that are being split
    
pm_remover(data)

9
9
19.83 ± 0.01
20.04 ± 0.01
20.24 ± 0.01
20.21 ± 0.03
20.18 ± 0.13
–
20.46 ± 0.01
–
–
20.52 ± 0.21
20.57 ± 0.01
20.87 ± 0.01
20.60 ± 0.09
20.60 ± 0.09
20.90 ± 0.06
20.90 ± 0.06
20.92 ± 0.02
–
21.18 ± 0.02
21.58 ± 0.06
–
–
–
21.98 ± 0.01
–
–
22.17 ± 0.02
–
22.64 ± 0.03
9
19.66 ± 0.01
19.84 ± 0.01
19.82 ± 0.01
19.65 ± 0.10
19.8 ± 0.2
19.8 ± 0.2
19.79 ± 0.01
19.63 ± 0.05
19.63 ± 0.05
19.77 ± 0.06
19.82 ± 0.01
20.17 ± 0.03
20.2 ± 0.1
20.2 ± 0.1
20.03 ± 0.03
20.03 ± 0.03
19.96 ± 0.01
–
20.12 ± 0.01
20.33 ± 0.02
20.51 ± 0.03
–
–
20.93 ± 0.01
–
–
21.23 ± 0.02
–
–
9
19.39 ± 0.01
19.74 ± 0.01
19.73 ± 0.01
19.53 ± 0.02
–
–
19.64 ± 0.01
–
–
19.43 ± 0.06
19.53 ± 0.01
19.64 ± 0.03
–
–
–
–
19.58 ± 0.01
–
19.66 ± 0.01
19.73 ± 0.01
19.97 ± 0.02
–
–
20.25 ± 0.01
–
–
20.56 ± 0.01
–
21.24 ± 0.01
9
18.89 ± 0.00
19.33 ± 0.01
19.48 ± 0.01
19.35 ± 0.05
–
–
19.44 ± 0.01
–
–
19.35 ± 0.12
19.40 ± 0.01
19.44 ± 0.04
–
–
–
–
19.41 ± 0.01
–
19.47 ± 0.01
19.47 ± 0.01
19.61 ± 0.02
–
–
19.82 ± 0.01
–
–
20.08 ± 0.01


ValueError: Inconsistent data column lengths

In [None]:
data

In [None]:
float('NaN')

# Making FITS files from table objects

In [None]:
data.write('GRB161219B-2016jca.fits', format='ascii', overwrite=True)

In [None]:
data.read('GRB161219B-2016jca.fits', format='ascii')

In [None]:
hdul = fits.open('GRB161219B-2016jca.fits')

# Template fits file