In [1]:
import numpy as np
from astropy.io import ascii, fits
from astropy.table import Table, Column
import pandas as pd

# Reading HTML table files

In [2]:
file='GRB161219B-2016jca/GRB161219B-2016jca_Optical1.html'

In [3]:
def check_float(potential_float):
    try:
        float(potential_float) #Try to convert argument into a float
        return True
    except ValueError:
        return False

n=0
k=-1
for i in range(0, 6):
    #Read in the file changing where the data starts
    data = ascii.read(file, format='html', data_start=i)
    
    
    #find the first row that has a column value that works as a float
    for j in range(len(data.columns)):
        if check_float(data[0][j])==True:
            print(i)
            print(data[0][j])
            k = i
            break
    if k==i:
        break
    n=i
    
#Get the actual data with the correct data start value    
data = ascii.read(file, format='html', data_start=k, encoding='utf-8')
data

1
57744.08


MJD  .,B  .,V  .,R  .,I  .,g  .,r  .,i  .,Telescope  .
float64,str12,str12,str12,str12,str12,str13,str12,str4
57744.08,19.83 ± 0.01,19.66 ± 0.01,19.39 ± 0.01,18.89 ± 0.00,–,–,–,VLT
57746.08,20.04 ± 0.01,19.84 ± 0.01,19.74 ± 0.01,19.33 ± 0.01,–,–,–,VLT
57748.12,20.24 ± 0.01,19.82 ± 0.01,19.73 ± 0.01,19.48 ± 0.01,–,–,–,VLT
57749.03,20.21 ± 0.03,19.65 ± 0.10,19.53 ± 0.02,19.35 ± 0.05,–,–,–,TNG
57749.99,20.18 ± 0.13,19.8 ± 0.2,–,–,19.81 ± 0.05,19.63 ± 0.07,19.80 ± 0.03,LT
57750.0,–,19.8 ± 0.2,–,–,19.80 ± 0.05,19.63 ± 0.07,19.80 ± 0.03,LT
57751.09,20.46 ± 0.01,19.79 ± 0.01,19.64 ± 0.01,19.44 ± 0.01,–,–,–,VLT
57751.98,–,19.63 ± 0.05,–,–,19.92 ± 0.05,19.54 ± 0.09,19.76 ± 0.08,LT
57751.99,–,19.63 ± 0.05,–,–,19.92 ± 0.05,19.54 ± 0.09,19.75 ± 0.08,LT
57753.99,20.52 ± 0.21,19.77 ± 0.06,19.43 ± 0.06,19.35 ± 0.12,–,–,–,TNG


# Cleaning up the data

* I want the pm_remover() to:

    * Take a table as input
    * Check if there are pm values in the rows
    * If there are then split that row into data and errors
    * Append the new error row to the table whilst overwriting the old row with the float data
    * Return the table

In [19]:
# Need a way to make the columns that should be floats into floats
def pm_remover(table): #Should separate columns containing ± into data and error
    num_cols = len(table.columns) # Get the number of columns
    for i in range(num_cols): #Iterate over those columns
        data_a = []
        data_b = []
        
        if isinstance(table[0][i], str)==True: #Check if theres a string in column i row 0
            for l in table.columns[i]:
                string = l.replace(u'\xa0', u'') #Unicode space characters are popping up when parsing the html
                
                if '–' in string:
                    sep_data_a, sep_data_b = 'NaN', 'NaN'
                    sep_data_a, sep_data_b = float(sep_data_a), float(sep_data_b) #Convert to float (nan)
                    data_a.append(sep_data_a)
                    data_b.append(sep_data_b)
                    
                if '±' in string:
                    sep_data_a, sep_data_b = l.split('±') #Separate the data
                    sep_data_a, sep_data_b = float(sep_data_a), float(sep_data_b) #Convert to float

                    data_a.append(sep_data_a)
                    data_b.append(sep_data_b)
                        
            col_a = Column(data_a, name=str(table.colnames[i])) #Make a new column for the data
            col_b = Column(data_b, name='Δ'+str(table.colnames[i])) #Make a new column for the errors

            table.remove_column(table.colnames[i]) #Remove the column that is being split
            table.add_column(col_a, index=[0]) #Add the data into its own column
            table.add_column(col_b, index=[0]) #Add the error to its own column
            
pm_remover(data)

1


TypeError: slice indices must be integers or None or have an __index__ method

In [20]:
data

MJD  .,Telescope  .,B  .,V  .,R  .,I  .,g  .,r  .,i  .
float64,str4,float64,float64,float64,float64,float64,float64,float64
57744.08,VLT,19.83,19.66,19.39,18.89,,,
57746.08,VLT,20.04,19.84,19.74,19.33,,,
57748.12,VLT,20.24,19.82,19.73,19.48,,,
57749.03,TNG,20.21,19.65,19.53,19.35,,,
57749.99,LT,20.18,19.8,,,19.81,19.63,19.8
57750.0,LT,,19.8,,,19.8,19.63,19.8
57751.09,VLT,20.46,19.79,19.64,19.44,,,
57751.98,LT,,19.63,,,19.92,19.54,19.76
57751.99,LT,,19.63,,,19.92,19.54,19.75
57753.99,TNG,20.52,19.77,19.43,19.35,,,


In [None]:
float('NaN')

# Making FITS files from table objects

In [None]:
data.write('GRB161219B-2016jca.fits', format='ascii', overwrite=True)

In [None]:
data.read('GRB161219B-2016jca.fits', format='ascii')

In [None]:
hdul = fits.open('GRB161219B-2016jca.fits')

# Template fits file