### Preprocesamiento de datos

In [2]:
import os
import pandas as pd
from astropy.table import Table, vstack
from costas.acquisition import parser
from costas.filter import grade_filter
from costas.preprocessing import preprocessing
from costas.fextraction import get_ra_dec, get_statistics

In [3]:
data_table = Table(names=('RA', 'DEC', 'Q1\'', 'C1', 'Q2\'', 'C2'), dtype=('S10', 'S10', 'f8', 'f8', 'f8', 'f8'))

# Recorremos cada archivo de la carpeta y aplicamos las funciones de procesamiento y extraccion de datos
for filename in os.listdir('ASAS Original Data/00'):
    data = parser('ASAS Original Data/00/' + filename)
    
    data = grade_filter(data, ["A", "B"])
    
    data = preprocessing(data, "2")
    
    RA_DEC = get_ra_dec(data)
    
    statistics = get_statistics(data["HJD"], data["MAG_2"])
    
    # Agregamos la fila procesada a la tabla final
    data_table.add_row([RA_DEC[0], RA_DEC[1], statistics[0], statistics[1], statistics[2], statistics[3]])
    
data_table


RA,DEC,Q1',C1,Q2',C2
bytes10,bytes10,float64,float64,float64,float64
0.021658,-35.860717,0.3704465224525659,0.00019372400866490835,2.270616868432857,0.007223682676412602
0.204690,-35.187733,-2.4250875235695233,0.006879940525866823,-0.31489520243364477,0.00011746475437901527
0.178739,1.868984,10.703526590296327,0.19100616266465242,3.548751647028042,0.027936106538981265
0.166771,-3.274737,-1.0940728272918765,0.002161402927045919,0.34584769716823244,0.00021739318437474786
0.486658,-37.908880,0.12008588929346423,1.676714209986052e-05,-0.4854295933670283,0.00027452852857379195
0.448549,-39.882324,-1.8224195408509276,0.00436434235918759,0.8697193238601402,0.0010016990516656943
0.460272,-83.357023,9.291119426340257,0.04161102283524798,-3.3203456944387,0.005623346409308061
0.552012,-32.022080,2.203287573046258,0.005729527612315599,-9.990583206138126,0.10132617035172109
0.333543,27.996736,-0.16638016012314277,0.00015370844892814706,2.8981219706092154,0.0440888778377726
0.941835,16.697347,-1.359042776047518,0.004702490123437264,-0.4904316969562535,0.0006193262618555684


### Comprimir

In [35]:
import pickle

In [36]:
filename = 'ASAS Data Tables/00'
outfile = open(filename, 'wb')
pickle.dump(data_table, outfile)
outfile.close()

Finalmente, creamos una funcion que nos permita hacer esto con cada archivo que tenemos

# TODO: Verificar porque falla con ciertos archivos
### Esta funcion no funciona

In [66]:
for number in range(16, 20):
    folder = str(number)
    print(folder)
    data_table = Table(names=('RA', 'DEC', 'Q1\'', 'C1', 'Q2\'', 'C2'), dtype=('S10', 'S10', 'f8', 'f8', 'f8', 'f8'))
    
    analyze_data = 'ASAS Original Data/' + folder
    
    for filename in os.listdir(analyze_data):
        print(filename)
        data = parser('ASAS Original Data/' + folder + '/' + filename)
    
        data = grade_filter(data, ["A", "B"])
    
        data = preprocessing(data, "2")
    
        RA_DEC = get_ra_dec(data)
    
        statistics = get_statistics(data["HJD"], data["MAG_2"])
    
        # Agregamos la fila procesada a la tabla final
        data_table.add_row([RA_DEC[0], RA_DEC[1], statistics[0], statistics[1], statistics[2], statistics[3]])
    
    filename = 'ASAS Data Tables/' + folder
    outfile = open(filename, 'wb')
    pickle.dump(data_table, outfile)
    outfile.close()
    
    print(data_table)

16
165413+2253.3
163529-6814.7
162625-0449.8
163329+0403.0
162619+2253.9
161824-4439.5
161513-1532.5
162110+2539.4
161501-1345.9
164404+0153.3
161034+1139.3
160031-2512.1
164424-2833.9
165521-4806.2
162906-0531.8
162908-6720.5
165911-6521.6
160957+0833.2
163345-2716.0
162526-0439.7
160824-4657.4
160756-1859.8
165213-5157.8
162929+1357.8
165848+2051.0
163740-0508.6
163703-3040.9
162500-7931.7
160809-4928.2
165428-2931.4
160718+0955.9
162811+0304.3
164828-2608.8
165725+0001.0
161820-7333.2
162431-5456.8
160216-6552.3
163556-6409.7
161216-0738.0
164818-3237.5
161655-6956.6
165610-2027.5
162229-4835.7
164244+1426.8
162745+1103.6
162749-5332.1
165112-6401.3
160107-3441.8
163556-6434.8
163307-4159.8
164411+2014.6
161520-3255.1
163843-4324.7
162432-6201.8
165907-6005.4
161910+0830.1
161228-0325.3
164741-4615.3
165125+0818.8
160058+0210.4
161239-6515.4
163657-3057.0
162238+1910.6
161001-3542.3
162335+0024.5
161914-3222.2
165024-1903.9
161332-2655.8
161329-3220.5
165558-0406.6
164256-0338.9
163

164659+0322.5
162317-2956.1
160922-0846.9
165431-4057.2
160017-4507.6
165739-1749.2
165141-6101.4
162743-6223.1
161656-6411.0
161200-6854.7
164811-5950.4
160740-7552.7
163839+1057.2
163736-7740.9
160836-2416.5
162752+1546.0
162014-4954.5
164726+2133.8
162355-0754.4
162633-6904.6
162912-3748.7
163920-4104.9
164121+0030.4
162404-6539.7
165859+1312.9
162336-4830.1
160254-1851.4
165651-2006.7
161224-5026.7
165416+1837.6
161025-2629.0
163006-0156.9
164017-5826.3
162802+0719.6
165123-5610.4
161225+0806.5
164137-2821.3
164326-5450.0
161646-2054.7
162407-1301.4
162308-2301.0
162455+0259.9
165735-3250.5
164612-1550.6
162630-5035.8
164935-4701.3
165000-3427.3
162226-3207.2
163851-6315.2
161831-5839.8
162237-0819.5
162921-7127.9
162134-1739.8
162530+0339.5
161149-3635.8
163415-1709.6
161600-3839.1
165648-6635.4
164618-5401.9
165330-4120.4
161437-2611.1
163910-6227.0
162514-7915.0
165652-2028.4
164743-5855.4
164813-3256.7
160254-3517.4
165926-2858.6
163007+0125.2
160125-5150.3
164620+0155.8
164810

ValueError: Data type <class 'int'> not allowed to init Table