### Dependencies

In [None]:
import numpy as np # Scientific computing 
import pandas as pd # Dataframe to organize content
import os # Creating folders
import pickle # Serializing module
import FATS # Feature extraction library
import sqlite3 # Database library
import urllib # Reading and saving webpages

### Scrape EB Stars from Astrouw then Store in Respective Directories
More specifically, this code scrapes a random sample of ~2400 eclipsing binary (EB) stars (~800 each of types EC, ESD, and ED) from the Astrouw database (ftp://ftp.astrouw.edu.pl/ogle/ogle3/OIII-CVS/lmc/).

In [None]:
# Download a random sample of eclipsing-binary (EB) light curves and store
# them under Data/<category>/<band>/<star>.dat.

# Python 2 exposes urlretrieve/urlcleanup on ``urllib`` itself; Python 3 moved
# them to ``urllib.request``. Import locally so this cell runs on either.
try:
    from urllib.request import urlretrieve, urlcleanup
except ImportError:
    from urllib import urlretrieve, urlcleanup

# Read list file from web. Only columns 0 and 3 are kept; the code below
# treats them as the star name and its EB category respectively.
list_file_url = 'ftp://ftp.astrouw.edu.pl/ogle/ogle3/OIII-CVS/lmc/ecl/ident.dat'
list_file = pd.read_csv(list_file_url, sep=r"\s+", header=None, usecols=[0,3])

# This is an array of all EB stars on the astrouw website.
# (``DataFrame.as_matrix()`` was removed from pandas; ``.values`` is the
# equivalent that exists in both old and new releases.)
eb_stars = list_file.values

# The EB stars are sorted using categories EC, ESD, and ED.
# Boolean masks replace the former ``np.append`` loop, which re-allocated the
# whole array for every star. ``np.array`` makes a private, writable copy so
# the in-place shuffle below cannot touch ``list_file``.
star_names = list_file.iloc[:, 0]
star_categories = list_file.iloc[:, 1]
eb_ec = np.array(star_names[star_categories == 'EC'])
eb_esd = np.array(star_names[star_categories == 'ESD'])
eb_ed = np.array(star_names[star_categories == 'ED'])

# 800 random stars are selected from each category (fewer if a category
# holds fewer than 800 stars).
stars_per_category = 800
np.random.shuffle(eb_ec)
np.random.shuffle(eb_esd)
np.random.shuffle(eb_ed)
eb_ec = eb_ec[:stars_per_category]
eb_esd = eb_esd[:stars_per_category]
eb_ed = eb_ed[:stars_per_category]

# Saving photometry data to corresponding folders.
directories = [('eb_ec', eb_ec), ('eb_esd', eb_esd), ('eb_ed', eb_ed)]
failed_downloads = []
for (folder, cata) in directories:
    # urlretrieve does not create missing parent directories; without this
    # step every download fails with IOError and nothing is saved.
    for band in ('I', 'V'):
        band_dir = os.path.join('Data', folder, band)
        if not os.path.isdir(band_dir):
            os.makedirs(band_dir)

    for star in cata:
        url_I = 'ftp://ftp.astrouw.edu.pl/ogle/ogle3/OIII-CVS/lmc/ecl/phot/I/'+star+'.dat'
        url_V = 'ftp://ftp.astrouw.edu.pl/ogle/ogle3/OIII-CVS/lmc/ecl/phot/V/'+star+'.dat'
        try:
            urlretrieve(url_I, "Data/"+folder+"/I/"+star+".dat")
            urlretrieve(url_V, "Data/"+folder+"/V/"+star+".dat")
        except IOError as download_error:
            # Still best-effort (a failed star is skipped, not fatal), but the
            # failure is recorded instead of being silently discarded.
            failed_downloads.append((star, download_error))
        finally:
            # Clear the cache.
            urlcleanup()

if failed_downloads:
    print('%d star(s) could not be fully downloaded.' % len(failed_downloads))
print('Done')

### Extract Features from all Star Data
A detailed explanation of the extracted features is given at http://isadoranun.github.io/tsfeat/FeaturesDocumentation.html. In the code below, features are extracted from each star's data set and stored in a database.

In [None]:
# Compute FATS features for every star that has both I-band and V-band
# photometry under Data/<class>/, and store them (pickled) in star_data.db,
# one table per star class.

# A list of all star classes. Only sub-directories of Data/ count; stray
# files (e.g. OS metadata files) would otherwise crash the listdir calls below.
star_classes = [entry for entry in os.listdir('Data/')
                if os.path.isdir(os.path.join('Data', entry))]

# Initializing database and cursor.
star_data_db = sqlite3.connect('star_data.db')
try:
    star_data_cursor = star_data_db.cursor()

    # This loop will obtain the features for each star.
    for star_class in star_classes:
        # Table names cannot be bound as SQL parameters, so the directory name
        # is embedded as a double-quoted identifier (embedded quotes doubled).
        # For plain names such as eb_ec this refers to the same table as the
        # unquoted form.
        table_name = '"' + star_class.replace('"', '""') + '"'

        # Initializing table--(star type) and data type--(BLOB).
        star_data_cursor.execute("CREATE TABLE IF NOT EXISTS "+table_name+"(star_name BLOB, star_features BLOB)")

        # Find the stars that have both I-band and V-band data.
        # A set makes each membership test O(1) instead of scanning a list.
        stars_I = os.listdir('Data/'+star_class+'/I')
        stars_V = set(os.listdir('Data/'+star_class+'/V'))
        overlapping_stars = [star for star in stars_I if star in stars_V]

        for star in overlapping_stars:
            # Reading Data for I-band.
            star_data_path_I = 'Data/'+star_class+'/I/'+star
            star_data_I = pd.read_csv(star_data_path_I, sep=r"\s+", names=["time", "magnitude", "error"])

            # Reading Data for V-band.
            star_data_path_V = 'Data/'+star_class+'/V/'+star
            star_data_V = pd.read_csv(star_data_path_V, sep=r"\s+", names=["time2", "magnitude2", "error2"])

            # Creating lists from pandas dataframe (these are the parameters used to compute the features).
            magnitude = star_data_I.magnitude.tolist()
            time = star_data_I.time.tolist()
            error = star_data_I.error.tolist()
            magnitude2 = star_data_V.magnitude2.tolist()

            # Extracting Features (computed from the I-band series only).
            parameters = np.array([magnitude, time, error])
            feature_space = FATS.FeatureSpace(Data=['magnitude','time', 'error'])
            features = feature_space.calculateFeature(parameters)
            # Convert to a plain list before appending: numpy arrays have no
            # ``append`` method, so this works whether FATS hands back a list
            # or an ndarray.
            features_array = list(features.result(method='array'))
            # Custom feature: colour (mean I magnitude minus mean V magnitude)
            # is also added to the array.
            colour = np.mean(magnitude) - np.mean(magnitude2)
            features_array.append(colour)

            # Serializing features so that it can be stored in database.
            features_pickled = pickle.dumps(features_array, pickle.HIGHEST_PROTOCOL)

            # Storing star_data in database for future reference.
            star_data_cursor.execute("INSERT INTO "+table_name+"(star_name, star_features) VALUES (?,?)",
                                     (star, sqlite3.Binary(features_pickled)))
            # Committing per star keeps already-processed rows if a later
            # star raises.
            star_data_db.commit()
finally:
    # Release the database file even if feature extraction fails part-way.
    star_data_db.close()


print("Data stored.")

### Retrieve Data from Database

In [None]:
# Print the first ten stored feature vectors (and their lengths) as a quick
# sanity check of the database contents.

# Initializing database and cursor
star_data_db = sqlite3.connect('star_data.db')
try:
    star_data_cursor = star_data_db.cursor()
    try:
        # Retrieving star_data from database
        # NOTE(review): this reads a table named astrouw_data; confirm that a
        # table with that name exists in star_data.db.
        star_data_cursor.execute('SELECT star_features FROM astrouw_data')
        # fetchmany(10) yields the same first ten rows as fetchall()[:10]
        # without loading the whole table into memory.
        for row in star_data_cursor.fetchmany(10):
            # The BLOB arrives as ``buffer`` on Python 2 and ``bytes`` on
            # Python 3; bytes() turns either into what pickle.loads expects.
            # NOTE: unpickling executes arbitrary code, so only use this on a
            # database file you created yourself.
            data = pickle.loads(bytes(row[0]))
            print(data)
            print(len(data))
    finally:
        # Close cursor (the original referenced ``close`` without calling it).
        star_data_cursor.close()
finally:
    # Close database even if the query or unpickling fails.
    star_data_db.close()

In [None]:
# Remove the lpv_mira_agb_c table from star_data.db if it exists.
star_data_db = sqlite3.connect('star_data.db')
try:
    star_data_cursor = star_data_db.cursor()
    star_data_cursor.executescript('drop table if exists lpv_mira_agb_c;')
    star_data_db.commit()
finally:
    # Close the connection so the database file is released, matching the
    # open/close pattern of the retrieval cell.
    star_data_db.close()