In [1]:
import numpy as np # Scientific computing 
import pandas as pd # Dataframe to organize content
import os # Creating folders
import pickle # Serializing module
import FATS # Feature extraction library
import sqlite3 # Database library

  from pandas.core import datetools


In [None]:
# Download the OGLE-III list of type-2 Cepheids in the LMC, compute FATS
# features from each star's I-band light curve (plus a custom colour feature
# that also needs the V band), and store the pickled raw data and features in
# the 'astrouw_data' table of the local SQLite database 'star_data.db'.

# Root of the OGLE-III LMC type-2 Cepheid directory; every URL below is built from it
ogle_t2cep_url = 'ftp://ftp.astrouw.edu.pl/ogle/ogle3/OIII-CVS/lmc/t2cep/'

# Read list file from web (whitespace separated, no header row).
# The separator is a raw string: "\s" in a normal literal is an invalid escape.
list_file_url = ogle_t2cep_url + 't2cep.dat'
list_file = pd.read_csv(list_file_url, sep=r"\s+", header=None)

# This is a list of all type-2 cepheids on the astrouw website
# (the first column of the list file holds the star identifiers)
t2ceph_astrouw = list_file[0].tolist()
# Remove lightcurve 170 as it does not have V-band data
t2ceph_astrouw.remove('OGLE-LMC-T2CEP-170')

# Initializing database and cursor
# NOTE(review): the connection is never closed in this cell -- presumably other
# cells reuse star_data_db / star_data_cursor; close it once it is no longer needed.
star_data_db = sqlite3.connect('star_data.db')
star_data_cursor = star_data_db.cursor()
# Initializing table:'astrouw_data' and columns:ex.'star_data' with datatype:ex.'BLOB'
star_data_cursor.execute("CREATE TABLE IF NOT EXISTS astrouw_data(star_data BLOB, star_features BLOB, star_label REAL)")

# This loop will obtain the necessary data for each star
for star in t2ceph_astrouw:
    print(star)
    # Reading Data for I-band
    star_data_url_I = ogle_t2cep_url + 'phot/I/' + star + '.dat'
    star_data_I = pd.read_csv(star_data_url_I, sep=r"\s+", names=["time", "magnitude", "error"])

    # Reading Data for V-band
    star_data_url_V = ogle_t2cep_url + 'phot/V/' + star + '.dat'
    star_data_V = pd.read_csv(star_data_url_V, sep=r"\s+", names=["time2", "magnitude2", "error2"])

    # Creating lists from pandas dataframe (these are the parameters used to compute the features)
    magnitude = star_data_I.magnitude.tolist()
    time = star_data_I.time.tolist()
    error = star_data_I.error.tolist()
    magnitude2 = star_data_V.magnitude2.tolist()

    # Extracting Features
    # The rows of 'parameters' must follow the order magnitude, time, error
    # to match the Data list given to FeatureSpace.
    parameters = np.array([magnitude, time, error])
    feature_space = FATS.FeatureSpace(Data=['magnitude', 'time', 'error'])
    features = feature_space.calculateFeature(parameters)
    features_array = features.result(method='array')
    # Custom feature:colour (mean I-band magnitude minus mean V-band magnitude)
    # -- is also added to array
    colour = np.mean(magnitude) - np.mean(magnitude2)
    # np.append works whether result() returned a list or a NumPy array
    # (arrays have no .append method); it returns a new array, hence the rebind.
    features_array = np.append(features_array, colour)

    # Serializing star_data so that it can be stored in database
    star_pickled = pickle.dumps((star, time, magnitude, error, magnitude2), pickle.HIGHEST_PROTOCOL)
    # Serializing features so that it can be stored in database
    features_pickled = pickle.dumps(features_array, pickle.HIGHEST_PROTOCOL)

    # Storing star_data in database for future reference.
    # Every row gets star_label 1 -- presumably the class label for type-2
    # Cepheids; confirm against the code that reads this table.
    star_data_cursor.execute("INSERT INTO astrouw_data (star_data, star_features, star_label) VALUES (?,?,?)",
                             (sqlite3.Binary(star_pickled), sqlite3.Binary(features_pickled), 1))
    # Commit after every star so rows already stored survive a later failure
    star_data_db.commit()

print("Data stored.")