### Dependencies

In [1]:
import numpy as np # Scientific computing 
import pandas as pd # Dataframe to organize content
import os # Creating folders
import pickle # Serializing module
import FATS # Feature extraction library
import sqlite3 # Database library

  from pandas.core import datetools


### Scrape T2CEPH Files from Astrouw -- Extract Features then Store in Database

In [None]:
# Download the catalogue of type-2 Cepheids, then for every star fetch its I- and V-band
# photometry, compute FATS features plus a custom colour feature, and store the pickled
# light curve and feature vector in the SQLite table 'astrouw_data'.
#
# NOTE: this cell is not idempotent. The table has no uniqueness key, so re-running the
# cell against an existing 'star_data.db' appends a second copy of every row.

# Read list file from web
list_file_url = 'ftp://ftp.astrouw.edu.pl/ogle/ogle3/OIII-CVS/lmc/t2cep/t2cep.dat'
# Raw string: "\s" is not a valid Python string escape, so the regex separator is written
# as r"\s+" to avoid the invalid-escape-sequence warning raised by newer interpreters.
list_file = pd.read_csv(list_file_url, sep=r"\s+", header=None)

# This is a list of all type-2 cepheids on the astrouw website (first column of the catalogue)
t2ceph_astrouw = list_file[0].tolist()
# Remove lightcurve 170 as it does not have V-band data
# (list.remove raises ValueError if the identifier is ever absent from the catalogue)
t2ceph_astrouw.remove('OGLE-LMC-T2CEP-170')

# Initializing database and cursor; both are left open because the retrieval cell reuses them
star_data_db = sqlite3.connect('star_data.db')
star_data_cursor = star_data_db.cursor()
# Initializing table:'astrouw_data' and columns:ex.'star_data' with datatype:ex.'BLOB'
star_data_cursor.execute("CREATE TABLE IF NOT EXISTS astrouw_data(star_data BLOB, star_features BLOB, star_label REAL)")

# This loop will obtain the necessary data for each star
for star in t2ceph_astrouw:
    print(star)
    # Reading Data for I-band
    star_data_url_I = 'ftp://ftp.astrouw.edu.pl/ogle/ogle3/OIII-CVS/lmc/t2cep/phot/I/'+star+'.dat'
    star_data_I = pd.read_csv(star_data_url_I, sep=r"\s+", names=["time", "magnitude", "error"])

    # Reading Data for V-band
    star_data_url_V = 'ftp://ftp.astrouw.edu.pl/ogle/ogle3/OIII-CVS/lmc/t2cep/phot/V/'+star+'.dat'
    star_data_V = pd.read_csv(star_data_url_V, sep=r"\s+", names=["time2", "magnitude2", "error2"])

    # Creating lists from pandas dataframe (these are the parameters used to compute the features)
    magnitude = star_data_I.magnitude.tolist()
    time = star_data_I.time.tolist()
    error = star_data_I.error.tolist()
    # Only the V-band magnitudes are used (for the colour feature); V-band time/error are ignored
    magnitude2 = star_data_V.magnitude2.tolist()

    # Extracting Features from the I-band light curve only
    parameters = np.array([magnitude, time, error])
    feature_space = FATS.FeatureSpace(Data=['magnitude','time', 'error'])
    features = feature_space.calculateFeature(parameters)
    # Coerce to a plain list so that .append() below is always available.
    # NOTE(review): result(method='array') may return a numpy array depending on the FATS
    # version (ndarray has no .append) -- confirm; list() is a no-op copy if it is a list.
    features_array = list(features.result(method='array'))
    # Custom feature:colour -- mean I-band magnitude minus mean V-band magnitude, appended last
    colour = np.mean(magnitude) - np.mean(magnitude2)
    features_array.append(colour)

    # Serializing star_data so that it can be stored in database
    # Tuple layout: (star id, time, magnitude, error, V-band magnitude)
    star_pickled = pickle.dumps((star, time, magnitude, error, magnitude2), pickle.HIGHEST_PROTOCOL)
    # Serializing features so that it can be stored in database
    features_pickled = pickle.dumps(features_array, pickle.HIGHEST_PROTOCOL)

    # Storing star_data in database for future reference; every row gets star_label = 1
    star_data_cursor.execute("INSERT INTO astrouw_data (star_data, star_features, star_label) VALUES (?,?,?)",
                             (sqlite3.Binary(star_pickled), sqlite3.Binary(features_pickled), 1))
    # Commit per star so rows already processed survive a failure later in the loop
    star_data_db.commit()

print("Data stored.")

OGLE-LMC-T2CEP-001




OGLE-LMC-T2CEP-002
OGLE-LMC-T2CEP-003
OGLE-LMC-T2CEP-004
OGLE-LMC-T2CEP-005
OGLE-LMC-T2CEP-006
OGLE-LMC-T2CEP-007
OGLE-LMC-T2CEP-008
OGLE-LMC-T2CEP-009
OGLE-LMC-T2CEP-010
OGLE-LMC-T2CEP-011
OGLE-LMC-T2CEP-012
OGLE-LMC-T2CEP-013
OGLE-LMC-T2CEP-014
OGLE-LMC-T2CEP-015


OGLE-LMC-T2CEP-016
OGLE-LMC-T2CEP-017
OGLE-LMC-T2CEP-018
OGLE-LMC-T2CEP-019
OGLE-LMC-T2CEP-020
OGLE-LMC-T2CEP-021
OGLE-LMC-T2CEP-022
OGLE-LMC-T2CEP-023
OGLE-LMC-T2CEP-024
OGLE-LMC-T2CEP-025
OGLE-LMC-T2CEP-026
OGLE-LMC-T2CEP-027
OGLE-LMC-T2CEP-028
OGLE-LMC-T2CEP-029


OGLE-LMC-T2CEP-030
OGLE-LMC-T2CEP-031
OGLE-LMC-T2CEP-032
OGLE-LMC-T2CEP-033
OGLE-LMC-T2CEP-034
OGLE-LMC-T2CEP-035
OGLE-LMC-T2CEP-036
OGLE-LMC-T2CEP-037
OGLE-LMC-T2CEP-038
OGLE-LMC-T2CEP-039
OGLE-LMC-T2CEP-040
OGLE-LMC-T2CEP-041
OGLE-LMC-T2CEP-042
OGLE-LMC-T2CEP-043


OGLE-LMC-T2CEP-044
OGLE-LMC-T2CEP-045
OGLE-LMC-T2CEP-046
OGLE-LMC-T2CEP-047
OGLE-LMC-T2CEP-048
OGLE-LMC-T2CEP-049
OGLE-LMC-T2CEP-050
OGLE-LMC-T2CEP-051
OGLE-LMC-T2CEP-052
OGLE-LMC-T2CEP-053
OGLE-LMC-T2CEP-054
OGLE-LMC-T2CEP-055
OGLE-LMC-T2CEP-056
OGLE-LMC-T2CEP-057


OGLE-LMC-T2CEP-058
OGLE-LMC-T2CEP-059
OGLE-LMC-T2CEP-060
OGLE-LMC-T2CEP-061
OGLE-LMC-T2CEP-062
OGLE-LMC-T2CEP-063
OGLE-LMC-T2CEP-064
OGLE-LMC-T2CEP-065
OGLE-LMC-T2CEP-066
OGLE-LMC-T2CEP-067
OGLE-LMC-T2CEP-068
OGLE-LMC-T2CEP-069
OGLE-LMC-T2CEP-070
OGLE-LMC-T2CEP-071


OGLE-LMC-T2CEP-072
OGLE-LMC-T2CEP-073
OGLE-LMC-T2CEP-074
OGLE-LMC-T2CEP-075
OGLE-LMC-T2CEP-076
OGLE-LMC-T2CEP-077
OGLE-LMC-T2CEP-078
OGLE-LMC-T2CEP-079
OGLE-LMC-T2CEP-080
OGLE-LMC-T2CEP-081
OGLE-LMC-T2CEP-082
OGLE-LMC-T2CEP-083
OGLE-LMC-T2CEP-084
OGLE-LMC-T2CEP-085


OGLE-LMC-T2CEP-086
OGLE-LMC-T2CEP-087
OGLE-LMC-T2CEP-088
OGLE-LMC-T2CEP-089
OGLE-LMC-T2CEP-090
OGLE-LMC-T2CEP-091
OGLE-LMC-T2CEP-092
OGLE-LMC-T2CEP-093
OGLE-LMC-T2CEP-094
OGLE-LMC-T2CEP-095
OGLE-LMC-T2CEP-096
OGLE-LMC-T2CEP-097
OGLE-LMC-T2CEP-141


OGLE-LMC-T2CEP-142
OGLE-LMC-T2CEP-143
OGLE-LMC-T2CEP-144
OGLE-LMC-T2CEP-145
OGLE-LMC-T2CEP-146
OGLE-LMC-T2CEP-147
OGLE-LMC-T2CEP-148
OGLE-LMC-T2CEP-149
OGLE-LMC-T2CEP-150
OGLE-LMC-T2CEP-151
OGLE-LMC-T2CEP-152
OGLE-LMC-T2CEP-153
OGLE-LMC-T2CEP-154
OGLE-LMC-T2CEP-155


OGLE-LMC-T2CEP-156
OGLE-LMC-T2CEP-157
OGLE-LMC-T2CEP-158
OGLE-LMC-T2CEP-159
OGLE-LMC-T2CEP-160
OGLE-LMC-T2CEP-161
OGLE-LMC-T2CEP-162
OGLE-LMC-T2CEP-163
OGLE-LMC-T2CEP-164
OGLE-LMC-T2CEP-165
OGLE-LMC-T2CEP-166
OGLE-LMC-T2CEP-167
OGLE-LMC-T2CEP-168
OGLE-LMC-T2CEP-169


OGLE-LMC-T2CEP-171
OGLE-LMC-T2CEP-172
OGLE-LMC-T2CEP-173
OGLE-LMC-T2CEP-174
OGLE-LMC-T2CEP-175
OGLE-LMC-T2CEP-176
OGLE-LMC-T2CEP-177
OGLE-LMC-T2CEP-178
OGLE-LMC-T2CEP-179
OGLE-LMC-T2CEP-180
OGLE-LMC-T2CEP-181
OGLE-LMC-T2CEP-182
OGLE-LMC-T2CEP-183
OGLE-LMC-T2CEP-184


OGLE-LMC-T2CEP-185
OGLE-LMC-T2CEP-186
OGLE-LMC-T2CEP-187
OGLE-LMC-T2CEP-188
OGLE-LMC-T2CEP-189
OGLE-LMC-T2CEP-190
OGLE-LMC-T2CEP-191
OGLE-LMC-T2CEP-192
OGLE-LMC-T2CEP-193
OGLE-LMC-T2CEP-194
OGLE-LMC-T2CEP-195
OGLE-LMC-T2CEP-196
OGLE-LMC-T2CEP-197
OGLE-LMC-T2CEP-198


OGLE-LMC-T2CEP-199
OGLE-LMC-T2CEP-200
OGLE-LMC-T2CEP-201
OGLE-LMC-T2CEP-202
OGLE-LMC-T2CEP-203
Data stored.


### Retrieve Data from Database

In [None]:
# Retrieving star_features from database and showing the first two feature vectors
# as a quick sanity check, then releasing the cursor and the connection.
star_data_cursor.execute('SELECT star_features FROM astrouw_data')
# fetchmany(2) pulls only the rows that are displayed instead of loading the whole table
for row in star_data_cursor.fetchmany(2):
    # The BLOB field must be converted to a byte string before unpickling. bytes() does this
    # on both Python 2 (where bytes is str) and Python 3 (where str() would yield "b'...'").
    # NOTE: pickle.loads executes arbitrary code from its input -- only use on a trusted database.
    data = pickle.loads(bytes(row[0]))
    print(data)
    print(len(data))

# Close cursor and database (close must be *called*; a bare attribute reference does nothing)
star_data_cursor.close()
star_data_db.close()

In [None]:
# Scratch cell: prints the literal string 'test'.
# NOTE(review): looks like a leftover sanity check with no role in the pipeline -- confirm and delete.
print('test')