### Dependencies

In [1]:
import sqlite3 # Database library.
import os # Folder management library.
import pickle # Serializing module.
import numpy as np # Scientific computing library.

### Retrieving Star Data

In [2]:
# Initializing database and cursor.
star_data_db = sqlite3.connect('star_data.db')
star_data_cursor = star_data_db.cursor()

# Star classes to retrieve; each name is also used as a table name in star_data.db.
classes = ['cep_1o', 'cep_f', 'dsct','eb_ec', 'eb_ed', 'eb_esd', 'lpv_mira_agb_c', 'lpv_mira_agb_o', 'lpv_osarg_agb',
           'lpv_osarg_rgb', 'lpv_srv_agb_c', 'lpv_srv_agb_o', 'rrab', 'rrc', 'rrd', 'rre', 't2cep']

# Binary target: t2cep is the positive class (1); every other class is negative (0).
POSITIVE_CLASS = 't2cep'
# Upper bound on the number of rows read from each class table.
MAX_ROWS_PER_CLASS = 600

# Retrieving star_data from database.
# X[i] holds the deserialized features of classes[i]; Y[i] holds the matching binary labels.
X, Y = [], []
for classv in classes:
    # Table names cannot be bound as SQL parameters; classv only ever comes from
    # the hard-coded list above, so string concatenation is safe here.
    star_data_cursor.execute('SELECT star_features FROM '+classv)
    # fetchmany() reads only the rows that are kept, instead of materializing
    # the whole table with fetchall() and slicing it afterwards.
    rows = star_data_cursor.fetchmany(MAX_ROWS_PER_CLASS)
    # Deserializing features.
    # NOTE(review): pickle.loads can execute arbitrary code -- only read a
    # star_data.db that comes from a trusted source.
    temp_X = [pickle.loads(row[0]) for row in rows]
    # Every example of a class shares the same binary label.
    temp_Y = [int(classv == POSITIVE_CLASS)] * len(temp_X)
    X.append(temp_X)
    Y.append(temp_Y)

# Randomly shuffle data set for each label.
# A fixed seed makes the shuffle -- and the train/test split built from it --
# repeatable across kernel restarts (given the same rows in the same order).
SHUFFLE_SEED = 42
shuffle_rng = np.random.RandomState(SHUFFLE_SEED)
for class_set in X:
    # Only X is shuffled: every entry of the matching Y list carries the same
    # label, so the feature/label pairing is unaffected.
    shuffle_rng.shuffle(class_set)

# Form train and test sets (no cross-validation set is built here).
# Each negative class contributes its first 200 examples to the train set and
# the remainder to the test set; the positive class, t2cep, contributes its
# first 450 examples to the train set and the remainder to the test set.
# At most 600 rows are loaded per class, so the open-ended slices below select
# exactly the same examples as an explicit upper bound of 601 would.
NEGATIVE_TRAIN_SIZE = 200
POSITIVE_TRAIN_SIZE = 450
T2CEP_INDEX = classes.index('t2cep')

train_X, test_X = [], []
train_Y, test_Y = [], []

for label, (class_X, class_Y) in enumerate(zip(X, Y)):
    # For t2cep a larger share goes to the train set.
    train_size = POSITIVE_TRAIN_SIZE if label == T2CEP_INDEX else NEGATIVE_TRAIN_SIZE

    # extend() keeps the sets as flat lists, so no separate flattening pass is needed.
    train_X.extend(class_X[:train_size])
    test_X.extend(class_X[train_size:])

    train_Y.extend(class_Y[:train_size])
    test_Y.extend(class_Y[train_size:])

# Close cursor and database.
# close must be *called*: a bare `star_data_cursor.close` only looks up the
# method and leaves the cursor open.
star_data_cursor.close()
star_data_db.close()

print('Done.')

Done.


### Creating Binary Classifier Data Sets -- Training and Test

In [3]:
# Initializing database and cursor.
star_sets_db = sqlite3.connect('star_data_sets_binary_equal.db')
star_sets_cursor = star_sets_db.cursor()

try:
    # Initializing tables--one per data set; each row stores a pickled X and a
    # pickled Y as BLOBs.
    star_sets_cursor.execute("CREATE TABLE IF NOT EXISTS train_set(X BLOB, Y BLOB)")
    star_sets_cursor.execute("CREATE TABLE IF NOT EXISTS test_set(X BLOB, Y BLOB)")

    # Serializing Data so that it can be stored in database.
    train_X_pickled = pickle.dumps(train_X, pickle.HIGHEST_PROTOCOL)
    train_Y_pickled = pickle.dumps(train_Y, pickle.HIGHEST_PROTOCOL)
    test_X_pickled = pickle.dumps(test_X, pickle.HIGHEST_PROTOCOL)
    test_Y_pickled = pickle.dumps(test_Y, pickle.HIGHEST_PROTOCOL)

    # The tables survive between runs (CREATE TABLE IF NOT EXISTS), so clear any
    # rows left by an earlier run; otherwise each re-run would append another
    # copy next to the stale one.
    star_sets_cursor.execute("DELETE FROM train_set")
    star_sets_cursor.execute("DELETE FROM test_set")

    # Storing star_data in database for future reference.
    star_sets_cursor.execute("INSERT INTO train_set(X, Y) VALUES (?,?)",
                             (sqlite3.Binary(train_X_pickled), sqlite3.Binary(train_Y_pickled)))
    star_sets_cursor.execute("INSERT INTO test_set(X, Y) VALUES (?,?)",
                             (sqlite3.Binary(test_X_pickled), sqlite3.Binary(test_Y_pickled)))
    # Deletes and inserts are committed together, so a failure part-way through
    # does not leave the tables emptied.
    star_sets_db.commit()
finally:
    # Close cursor and database, even if a statement above failed.
    # close must be *called*: a bare `star_sets_cursor.close` is a no-op.
    star_sets_cursor.close()
    star_sets_db.close()

print("Done.")

Done.


In [4]:
# Sanity check: report how many examples ended up in the train set.
train_set_size = len(train_X)
print(train_set_size)

3650
