In [1]:
# Load packages
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn import linear_model

In [2]:
# Set the style of display: use matplotlib's "ggplot" style sheet and
# render figures inline in the notebook output
plt.style.use("ggplot")
%matplotlib inline

In [3]:
# Load the complete star database once for interactive inspection
# (fetch_data below re-reads the same file on every call)
data = pd.read_csv('./data/image_database.txt')

In [4]:
# List every column name available in the database
data.columns

Index([u'starname', u'ra_act', u'dec_act', u'x_act_mx1', u'x_act_mx2',
       u'x_act_mx3', u'x_act_my3', u'x_act_my4', u'x_act_px1', u'x_act_px2',
       u'x_act_px3', u'x_act_py1', u'x_act_py2', u'x_act_py3', u'x_act_py4',
       u'x_img_mx1', u'x_img_mx2', u'x_img_mx3', u'x_img_my3', u'x_img_my4',
       u'x_img_px1', u'x_img_px2', u'x_img_px3', u'x_img_py1', u'x_img_py2',
       u'x_img_py3', u'x_img_py4', u'y_act_mx1', u'y_act_mx2', u'y_act_mx3',
       u'y_act_my3', u'y_act_my4', u'y_act_px1', u'y_act_px2', u'y_act_px3',
       u'y_act_py1', u'y_act_py2', u'y_act_py3', u'y_act_py4', u'y_img_mx1',
       u'y_img_mx2', u'y_img_mx3', u'y_img_my3', u'y_img_my4', u'y_img_px1',
       u'y_img_px2', u'y_img_px3', u'y_img_py1', u'y_img_py2', u'y_img_py3',
       u'y_img_py4', u'v_global_x', u'v_global_y', u'v_global_z',
       u'err_mag_mx3', u'err_r_mx3', u'err_ang_mx3', u'err_mag_py3',
       u'err_r_py3', u'err_ang_py3', u'err_mag_my3', u'err_r_my3',
       u'err_ang_my3', u'err_mag_p

In [5]:
def fetch_data(camera, image_num, data_path='./data/image_database.txt'):
    """Fetch the error columns of a given image taken from a given camera.

    Parameters
    ----------
    camera : str
        The camera name, e.g. "mx".
    image_num : str
        The number of an image from the camera. The value is concatenated
        into a regular expression, so "." matches any single character and
        therefore returns the columns of all the images.
    data_path : str, optional
        Path of the CSV file holding the star data. Defaults to the
        database file used throughout this notebook.

    Returns
    -------
    pandas.DataFrame
        The columns whose name matches ``"err_.*" + camera + image_num``,
        with all the rows of the file. NaN entries are NOT removed here;
        callers have to drop them before fitting a model.
    """
    # Load all the star data
    data = pd.read_csv(data_path)

    # Keep only the columns of interest set by "camera" and "image_num".
    # DataFrame.filter(regex=...) keeps columns whose name matches the pattern.
    regex_txt = "err_.*" + camera + image_num
    return data.filter(regex=regex_txt)

In [6]:
# Fetch all the data taken from camera "mx".
# image_num is concatenated into the regex built by fetch_data, so "." acts
# as a single-character wildcard and selects every image number.
camera = "mx"
image_num = "."

df = fetch_data(camera, image_num)
df.head()

Unnamed: 0,err_mag_mx3,err_r_mx3,err_ang_mx3,err_mag_mx2,err_r_mx2,err_ang_mx2,err_mag_mx1,err_r_mx1,err_ang_mx1
0,,,84.85926,,,84.85926,,,84.85926
1,,,34.887684,,,34.887684,,,34.887684
2,,,94.018541,,,94.018541,,,94.018541
3,,,93.243282,,,93.243282,,,93.243282
4,,,93.064243,,,93.064243,,,93.064243


In [7]:
# Summary statistics per column; the differing "count" values in the output
# show that the columns hold different numbers of non-null entries
df.describe()

Unnamed: 0,err_mag_mx3,err_r_mx3,err_ang_mx3,err_mag_mx2,err_r_mx2,err_ang_mx2,err_mag_mx1,err_r_mx1,err_ang_mx1
count,112.0,112.0,98.0,115.0,115.0,98.0,21.0,21.0,98.0
mean,1.70879,542.145293,101.438478,2.156539,531.218733,101.438478,0.929675,563.367564,101.438478
std,0.727047,263.917057,27.460691,0.625118,251.130435,27.460691,0.611878,186.577424,27.460691
min,0.1114,9.102225,15.155884,0.386588,9.218313,15.155884,0.08268,156.32858,15.155884
25%,1.263753,348.696906,82.916111,1.643794,356.533943,82.916111,0.62736,532.972339,82.916111
50%,1.744043,606.23898,105.832814,2.153845,596.907934,105.832814,0.823817,613.679713,105.832814
75%,2.171537,745.952198,116.660812,2.685418,740.612167,116.660812,1.076075,658.501959,116.660812
max,3.359036,1013.690734,170.359913,3.149516,1009.86462,170.359913,3.119703,816.38106,170.359913


## Build a Regression Model


In [32]:
def prepare_data(df, camera, image_nums=("1", "2")):
    """Stack the regression inputs and targets of several images of a camera.

    For each image number, only the rows where ``err_r``, ``err_mag`` and
    ``err_ang`` of that image are all present are kept, so the returned
    inputs and targets are row-aligned and contain no NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding the columns ``err_r_<camera><n>``,
        ``err_mag_<camera><n>`` and ``err_ang_<camera><n>`` for every
        requested image number ``n``.
    camera : str
        The camera name used in the column names, e.g. "mx".
    image_nums : iterable of str, optional
        The image numbers to stack.

    Returns
    -------
    x_train : numpy.ndarray, shape (n_rows, 1)
        The ``err_r`` values.
    y_train : numpy.ndarray, shape (n_rows, 2)
        The ``[err_mag, err_ang]`` values of the same rows.

    Raises
    ------
    ValueError
        If ``image_nums`` is empty.
    """
    x_blocks = []
    y_blocks = []
    for num in image_nums:
        suffix = camera + num
        feature_cols = ["err_r_" + suffix]
        target_cols = ["err_mag_" + suffix, "err_ang_" + suffix]

        # Drop incomplete rows once, on the columns actually used, and take
        # BOTH x and y from that cleaned frame. Selecting from `df` again
        # (as the previous version did) would bring the NaN rows back.
        complete = df.loc[:, feature_cols + target_cols].dropna()

        # `.values` replaces DataFrame.as_matrix(), which newer pandas removed
        x_blocks.append(complete[feature_cols].values)
        y_blocks.append(complete[target_cols].values)

    if not x_blocks:
        raise ValueError("image_nums must contain at least one image number")

    return np.vstack(x_blocks), np.vstack(y_blocks)

# Earlier single-image version, kept for reference:
#y_train = np.reshape(df.err_mag_mx2.as_matrix(), (df.shape[0], 1))
#x_train = np.reshape(df.err_r_mx2.as_matrix(), (df.shape[0], 1))

# Stack images 1 and 2 of the selected camera into one training set
x_train, y_train = prepare_data(df, camera, image_nums=["1", "2"])

# NOTE(review): the feature expansion and model fit below are commented out,
# so this cell defines neither `x_train_poly` nor `LR`, although later cells
# (the prediction plot, `LR.coef_`, `LR.intercept_`) read them. Restore these
# lines before relying on Restart & Run All.

# Create polynomial features
#x_train_poly = np.hstack((x_train, np.power(x_train,2), np.power(x_train,3)))
#poly = preprocessing.PolynomialFeatures(degree=3, interaction_only=False, include_bias=True)
#x_train_poly=poly.fit_transform(x_train)

# Fit a linear regression model
#LR = linear_model.LinearRegression(normalize=True)
#LR.fit(x_train_poly, y_train)

In [35]:
# Sanity check of the stacked targets: (n_rows, 2), the two columns being
# err_mag and err_ang as selected in prepare_data
y_train.shape

(14, 2)

In [None]:
# Scratch check for image "1": how many rows have both the error magnitude
# and the error angle present?
l = "1"
cols = ["err_mag_" + camera + l, "err_ang_" + camera + l]
df_tmp = df.loc[:, cols].dropna()
# DataFrame.shape gives the same tuple as the former `as_matrix().shape`;
# DataFrame.as_matrix() was removed in pandas 1.0
df_tmp.shape


In [None]:
# Show the rows where every column listed in `cols` (assigned in the
# previous cell) is non-null
df.loc[:, cols].dropna()

In [None]:
# Plot the predicted error magnitude vs "true" error magnitude
# NOTE(review): `LR` and `x_train_poly` are only assigned in commented-out
# code in the visible notebook, so this cell fails with NameError on a fresh
# kernel unless the model-fitting lines are restored — confirm.
fig, axes = plt.subplots(1,2, figsize=(15,6))
y_pred = LR.predict(x_train_poly)
axes[0].plot(y_train, y_pred, '.r')

# Plot the absolute error (element-wise |y_train - y_pred|, against sample index)
abs_err = np.abs(y_train - y_pred) 
axes[1].plot(abs_err, '.b')

In [None]:
# Plot error vector's distance-to-center-pixel vs "true" error magnitude.
# y_train holds [err_mag, err_ang] column-wise (see prepare_data), so select
# column 0; plotting the whole array would also draw the angles in the same
# colour.
fig, ax = plt.subplots(figsize=(8,6))
ax.plot(x_train, y_train[:, 0], '.r')
ax.set_xlabel("err_r")
ax.set_ylabel("err_mag");

In [None]:
# Plot error vector's distance-to-center-pixel vs error vector angle.
# Use the angle column of y_train (column 1, see prepare_data) so that the
# angles come from the same stacked rows as x_train; `df.err_ang_mx2` covers
# a single image only and does not have the same length as x_train.
fig, ax = plt.subplots(figsize=(8,6))
ax.plot(x_train, y_train[:, 1], '.r')
ax.set_xlabel("err_r")
ax.set_ylabel("err_ang");

In [None]:
# Show the first rows of the per-camera error columns again
df.head()

In [None]:
# Inspect the fitted coefficients.
# NOTE(review): `LR` is only assigned in commented-out code in the visible
# notebook — confirm the fit has been restored before running this cell.
LR.coef_

In [None]:
# Inspect the fitted intercept.
# NOTE(review): `LR` is only assigned in commented-out code in the visible
# notebook — confirm the fit has been restored before running this cell.
LR.intercept_

## Model Evaluation

In [None]:
    # NOTE(review): this cell is indented and reads `im_label`, which is not
    # assigned anywhere in the visible notebook — it looks like the body of a
    # function whose `def` line is missing; confirm before running.
    #
    # For each error quantity (r, ang, mag): select the columns whose name
    # matches the prefix + im_label, and keep the rows where at least one of
    # those columns is non-null.
    idx_err_r_mx = data.filter(regex=("err_r_" + im_label)).notnull().any(axis=1)
    df_err_r_mx = data.filter(regex=("err_r_" + im_label)).loc[idx_err_r_mx]

    idx_err_ang_mx = data.filter(regex=("err_ang_" + im_label)).notnull().any(axis=1)
    df_err_ang_mx = data.filter(regex=("err_ang_" + im_label)).loc[idx_err_ang_mx]

    idx_err_mag_mx = data.filter(regex=("err_mag_" + im_label)).notnull().any(axis=1)
    df_err_mag_mx = data.filter(regex=("err_mag_" + im_label)).loc[idx_err_mag_mx]
    
    # Join the above dataframes on their row index; the outer join keeps a row
    # if it survived in any of the three frames
    df = df_err_r_mx.join(df_err_mag_mx, how="outer")
    df = df.join(df_err_ang_mx, how="outer")