In this notebook, I fit a linear drift to the coordinate time series of a radio source.

In [1]:
import numpy as np
import matplotlib.pyplot as plt
# from matplotlib.ticker import MultipleLocator
from sklearn import linear_model
from sklearn.metrics import mean_squared_error, r2_score
from statsmodels.iolib.table import (SimpleTable, default_txt_fmt)
%matplotlib inline
%config InlineBackend.figure_format = "svg"

# My progs
from read_ts import get_ts

In [2]:
# Name of the radio source whose coordinate time series is analysed below
souname = "2250+190"
# Load the time series for that source from the ../data/ts directory
coordts = get_ts(souname, "../data/ts")

AttributeError: 'numpy.ndarray' object has no attribute 'info'

In [None]:
# Display the loaded coordinate time series
coordts

# 1 Linear drift test

## 1.1 Without weights, with intercept

In [None]:
# Unweighted straight-line fit of dra against the raw epoch (t_0 = 0).
# `regr1` keeps its original name because later cells call regr1.fit(...).
regr1 = linear_model.LinearRegression()

# scikit-learn expects a 2-D design matrix of shape (n_samples, n_features)
mjy = np.array(coordts["mjy"])
mjy = mjy[:, np.newaxis]

# Fit a dedicated estimator. fit() returns the estimator itself, so binding
# reg1 to regr1.fit(...) would make reg1 an alias of regr1, and its coef_ /
# intercept_ would be overwritten by every later regr1.fit(...) call before
# the summary table reads them.
reg1 = linear_model.LinearRegression().fit(mjy, coordts["dra"])

# Model prediction at the observed epochs
dra1 = reg1.predict(mjy)

# Fitted parameters
print("t_0 = 0")
print("Coefficients: %.3f mas/yr" % reg1.coef_[0])
print("Interception: %.3f mas" % reg1.intercept_)

# Mean squared error of the residuals
print("Mean squared error: %.2f"
      % mean_squared_error(coordts["dra"], dra1))

# Coefficient of determination: 1 is perfect prediction
print("Coefficient of determination: %.2f"
      % r2_score(coordts["dra"], dra1))

In [None]:
# Same unweighted fit, with the epoch measured from t_0 = 2000
mjy = np.array(coordts["mjy"]-2000)
mjy = mjy[:, np.newaxis]

# Use a fresh estimator instead of refitting the shared `regr1`: fit()
# returns the estimator itself, so refitting `regr1` would silently overwrite
# the parameters of every result that is bound to the same object.
reg1_a = linear_model.LinearRegression().fit(mjy, coordts["dra"])

# Model prediction at the observed epochs
dra1_a = reg1_a.predict(mjy)

# Fitted parameters
print("t_0 = 2000")
print("Coefficients: %.3f mas/yr" % reg1_a.coef_[0])
print("Interception: %.3f mas" % reg1_a.intercept_)

# Mean squared error of the residuals
print("Mean squared error: %.2f"
      % mean_squared_error(coordts["dra"], dra1_a))

# Coefficient of determination: 1 is perfect prediction
print("Coefficient of determination: %.2f"
      % r2_score(coordts["dra"], dra1_a))

In [None]:
# Same unweighted fit, with the epoch measured from t_0 = 2015
mjy = np.array(coordts["mjy"]-2015)
mjy = mjy[:, np.newaxis]

# Use a fresh estimator instead of refitting the shared `regr1`: fit()
# returns the estimator itself, so refitting `regr1` would silently overwrite
# the parameters of every result that is bound to the same object.
reg1_b = linear_model.LinearRegression().fit(mjy, coordts["dra"])

# Model prediction at the observed epochs
dra1_b = reg1_b.predict(mjy)

# Fitted parameters (the label now matches the 2015 reference epoch used above)
print("t_0 = 2015")
print("Coefficients: %.3f mas/yr" % reg1_b.coef_[0])
print("Interception: %.3f mas" % reg1_b.intercept_)

# Mean squared error of the residuals
print("Mean squared error: %.2f"
      % mean_squared_error(coordts["dra"], dra1_b))

# Coefficient of determination: 1 is perfect prediction
print("Coefficient of determination: %.2f"
      % r2_score(coordts["dra"], dra1_b))

## 1.2 -- Without weights, without intercept

In [None]:
# Unweighted fit forced through the origin (no intercept term).
# `regr2` keeps its original name because a later cell calls regr2.fit(...).
regr2 = linear_model.LinearRegression(fit_intercept=False)

# scikit-learn expects a 2-D design matrix of shape (n_samples, n_features)
mjy = np.array(coordts["mjy"])
mjy = mjy[:, np.newaxis]

# Fit a dedicated estimator. fit() returns the estimator itself, so binding
# reg2 to regr2.fit(...) would make reg2 an alias of regr2, and its coef_
# would be overwritten when regr2 is refitted later.
reg2 = linear_model.LinearRegression(fit_intercept=False).fit(
    mjy, coordts["dra"])

# Model prediction at the observed epochs
dra2 = reg2.predict(mjy)

# Fitted parameters
print("Coefficients: %.3f mas/yr" % reg2.coef_[0])
print("Interception: %.3f mas" % reg2.intercept_)

# Mean squared error of the residuals
print("Mean squared error: %.2f"
      % mean_squared_error(coordts["dra"], dra2))

# Coefficient of determination: 1 is perfect prediction
print("Coefficient of determination: %.2f"
      % r2_score(coordts["dra"], dra2))

## 1.3 -- With weights, with intercept

In [None]:
# Weighted fit with intercept, epoch measured from 2015
mjy = np.array(coordts["mjy"]-2015)
mjy = mjy[:, np.newaxis]

# Inverse-variance weights multiplied by a constant (the sum of the inverse
# variances). A common scale factor on all weights does not change the
# least-squares solution.
wgt = np.sum(1 / coordts["ra_err"]**2) / coordts["ra_err"]**2

# Use a fresh estimator instead of refitting the shared `regr1`: fit()
# returns the estimator itself, so refitting `regr1` would silently overwrite
# the parameters of every result that is bound to the same object.
# The weights are passed by keyword so their role is explicit.
reg3 = linear_model.LinearRegression().fit(
    mjy, coordts["dra"], sample_weight=wgt)

# Model prediction at the observed epochs
dra3 = reg3.predict(mjy)

# Fitted parameters
print("Coefficients: %.3f mas/yr" % reg3.coef_[0])
print("Interception: %.3f mas" % reg3.intercept_)

# Mean squared error of the residuals (unweighted)
print("Mean squared error: %.2f"
      % mean_squared_error(coordts["dra"], dra3))

# Coefficient of determination (unweighted): 1 is perfect prediction
print("Coefficient of determination: %.2f"
      % r2_score(coordts["dra"], dra3))

## 1.4 -- With weights, without intercept

In [None]:
# Weighted fit without intercept, epoch measured from 2015
mjy = np.array(coordts["mjy"]-2015)
mjy = mjy[:, np.newaxis]

# Use a fresh no-intercept estimator instead of refitting the shared `regr2`:
# fit() returns the estimator itself, so refitting `regr2` would overwrite
# the parameters of any earlier result bound to the same object.
# Inverse-variance weights are passed by keyword so their role is explicit.
reg4 = linear_model.LinearRegression(fit_intercept=False).fit(
    mjy, coordts["dra"], sample_weight=1 / coordts["ra_err"]**2)

# Model prediction at the observed epochs
dra4 = reg4.predict(mjy)

# Fitted parameters
print("Coefficients: %.3f mas/yr" % reg4.coef_[0])
print("Interception: %.3f mas" % reg4.intercept_)

# Mean squared error of the residuals (unweighted)
print("Mean squared error: %.2f"
      % mean_squared_error(coordts["dra"], dra4))

# Coefficient of determination (unweighted): 1 is perfect prediction
print("Coefficient of determination: %.2f"
      % r2_score(coordts["dra"], dra4))

## 1.5 -- RANSAC algorithm

In [None]:
# Build the design matrix in this cell so the result does not depend on
# whichever earlier cell happened to assign `mjy` last.
mjy = np.array(coordts["mjy"]-2015)
mjy = mjy[:, np.newaxis]

# RANSAC draws random subsets of the data; fixing the seed makes the fit
# reproducible across kernel restarts.
ransac = linear_model.RANSACRegressor(random_state=0)

# Inverse-variance weights are passed by keyword (newer scikit-learn
# releases may not accept sample_weight positionally here).
reg5 = ransac.fit(mjy, coordts["dra"],
                  sample_weight=1 / coordts["ra_err"]**2)

# Boolean masks of the samples classified as inliers / outliers
inlier_mask = reg5.inlier_mask_
outlier_mask = np.logical_not(inlier_mask)

# Model prediction at the observed epochs
dra5 = reg5.predict(mjy)

## 1.6 Ordinary Least Squares (OLS) from statsmodels

In [None]:
import statsmodels.api as sm

In [None]:
# Design matrix: epoch offset from 2015 plus the constant column that
# sm.add_constant attaches for the intercept term
mjy = sm.add_constant(np.array(coordts["mjy"]-2015))

# Unweighted least-squares fit of dra on that design matrix
mod_ols = sm.OLS(coordts["dra"], mjy)
res_ols = mod_ols.fit()

# Fitted values at the observed epochs, kept for the comparison plot
dra6 = res_ols.fittedvalues

print(res_ols.summary())

## 1.7 Weighted Least Squares (WLS) from statsmodels

In [None]:
# Design matrix: epoch offset from 2015 plus the constant column that
# sm.add_constant attaches for the intercept term
mjy = sm.add_constant(np.array(coordts["mjy"]-2015))

# Inverse-variance weights built from the ra_err column
inv_var = 1./(coordts["ra_err"] ** 2)

# Weighted least-squares fit of dra on that design matrix
mod_wls = sm.WLS(coordts["dra"], mjy, weights=inv_var)
res_wls = mod_wls.fit()
print(res_wls.summary())

# Fitted values at the observed epochs, kept for the comparison plot
dra7 = res_wls.fittedvalues

In [None]:
# Parameter estimates of the WLS fit
res_wls.params

In [None]:
# Standard errors of the WLS parameter estimates
res_wls.bse

## 1.8 Curve_fit

In [None]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# File name: linear_fit.py
"""
Created on Thu Mar 28 10:40:19 2019

@author: Neo(liuniu@smail.nju.edu.cn)
"""


import numpy as np
from numpy import sqrt
from scipy.optimize import curve_fit

def linear_func(x, a, b):
    """Straight-line model y = a + b * x.

    Parameters
    ---------
    x : float or array_like of float
        independent variable
    a : float
        offset (value of the model at x = 0)
    b : float
        drift (slope)

    Returns
    -------
    float or array_like of float
        a + b * x
    """
    return a + b * x


def _linear_fit(x, y, yerr, return_mod, absolute_sigma):
    """Shared implementation behind linear_fit1 and linear_fit2.

    Fits linear_func to (x, y) with scipy's curve_fit.  `yerr` and
    `absolute_sigma` are only handed to curve_fit when `yerr` is given, so
    an unweighted fit uses curve_fit's defaults.
    """

    if yerr is None:
        popt, pcov = curve_fit(linear_func, x, y)
    else:
        popt, pcov = curve_fit(
            linear_func, x, y, sigma=yerr, absolute_sigma=absolute_sigma)

    offset, drift = popt
    offset_err, drift_err = sqrt(pcov[0, 0]), sqrt(pcov[1, 1])
    # Correlation coefficient between the offset and drift estimates
    corr = pcov[0, 1] / offset_err / drift_err

    if return_mod:
        # Prediction of the fitted model at the input x
        y_model = linear_func(x, *popt)
        return offset, offset_err, drift, drift_err, corr, y_model

    return offset, offset_err, drift, drift_err, corr


def linear_fit1(x, y, yerr=None, return_mod=False):
    """(Weighted) Linear fitting of y(x) = offset + drift * x

    When yerr is given it is passed to curve_fit with absolute_sigma=True.

    Parameters
    ---------
    x / y : series
        independent variable and the data to fit
    yerr : series, optional
        uncertainties of y, passed to curve_fit as sigma
    return_mod : bool, optional
        if True, the model prediction is appended to the returned tuple

    Returns
    -------
    offset/offset_err : float
        estimate and formal uncertainty of offset
    drift/drift_err : float
        estimate and formal uncertainty of drift
    corr : float
        correlation coefficient between offset and drift
    y_model : array_like of float
        prediction series from linear model of y (only if return_mod)
    """

    return _linear_fit(x, y, yerr, return_mod, absolute_sigma=True)


def linear_fit2(x, y, yerr=None, return_mod=False):
    """(Weighted) Linear fitting of y(x) = offset + drift * x

    When yerr is given it is passed to curve_fit with absolute_sigma=False.

    Parameters
    ---------
    x / y : series
        independent variable and the data to fit
    yerr : series, optional
        uncertainties of y, passed to curve_fit as sigma
    return_mod : bool, optional
        if True, the model prediction is appended to the returned tuple

    Returns
    -------
    offset/offset_err : float
        estimate and formal uncertainty of offset
    drift/drift_err : float
        estimate and formal uncertainty of drift
    corr : float
        correlation coefficient between offset and drift
    y_model : array_like of float
        prediction series from linear model of y (only if return_mod)
    """

    return _linear_fit(x, y, yerr, return_mod, absolute_sigma=False)

In [None]:
# Epoch offset from 2015, shared by both curve_fit variants
mjy = np.array(coordts["mjy"]-2015)

# Same data and uncertainties for both fits; they differ only in how
# curve_fit is told to treat the uncertainties
fit_options = dict(yerr=coordts["ra_err"], return_mod=True)
pmt1 = linear_fit1(mjy, coordts["dra"], **fit_options)
pmt2 = linear_fit2(mjy, coordts["dra"], **fit_options)

# One row per fit: formal errors of the offset (index 1) and drift (index 3)
se = np.vstack([[pmt[1], pmt[3]] for pmt in (pmt1, pmt2)])
se = np.round(se, 4)

rownames = ["ABS", "RET"]
colnames = ["x0_err", "x1_err"]

tab = SimpleTable(se, colnames, rownames, txt_fmt=default_txt_fmt)

print("Formal errors are ")
print(tab)

In [None]:
# Summary of the fitted drift (x1) and offset (x0) from every method.
# The RANSAC row reads the parameters of the final estimator fitted on the
# inliers (`reg5.estimator_`) instead of a hard-coded [0, 0] placeholder.
se = np.vstack([[reg1.coef_[0], reg1.intercept_],
                [reg2.coef_[0], reg2.intercept_],
                [reg3.coef_[0], reg3.intercept_],
                [reg4.coef_[0], reg4.intercept_],
                [reg5.estimator_.coef_[0], reg5.estimator_.intercept_],
                [res_ols.params[1], res_ols.params[0]],
                [res_wls.params[1], res_wls.params[0]],
                [pmt1[2], pmt1[0]],
                [pmt2[2], pmt2[0]]])

se = np.round(se, 4)

colnames = ["x1", "x0"]
rownames = ["SK_OWI", "SK_ONI", "SK_WWI", "SK_WNI", "SK_RAN",
           "ST_OLS", "ST_WLS", "CF_ABS", "CF_RET"]

tabl = SimpleTable(se, colnames, rownames, txt_fmt=default_txt_fmt)
print(tabl)

In [None]:
# Plot outputs
plt.errorbar(coordts["mjy"], coordts["dra"], yerr=coordts["ra_err"], 
             color="black", ms=2, fmt=".", elinewidth=0.2)
plt.plot(coordts["mjy"], dra1, color="b", linewidth=1, label="Method#1")
plt.plot(coordts["mjy"], dra2, color="y", linewidth=1, label="Method#2")
plt.plot(coordts["mjy"], dra3, color="r", linewidth=1, label="Method#3")
plt.plot(coordts["mjy"], dra4, color="g", linewidth=1, label="Method#4")
plt.plot(coordts["mjy"], dra5, color="m", linewidth=1, label="Method#5")
plt.plot(coordts["mjy"], dra6, color="royalblue", linewidth=1, label="Method#6")
plt.plot(coordts["mjy"], dra7, color="skyblue", linewidth=1, label="Method#7")

plt.legend(fontsize="x-small")
plt.ylim([-10, 10])

# 2. Statistics of the time series

In [None]:
# Normalized offsets: each coordinate offset divided by its *_err column
X_a = coordts["dra"] / coordts["ra_err"]
X_d = coordts["ddec"] / coordts["dec_err"]

In [None]:
# Keep only the rows whose offset and normalized offset are both small.
# The cuts use absolute values so that large negative outliers are rejected
# as well; a one-sided test (X <= 10, offset < 10) keeps every negative
# value regardless of its size.
coordts1 = coordts[((np.abs(X_a) <= 10) & (np.abs(coordts["dra"]) < 10))]
coordts2 = coordts[((np.abs(X_d) <= 10) & (np.abs(coordts["ddec"]) < 10))]