### Statistics on Delta T Values

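- Observed Delta T values are read from https://maia.usno.navy.mil/ser7/deltat.data and compared below against the predictions of three approximation algorithms.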

In [1]:
import common
from datetime import datetime

# Fail fast with a readable message if the dataset file is missing, rather
# than hitting an error when a later cell tries to open it.
assert common.USNO_DATA_PATH.exists(), 'Dataset not found'

In [2]:
# Read the dataset inside a context manager so the file handle is closed as
# soon as the read finishes, and keep only the non-blank lines. The result is
# a list rather than a one-shot `filter` iterator, so it can be iterated more
# than once without silently yielding nothing.
with common.USNO_DATA_PATH.open('r') as dataset_file:
  lines = [line for line in dataset_file if line.strip()]

def parse_line(line: str) -> tuple[float, float]:
  '''Convert one dataset row into a (decimal year, delta_t) pair.

  The row is split on whitespace into exactly four fields: year, month, day
  and delta_t. The date is expressed as a decimal year by adding to the year
  the fraction of that calendar year that has elapsed at the given date
  (days since 1 January divided by the number of days in the year, so leap
  years are divided by 366).

  Raises ValueError if the row does not have exactly four fields, or if a
  field cannot be converted to a number or a valid date.
  '''
  year_str, month_str, day_str, delta_t_str = line.split()
  year_num = int(year_str)

  current = datetime(year_num, int(month_str), int(day_str))
  year_start = datetime(year_num, 1, 1)
  next_year_start = datetime(year_num + 1, 1, 1)

  days_in_year = (next_year_start - year_start).days
  days_elapsed = (current - year_start).days

  decimal_year = float(year_str) + days_elapsed / days_in_year
  return decimal_year, float(delta_t_str)

# Parse every retained row into a (decimal year, delta_t) tuple.
parsed = [parse_line(raw_line) for raw_line in lines]

In [3]:
# Preview the parsed data by showing every 41st (year, delta_t) pair
# instead of dumping the whole list.
parsed[::41]

[(1973.0849315068492, 43.4724),
 (1976.4972677595629, 46.997),
 (1979.9150684931508, 50.4599),
 (1983.3287671232877, 53.3024),
 (1986.7479452054795, 55.1898),
 (1990.1616438356164, 56.9755),
 (1993.5808219178082, 59.6343),
 (1997.0, 62.295),
 (2000.4153005464482, 63.9691),
 (2003.8328767123287, 64.5544),
 (2007.2465753424658, 65.2494),
 (2010.6657534246576, 66.2349),
 (2014.0849315068492, 67.3136),
 (2017.495890410959, 68.8245),
 (2020.9153005464482, 69.363)]

In [4]:
import numpy as np
from common import delta_t_algo1, delta_t_algo2, delta_t_algo3

# Split the parsed (year, delta_t) pairs into two parallel arrays.
years    = np.array([pair[0] for pair in parsed])
observed = np.array([pair[1] for pair in parsed])

# Wrap each algorithm with np.vectorize so it can be applied element-wise
# to the whole `years` array in a single call.
vf_algo1, vf_algo2, vf_algo3 = (
  np.vectorize(algo) for algo in (delta_t_algo1, delta_t_algo2, delta_t_algo3)
)

# Predicted delta_t for every observation date, one array per algorithm.
algo1_pred, algo2_pred, algo3_pred = (
  vectorized(years) for vectorized in (vf_algo1, vf_algo2, vf_algo3)
)

# Sanity check: all four counts should be equal.
print(f'{len(parsed)} - {len(algo1_pred)} - {len(algo2_pred)} - {len(algo3_pred)}')


614 - 614 - 614 - 614


In [5]:
import pandas as pd

# Collect the observations and the three prediction arrays side by side,
# one row per observation date.
df = pd.DataFrame(dict(
  year=years,
  observed=observed,
  algo1=algo1_pred,
  algo2=algo2_pred,
  algo3=algo3_pred,
))

# Show every 29th row as a compact preview of the table.
df.iloc[::29]

Unnamed: 0,year,observed,algo1,algo2,algo3
0,1973.084932,43.4724,43.371883,43.402298,43.402298
29,1975.49589,45.982,45.948788,45.977999,45.977999
58,1977.915068,48.4355,48.469036,48.493195,48.493195
87,1980.330601,50.8454,51.212542,50.8175,50.8175
116,1982.747945,52.734,52.731642,52.838379,52.838379
145,1985.161644,54.432,54.211091,54.433929,54.433929
174,1987.580822,55.6004,55.690347,55.578296,55.578296
203,1990.0,56.8553,57.2,56.894641,56.894641
232,1992.415301,58.6917,58.771437,58.657319,58.657319
261,1994.832877,60.6324,60.442491,60.658554,60.658554


In [6]:
from sklearn import metrics

# (label, scoring function) pairs, in the order the rows are displayed.
# Every function is called as score(y_true, y_pred).
metric_fns = [
  ('MAE',       metrics.mean_absolute_error),
  ('MSE',       metrics.mean_squared_error),
  ('RMSE',      metrics.root_mean_squared_error),
  ('R2',        metrics.r2_score),
  ('MAPE',      metrics.mean_absolute_percentage_error),
  ('Max Error', metrics.max_error),
]

# One row per metric: the label followed by that metric's score for each
# algorithm. Building the rows from a table replaces eighteen hand-copied
# calls, so a metric or prediction array cannot be mismatched by a typo.
results = [
  (label, *(score(observed, pred) for pred in (algo1_pred, algo2_pred, algo3_pred)))
  for label, score in metric_fns
]

pd.DataFrame(results, columns=['Metric', 'Algo1', 'Algo2', 'Algo3'])

Unnamed: 0,Metric,Algo1,Algo2,Algo3
0,MAE,1.618161,0.598376,0.147195
1,MSE,13.445706,1.429614,0.139673
2,RMSE,3.666839,1.195665,0.373728
3,R2,0.75742,0.974208,0.99748
4,MAPE,0.023777,0.008825,0.002231
5,Max Error,14.541698,4.780888,2.106707


In [7]:
# Conclusion drawn from the metrics table above: of the three algorithms,
# Algo3 has the lowest MAE, MSE, RMSE, MAPE and max error, and the highest R2.
'Apparently Algo3 is better.'

'Apparently Algo3 is better.'