### Statistics on Delta T Values

- Data from https://maia.usno.navy.mil/ser7/deltat.data

In [1]:
import common
from datetime import datetime

# Fail fast, before any parsing is attempted, if nothing exists at the path
# given by common.USNO_DATA_PATH.
assert common.USNO_DATA_PATH.exists(), 'Dataset not found'

In [2]:
# Read the dataset inside a context manager so the file handle is closed as
# soon as the lines are loaded, keeping only the non-blank ones. Materialising
# a list (rather than a one-shot `filter` iterator) lets `lines` be iterated
# more than once without silently yielding nothing the second time.
with common.USNO_DATA_PATH.open('r') as dataset_file:
  lines = [raw_line for raw_line in dataset_file if raw_line.strip()]

def parse_line(line: str) -> tuple[float, float]:
  '''Turn one dataset row into a (decimal year, delta_t) pair.

  The row must hold exactly four whitespace-separated fields: year, month,
  day and delta_t. The date is expressed as the year plus the fraction of
  that year already elapsed at midnight of the given day, so leap years are
  divided by 366 days and ordinary years by 365.

  Raises ValueError when the row does not have four fields or when a field
  cannot be converted to a number or a valid calendar date.
  '''
  year_txt, month_txt, day_txt, delta_t_txt = line.split()
  year_num = int(year_txt)

  observed_on = datetime(year_num, int(month_txt), int(day_txt))
  next_new_year = datetime(year_num + 1, 1, 1)
  this_new_year = datetime(year_num, 1, 1)

  # Whole days in this calendar year, and whole days already gone by.
  days_in_year = (next_new_year - this_new_year).days
  days_elapsed = (observed_on - this_new_year).days

  return float(year_txt) + days_elapsed / days_in_year, float(delta_t_txt)

# One (decimal year, delta_t) tuple per non-blank dataset line, in file order.
parsed = [parse_line(raw_line) for raw_line in lines]

In [3]:
# Spot-check the parser: display every 41st (year, delta_t) pair rather than
# dumping the full list.
parsed[::41]

[(1973.0849315068492, 43.4724),
 (1976.4972677595629, 46.997),
 (1979.9150684931508, 50.4599),
 (1983.3287671232877, 53.3024),
 (1986.7479452054795, 55.1898),
 (1990.1616438356164, 56.9755),
 (1993.5808219178082, 59.6343),
 (1997.0, 62.295),
 (2000.4153005464482, 63.9691),
 (2003.8328767123287, 64.5544),
 (2007.2465753424658, 65.2494),
 (2010.6657534246576, 66.2349),
 (2014.0849315068492, 67.3136),
 (2017.495890410959, 68.8245),
 (2020.9153005464482, 69.363)]

In [4]:
import numpy as np
from common import delta_t_algo1, delta_t_algo2

# Split the parsed (year, delta_t) pairs into two parallel arrays: the inputs
# fed to both algorithms and the reference values they are compared against.
years            = np.array([year for year, _ in parsed])
expected_values  = np.array([delta_t for _, delta_t in parsed])

# Pin the output dtype explicitly. Without `otypes`, np.vectorize infers the
# dtype from an extra call on the first element (so every later result is cast
# to whatever type that first call happened to return) and raises on empty
# input.
# NOTE(review): assumes both algorithms return real-valued scalars, which
# matches the float results recorded below - confirm against `common`.
vf_algo1 = np.vectorize(delta_t_algo1, otypes=[float])
vf_algo2 = np.vectorize(delta_t_algo2, otypes=[float])

algo1_values = vf_algo1(years)
algo2_values = vf_algo2(years)

# Sanity check: all three lengths should be identical.
print(f'{len(parsed)} - {len(algo1_values)} - {len(algo2_values)}')


614 - 614 - 614


In [5]:
import pandas as pd

# One row per observation: the decimal year, the reference delta_t from the
# dataset, and the value produced by each algorithm for that year.
df = pd.DataFrame(dict(
  year=years,
  expected_value=expected_values,
  algo1_value=algo1_values,
  algo2_value=algo2_values,
))

# Preview both ends of the series (head/tail default to five rows each).
pd.concat((df.head(), df.tail()))

Unnamed: 0,year,expected_value,algo1_value,algo2_value
0,1973.084932,43.4724,43.371883,43.402298
1,1973.161644,43.5648,43.453718,43.484129
2,1973.246575,43.6737,43.544376,43.574779
3,1973.328767,43.7782,43.632159,43.662553
4,1973.413699,43.8763,43.722914,43.753298
609,2023.832877,69.1727,83.216453,73.772823
610,2023.915068,69.1724,83.343665,73.821237
611,2024.0,69.1752,83.47516,73.871344
612,2024.084699,69.1797,83.606341,73.921394
613,2024.163934,69.1874,83.729098,73.968288


In [8]:
from sklearn import metrics

# Table of (display label, scorer) pairs. Every scorer is called as
# scorer(y_true, y_pred), so adding a metric is a one-line change here instead
# of another copy-pasted append block.
metric_functions = [
  ('MAE',       metrics.mean_absolute_error),
  ('MSE',       metrics.mean_squared_error),
  ('RMSE',      metrics.root_mean_squared_error),
  ('R2',        metrics.r2_score),
  ('MAPE',      metrics.mean_absolute_percentage_error),
  ('Max Error', metrics.max_error),
]

# One (label, algo1 score, algo2 score) row per metric, each algorithm scored
# against the same reference values.
results = [
  (
    label,
    scorer(expected_values, algo1_values),
    scorer(expected_values, algo2_values),
  )
  for label, scorer in metric_functions
]

pd.DataFrame(results, columns=['Metric', 'Algo1', 'Algo2'])

Unnamed: 0,Metric,Algo1,Algo2
0,MAE,1.618161,0.598376
1,MSE,13.445706,1.429614
2,RMSE,3.666839,1.195665
3,R2,0.75742,0.974208
4,MAPE,0.023777,0.008825
5,Max Error,14.541698,4.780888


In [7]:
# Conclusion drawn from the recorded metrics table: Algo2 scores better than
# Algo1 on every listed metric (lower MAE, MSE, RMSE, MAPE and max error, and
# a higher R2).
'Apparently Algo2 is better.'

'Apparently Algo2 is better.'