![QuantConnect Logo](https://cdn.quantconnect.com/web/i/icon.png)
<hr>

In [22]:
# Research-environment handle used for all data requests in this notebook.
qb = QuantBook()

# Subscribe to the EODHD upcoming-earnings dataset; keep its symbol for the history call.
earnings = qb.add_data(EODHDUpcomingEarnings, "earnings").symbol

# Requested history window; the end bound is reused below as the report-date cut-off.
history_start = datetime(2015, 1, 1)
history_end = datetime(2025, 2, 20)
be_tested = qb.history(earnings, history_start, history_end, Resolution.DAILY)

# Keep only the last recorded estimate for every (symbol, reportdate) pair.
cleaned_be_tested = (
    be_tested.reset_index()[["symbol", "reportdate", "estimate"]]
    .groupby(["symbol", "reportdate"])
    .last()
)

# Rebuild the index as (symbol.value, Timestamp) tuples.
# NOTE(review): `.value` is presumably the plain ticker string -- confirm against the dataset.
cleaned_be_tested.index = pd.MultiIndex.from_tuples(
    [
        (symbol.value, pd.to_datetime(report_date))
        for symbol, report_date in cleaned_be_tested.index
    ]
)

# Drop rows whose report date is on or after the end of the requested window.
report_dates = cleaned_be_tested.index.get_level_values(1)
cleaned_be_tested = cleaned_be_tested[report_dates < history_end]
cleaned_be_tested

In [23]:
# Load the reference earnings table from the object store, indexed by its first two columns.
file_path = qb.object_store.get_file_path("earnings_reference.csv")
cleaned_benchmark = pd.read_csv(file_path, index_col=[0, 1]).dropna()

# Rebuild the index as (first-level key, Timestamp) tuples so dates are comparable
# with the index of `cleaned_be_tested`.
cleaned_benchmark.index = pd.MultiIndex.from_tuples(
    [(key, pd.to_datetime(raw_date)) for key, raw_date in cleaned_benchmark.index]
)

# Keep only the benchmark rows whose first-level key also appears in the data under test.
tested_keys = cleaned_be_tested.index.get_level_values(0).unique()
benchmark_keys = cleaned_benchmark.index.get_level_values(0)
cleaned_benchmark = cleaned_benchmark[benchmark_keys.isin(tested_keys)]
cleaned_benchmark

In [40]:
def _expand_index(index, tolerance_days):
    """Return every (symbol, date) tuple within +/- ``tolerance_days`` days of an entry of ``index``."""
    # A negative tolerance degrades to exact-date matching.
    tolerance_days = max(tolerance_days, 0)
    offsets = [timedelta(days=d) for d in range(-tolerance_days, tolerance_days + 1)]
    return [(symbol, date + offset) for symbol, date in index for offset in offsets]


def _restrict_to_window(frame, date_since, date_until):
    """Keep the rows of ``frame`` whose date (index level 1) lies in [date_since, date_until]."""
    dates = frame.index.get_level_values(1)
    in_window = dates <= date_until
    if date_since is not None:
        in_window = in_window & (dates >= date_since)
    return frame[in_window]


def create_confusion_maxtrix(cleaned_be_tested, cleaned_benchmark, N, date_since=None, date_until=None):
    """Compare tested earnings dates against a benchmark with a day tolerance.

    Both frames must be indexed by a two-level ``(symbol, date)`` MultiIndex.
    A tested row is a true positive when the benchmark has a row for the same
    symbol within ``N`` days (inclusive, on either side); otherwise it is a
    false positive. A benchmark row is a false negative when no tested row for
    the same symbol lies within ``N`` days of it.

    Args:
        cleaned_be_tested: Frame under test, indexed by ``(symbol, date)``.
        cleaned_benchmark: Reference frame, indexed by ``(symbol, date)``.
        N: Tolerance in days; ``N=0`` requires an exact date match.
        date_since: Optional inclusive lower bound applied to both frames.
        date_until: Optional inclusive upper bound applied to both frames.
            When a window is requested and this is omitted, it defaults to
            ``qb.start_date - timedelta(2)`` (``qb`` is the notebook global).

    Returns:
        A 2x2 ``numpy`` array ``[[TP, FN], [FP, 0]]``. True negatives are not
        defined for this comparison and are reported as 0.
    """
    if date_since is not None or date_until is not None:
        if date_until is None:
            date_until = qb.start_date - timedelta(2)
        # Each frame is filtered by its OWN dates; mixing the two indexes
        # yields a mask of the wrong length (or a silently wrong filter).
        cleaned_be_tested = _restrict_to_window(cleaned_be_tested, date_since, date_until)
        cleaned_benchmark = _restrict_to_window(cleaned_benchmark, date_since, date_until)

    # Tested rows that do / do not fall inside a benchmark tolerance window.
    near_benchmark = cleaned_be_tested.index.isin(_expand_index(cleaned_benchmark.index, N))
    tp = cleaned_be_tested[near_benchmark]
    fp = cleaned_be_tested[~near_benchmark]

    # Benchmark rows that no tested row comes close to.
    near_tested = cleaned_benchmark.index.isin(_expand_index(cleaned_be_tested.index, N))
    fn = cleaned_benchmark[~near_tested]

    # Create confusion matrix
    # True Negatives (TN) is set to 0
    return np.array([[tp.shape[0], fn.shape[0]], [fp.shape[0], 0]])

In [42]:
# Confusion matrix laid out as [[TP, FN], [FP, 0]] (see create_confusion_maxtrix).
cm = create_confusion_maxtrix(cleaned_be_tested, cleaned_benchmark, 1)
first_row_total = np.sum(cm[0])      # TP + FN
first_col_total = np.sum(cm[:, 0])   # TP + FP
# NOTE(review): TP / (TP + FN) is conventionally called recall, and FP / (TP + FP)
# the false discovery rate; the printed labels are kept unchanged -- confirm intent.
print(
    f"±1 days Precision: {cm[0][0] / first_row_total}, "
    f"Missed Rate: {cm[0][1] / first_row_total}, "
    f"False Positive Rate (to be investigated): {cm[1][0] / first_col_total}"
)

In [43]:
# Confusion matrix laid out as [[TP, FN], [FP, 0]] (see create_confusion_maxtrix).
cm = create_confusion_maxtrix(cleaned_be_tested, cleaned_benchmark, 3)
first_row_total = np.sum(cm[0])      # TP + FN
first_col_total = np.sum(cm[:, 0])   # TP + FP
# NOTE(review): TP / (TP + FN) is conventionally called recall, and FP / (TP + FP)
# the false discovery rate; the printed labels are kept unchanged -- confirm intent.
print(
    f"±3 days Precision: {cm[0][0] / first_row_total}, "
    f"Missed Rate: {cm[0][1] / first_row_total}, "
    f"False Positive Rate (to be investigated): {cm[1][0] / first_col_total}"
)

In [44]:
# Confusion matrix laid out as [[TP, FN], [FP, 0]] (see create_confusion_maxtrix).
cm = create_confusion_maxtrix(cleaned_be_tested, cleaned_benchmark, 7)
first_row_total = np.sum(cm[0])      # TP + FN
first_col_total = np.sum(cm[:, 0])   # TP + FP
# The first ratio is the same TP / (TP + FN) quantity that the ±1 and ±3 day
# reports label "Precision", so the same label is used here instead of "Accuracy".
# NOTE(review): TP / (TP + FN) is conventionally called recall, and FP / (TP + FP)
# the false discovery rate -- confirm the intended naming for all three reports.
print(
    f"±7 days Precision: {cm[0][0] / first_row_total}, "
    f"Missed Rate: {cm[0][1] / first_row_total}, "
    f"False Positive Rate (to be investigated): {cm[1][0] / first_col_total}"
)