In [132]:
# Read in goal_shot_rows.csv

import pandas as pd
import numpy as np
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

import seaborn as sns
import matplotlib.pyplot as plt

from datetime import datetime

import data_prep

pd.options.mode.chained_assignment = None  # default='warn'

model_path = '0427Model_2M.pt'

In [133]:
def create_input_data_from_csv(path_to_csv):
    df = pd.read_csv(path_to_csv)

    # get list of ilocs of rows where the 'Event' column is 'GOAL or SHOT or MISS'
    goal_shot_rows = df.loc[df['Event'].isin(['GOAL', 'SHOT', 'MISS'])]

    # shift the dataframe by 1 row
    shifted_df = df.shift(1)
    shifted_df.columns = ['prev_' + name for name in df.columns]

    # combine goal_shot_rows with shifted_df, but using rows from goal_shot_rows
    goal_shot_rows = pd.concat([goal_shot_rows, shifted_df.loc[goal_shot_rows.index]], axis=1)

    goal_shot_rows = goal_shot_rows[goal_shot_rows['Period'] != 0]
    goal_shot_rows = goal_shot_rows[goal_shot_rows['Period'] != 5]

    # Create "home_or_away" column based on if "Ev_team" is the same as "Home_Team" or "Away_Team"
    goal_shot_rows['home_or_away'] = goal_shot_rows.apply(lambda x: 'home' if x['Ev_Team'] == x['Home_Team'] else 'away', axis=1)
    
    # check to see if on power play if "home_or_away" is equal to "home" and "Strength" is equal to "5x4" or "home_or_away" is equal to "away" and "Strength" is equal to "4x5"
    goal_shot_rows['true_strength'] = goal_shot_rows.apply(lambda x: "5x4" if (x['home_or_away'] == 'home' and x['Strength'] == '5x4' ) or (x['home_or_away'] == 'away' and x['Strength'] == '4x5') else "5x5", axis=1)

    # similar as above, but for 5x3
    goal_shot_rows['true_strength'] = goal_shot_rows.apply(lambda x: "5x3" if (x['home_or_away'] == 'home' and x['Strength'] == '5x3' ) or (x['home_or_away'] == 'away' and x['Strength'] == '3x5') else x['true_strength'], axis=1)

    # similar as above, but for 6x5
    goal_shot_rows['true_strength'] = goal_shot_rows.apply(lambda x: "6x5" if (x['home_or_away'] == 'home' and x['Strength'] == '6x5' ) or (x['home_or_away'] == 'away' and x['Strength'] == '5x6') else x['true_strength'], axis=1)

    # similar as above, but for 4x3
    goal_shot_rows['true_strength'] = goal_shot_rows.apply(lambda x: "4x3" if (x['home_or_away'] == 'home' and x['Strength'] == '4x3' ) or (x['home_or_away'] == 'away' and x['Strength'] == '3x4') else x['true_strength'], axis=1)

    # similar as above,but for 3x3
    goal_shot_rows['true_strength'] = goal_shot_rows.apply(lambda x: "3x3" if (x['home_or_away'] == 'home' and x['Strength'] == '3x3' ) or (x['home_or_away'] == 'away' and x['Strength'] == '3x3') else x['true_strength'], axis=1)

    # check to see if a shot is on an empty net by checking if home_or_away is equal to "home" and "Away_Goalie" is equal to "None" or home_or_away is equal to "away" and "Home_Goalie" is equal to "None"
    goal_shot_rows['on_empty_net'] = goal_shot_rows.apply(lambda x: 1 if (x['home_or_away'] == 'home' and x['Away_Goalie'] == '') or (x['home_or_away'] == 'away' and x['Home_Goalie'] == '') else 0, axis=1)


    val_input_data = goal_shot_rows[['Event', 'Period', 'Seconds_Elapsed', 'Strength', 'Type', 'xC', 'yC', 'home_or_away', 'true_strength', 'on_empty_net', 'prev_Event', 'prev_Period', 'prev_Seconds_Elapsed', 'prev_Strength', 'prev_Type', 'prev_xC', 'prev_yC']]
    val_result_data = goal_shot_rows['Event']
    val_result_data = val_result_data.apply(lambda x: 1 if x == "GOAL" else 0)

    val_input_data = val_input_data.drop(columns=['Event'])

    # Split out the data between numeric values (can carry forward) and categorical values (need to be turned into binary columns)
    val_input_data_numeric = val_input_data[['Period', 'Seconds_Elapsed', 'xC', 'yC', 'on_empty_net','prev_Seconds_Elapsed', 'prev_xC', 'prev_yC']]
    val_input_data_categorical = pd.DataFrame()

    for column in ['Type', 'prev_Event', 'true_strength', 'home_or_away']:
        dummy_columns = pd.get_dummies(val_input_data[column])
        val_input_data_categorical = pd.concat([dummy_columns, val_input_data_categorical], axis=1)

    for column in ['prev_Event', 'prev_Strength', 'prev_Type']:
        dummy_columns = pd.get_dummies(val_input_data[column])

        # rename all columns with a prefix of "prev_"
        dummy_columns.columns = ['prev_' + str(col) for col in dummy_columns.columns]
        val_input_data_categorical = pd.concat([val_input_data_categorical, dummy_columns], axis=1)
        
    # change all True/False to 1/0
    val_input_data_categorical = val_input_data_categorical.applymap(lambda x: 1 if x == True else 0)

    # combine the two dataframes
    val_input_data_combined = pd.concat([val_input_data_numeric, val_input_data_categorical], axis=1)

    #set val_input_data2 to be all floats
    val_input_data_combined = val_input_data_combined.astype(float)

    columns = ['Period', 'Seconds_Elapsed', 'xC', 'yC', 'prev_Seconds_Elapsed',
            'prev_xC', 'prev_yC', 'BLOCK', 'CHL', 'DELPEN', 'FAC', 'GIVE', 'HIT',
            'MISS', 'PENL', 'SHOT', 'STOP', 'TAKE', 'BACKHAND', 'DEFLECTED',
            'SLAP SHOT', 'SNAP SHOT', 'TIP-IN', 'WRAP-AROUND', 'WRIST SHOT', '0x0',
            '3x3', '3x4', '3x5', '4x3', '4x4', '4x5', '5x3', '5x4', '5x5', '6x5',
            'on_empty_net', 'away', 'home', '5v4', '5v5',
            'prev_BLOCK', 'prev_CHL', 'prev_DELPEN', 'prev_FAC', 'prev_GIVE',
            'prev_HIT', 'prev_MISS', 'prev_PENL', 'prev_SHOT', 'prev_STOP',
            'prev_TAKE', 'prev_0x5', 'prev_3x3', 'prev_3x4', 'prev_3x5', 'prev_4x3',
            'prev_4x4', 'prev_4x5', 'prev_5x3', 'prev_5x4', 'prev_5x5', 'prev_5x6', 'prev_6x5',
            'prev_BACKHAND', 'prev_DEFLECTED',
            'prev_PS-Covering puck in crease(0 min)',
            'prev_PS-Goalkeeper displaced net(0 min)',
            'prev_PS-Holding on breakaway(0 min)',
            'prev_PS-Hooking on breakaway(0 min)',
            'prev_PS-Slash on breakaway(0 min)',
            'prev_PS-Throw object at puck(0 min)',
            'prev_PS-Tripping on breakaway(0 min)', 'prev_SLAP SHOT',
            'prev_SNAP SHOT', 'prev_TIP-IN', 'prev_WRAP-AROUND', 'prev_WRIST SHOT']

    # For each column in input_data_combined, check if it exists in val_input_data_combined, if not add it as all 0s
    for column in columns:
        if column not in val_input_data_combined.columns:
                val_input_data_combined[column] = 0

    for column in val_input_data_combined.columns:
        if column not in columns:
                if column in val_input_data_combined.columns:
                    val_input_data_combined = val_input_data_combined.drop(column, axis=1)


    # reduce val_input_data_combined to have the same columns as input_data_combined
    val_input_data_combined = val_input_data_combined[columns]

    # change all NaN numbers to 0
    val_input_data_combined = val_input_data_combined.fillna(0)

    print(val_input_data_combined.shape)

    model = nn.Sequential(
        nn.Linear(78, 240),
        nn.ReLU(),
        nn.Linear(240, 60),
        nn.ReLU(),
        nn.Linear(60, 1),
        nn.Sigmoid()
    )

    model.load_state_dict(torch.load(model_path))
    model.eval()
    model.cpu()

    x = torch.tensor(val_input_data_combined.values, dtype=torch.float32)
    y = torch.tensor(val_result_data.values, dtype=torch.float32)
    result = model(x)

    # add the probability of a goal to val_input_data_combined
    goal_shot_rows['Goal_Probability'] = result.detach().numpy()

    return val_input_data_combined, val_result_data, goal_shot_rows

# Game Evaluation

For game prediction, I'm stealing a bit from MoneyPuck, the inputs are:

1. Home team xG ratrio full for season w/linear delay
2. same, but for away team
3. home team days of rest
4. away team days of rest
5. num of games by the home team
6. num of games by the away team


Eventually, need to add some amount of goalie calculation/generic player strength calculation, but that will have to wait.


In [134]:
def get_single_game_xG_totals(original_input, game_id, state = "any"):
    game_rows = original_input[original_input["Game_Id"] == game_id]

    if state != "any":
        game_rows = game_rows[game_rows["Strength"] == state]

    # get first value of Home_Team and Away_Team
    home_team = game_rows["Home_Team"].iloc[0]
    away_team = game_rows["Away_Team"].iloc[0]

    # get all rows where the Ev_Team is the home team
    home_team_rows = game_rows[game_rows["Ev_Team"] == home_team]

    # get all rows where the Ev_Team is the away team
    away_team_rows = game_rows[game_rows["Ev_Team"] == away_team]

    # add a new column "total_xG" to home_team_rows and away_team_rows
    home_team_rows["total_xG"] = home_team_rows["Goal_Probability"].cumsum()
    away_team_rows["total_xG"] = away_team_rows["Goal_Probability"].cumsum()

    # time elapsed is (period - 1) * 1200 + seconds_elapsed
    home_team_rows["Seconds_Elapsed"] = (home_team_rows["Period"] - 1) * 1200 + home_team_rows["Seconds_Elapsed"]
    home_team_rows["Minutes_Elapsed"] = home_team_rows["Seconds_Elapsed"] / 60

    # print home_team_rows to csv
    # home_team_rows.to_csv("home_team_rows.csv")

    # time elapsed is (period - 1) * 1200 + seconds_elapsed
    away_team_rows["Seconds_Elapsed"] = (away_team_rows["Period"] - 1) * 1200 + away_team_rows["Seconds_Elapsed"]
    away_team_rows["Minutes_Elapsed"] = away_team_rows["Seconds_Elapsed"] / 60

    # away_team_rows.to_csv("away_team_rows.csv")

    total_home_xG = home_team_rows["total_xG"].iloc[-1].round(2)
    total_away_xG = away_team_rows["total_xG"].iloc[-1].round(2)

    # combine home_team_rows and away_team_rows into one dataframe
    combined_rows = pd.concat([home_team_rows, away_team_rows])

    home_score = home_team_rows["Home_Score"].iloc[-1]
    away_score = away_team_rows["Away_Score"].iloc[-1]

    return combined_rows, home_team, total_home_xG, away_team, total_away_xG, home_score, away_score


In [135]:

# This has to be done season by season, or the game_ids will merge together in a broken way
def get_season_xG_weights(original_input, season_id):
    # split original_input into multiple dataframes based off of game_id
    game_ids = original_input["Game_Id"].unique()
    # remove all instances of game_ids that start with 3
    game_ids = [x for x in game_ids if str(x)[0] != "3"]

    nhl_teams = {}
    nhl_teams_predicted_xG = {}

    results = []
    predictions = []
    predictions_rounded = []

    training_input = []

    # read in the schedule csv into a dictionary, using game_id as the key
    schedule = pd.read_csv("schedule/2020-2023_schedule.csv")
    schedule = schedule.set_index("game_id")

    for game_id in game_ids:
        # print(f"Game ID: {game_id}")
        combined_rows, home_team, total_home_xG, away_team, total_away_xG, home_score, away_score = get_single_game_xG_totals(original_input, game_id, "any")

        # add the total_xG of the home team and away team to nhl_teams
        if home_team not in nhl_teams:
            nhl_teams[home_team] = {"xG_percent": [], "total_games": 0, "game_dates" : []}
        if away_team not in nhl_teams:
            nhl_teams[away_team] = {"xG_percent": [], "total_games": 0, "game_dates" : []}

        # if both teams have more than 20 games played, create a prediciton for the game
        if nhl_teams[home_team]["total_games"] >= 20 and nhl_teams[away_team]["total_games"] >= 20:
            # get the average of last 20 games for home_team and away_team
            # home_xG_percent_avg = np.mean(nhl_teams[home_team]["xG_percent"])
            # away_xG_percent_avg = np.mean(nhl_teams[away_team]["xG_percent"])

            # home_xG_percent_avg = home_xG_percent_avg / (home_xG_percent_avg + away_xG_percent_avg)
            # away_xG_percent_avg = away_xG_percent_avg / (home_xG_percent_avg + away_xG_percent_avg)

            # # Apply linear decay to each home_team's xg_percent game by game
            decay_rate = 0.01  # Adjust the decay rate as desired


            # I think this logic still has some flaws, but we'll get to it later

            # Calculate the decayed xg_percent for each game
            home_decayed_xg_percent = np.zeros_like(nhl_teams[home_team]["xG_percent"])
            len_home_games = len(nhl_teams[home_team]["xG_percent"])
            for i in range(len_home_games):
                home_decayed_xg_percent[i] = nhl_teams[home_team]["xG_percent"][(len_home_games - i -1 )] * (1 - (decay_rate * i))

            away_decayed_xg_percent = np.zeros_like(nhl_teams[away_team]["xG_percent"])
            len_away_games = len(nhl_teams[away_team]["xG_percent"])
            for i in range(len_away_games):
                away_decayed_xg_percent[i] = nhl_teams[away_team]["xG_percent"][(len_away_games - i - 1)] * (1 - (decay_rate * i))

            home_xG_percent_sum = np.sum(home_decayed_xg_percent)
            away_xG_percent_sum = np.sum(away_decayed_xg_percent)
            
            home_xG_percent_avg = home_xG_percent_sum / (home_xG_percent_sum + away_xG_percent_sum)
            away_xG_percent_avg = away_xG_percent_sum / (home_xG_percent_sum + away_xG_percent_sum)

           

            # Calculate the days of rest for each team (format is YYYY-MM-DD):
            home_team_prev_date = nhl_teams[home_team]["game_dates"][-1]
            away_team_prev_date = nhl_teams[away_team]["game_dates"][-1]


            # Todo: add a way to get the season id programatically off of the game's date
            alt_game_id = season_id + "0" + str(game_id)
            # print(alt_game_id)

            schedule_row = schedule.loc[int(alt_game_id)]
            home_team_score = schedule_row["home_score"]
            away_team_score = schedule_row["away_score"]

            # Calculate the days of rest for each team
            # calucluate the difference in days between two strings with format YYYY-MM-DD
            # Convert strings to datetime objects
            home_prev_date = datetime.strptime(home_team_prev_date, "%Y-%m-%d")
            away_prev_date = datetime.strptime(away_team_prev_date, "%Y-%m-%d")
            current_date = datetime.strptime(combined_rows["Date"].iloc[0], "%Y-%m-%d")

            home_team_days_of_rest = (current_date - home_prev_date).days - 1
            away_team_days_of_rest = (current_date - away_prev_date).days - 1

            if (home_team_days_of_rest > 3):
                home_team_days_of_rest = 3
            if (away_team_days_of_rest > 3):
                away_team_days_of_rest = 3

            if home_team not in nhl_teams_predicted_xG:
                nhl_teams_predicted_xG[home_team] = {}
            if away_team not in nhl_teams_predicted_xG:
                nhl_teams_predicted_xG[away_team] = {}
            nhl_teams_predicted_xG[home_team][game_id] = {"xg_sum": home_xG_percent_sum, "days_rest": home_team_days_of_rest, "num_games": nhl_teams[home_team]["total_games"]}
            nhl_teams_predicted_xG[away_team][game_id] = {"xg_sum": away_xG_percent_sum, "days_rest": away_team_days_of_rest, "num_games": nhl_teams[away_team]["total_games"]}
            training_input.append({"home_xg_sum": home_xG_percent_sum, "away_xg_sum": away_xG_percent_sum,
                                    "home_days_rest": home_team_days_of_rest, "away_days_rest": away_team_days_of_rest,
                                    "home_num_games": nhl_teams[home_team]["total_games"],
                                    "away_num_games": nhl_teams[away_team]["total_games"],
                                    "game_result": home_team_score > away_team_score})
            

            results.append(home_team_score > away_team_score)
            predictions.append(home_xG_percent_avg)
            predictions_rounded.append(home_xG_percent_avg.round())

        home_xG_percent = total_home_xG / (total_home_xG + total_away_xG)
        away_xG_percent = total_away_xG / (total_home_xG + total_away_xG)

        nhl_teams[home_team]["xG_percent"].append(home_xG_percent)
        nhl_teams[away_team]["xG_percent"].append(away_xG_percent)
        nhl_teams[home_team]["total_games"] += 1
        nhl_teams[away_team]["total_games"] += 1
        nhl_teams[home_team]["game_dates"].append(combined_rows["Date"].iloc[0])
        nhl_teams[away_team]["game_dates"].append(combined_rows["Date"].iloc[0])

    # get the accuracy of the model
    results_tensor = torch.tensor(results, dtype=torch.float32)
    predictions_tensor = torch.tensor(predictions, dtype=torch.float32)
    predictions_rounded_tensor = torch.tensor(predictions_rounded, dtype=torch.float32)

    accuracy = (predictions_rounded_tensor == results_tensor).float().mean()

    # calculate log loss of predicitons
    log_loss = nn.BCELoss()
    log_loss_results = log_loss(predictions_tensor, results_tensor)

    # print(f"Accuracy {accuracy}")
    # print(f"log loss {log_loss_results}")

    return nhl_teams, nhl_teams_predicted_xG, training_input

# Creating input data for the game prediciton neural network

We need:

1. To get result data (preferably through the schedule, but working on it)
2. To get the input data:
    1. Home team xG percentage sum for season w/linear delay
    2. same, but for away team
    3. home team days of rest
    4. away team days of rest
    5. num of games by the home team
    6. num of games by the away team

In [136]:
# read in nhl_pbp_20232024.csv
val_input_data, val_result_data, original_input = create_input_data_from_csv(r"C:\Users\JoshG\hockey_scraper_data\csvs\nhl_pbp_20222023.csv")

nhl_teams, nhl_teams_predicted_xG, training_input = get_season_xG_weights(original_input, "2022")

  val_input_data_categorical = val_input_data_categorical.applymap(lambda x: 1 if x == True else 0)


(121992, 78)


In [137]:
# read in nhl_pbp_20232024.csv
val_input_data2, val_result_data2, original_input2 = create_input_data_from_csv(r"C:\Users\JoshG\hockey_scraper_data\csvs\nhl_pbp_20202021.csv")

nhl_teams2, nhl_teams_predicted_xG2, training_input2 = get_season_xG_weights(original_input2, "2020")

# combine the two training_inputs
training_input = training_input + training_input2

  val_input_data_categorical = val_input_data_categorical.applymap(lambda x: 1 if x == True else 0)


(78581, 78)


In [138]:
# Convert training_input to a dataframe
training_input_df = pd.DataFrame(training_input)

print(training_input_df.columns)

game_result_data_df = training_input_df["game_result"]

training_input_df = training_input_df.drop(columns=['game_result'])


Index(['home_xg_sum', 'away_xg_sum', 'home_days_rest', 'away_days_rest',
       'home_num_games', 'away_num_games', 'game_result'],
      dtype='object')


In [141]:
print(training_input_df["home_days_rest"].value_counts())
print(training_input_df["away_days_rest"].value_counts())
print(training_input_df.shape)

home_days_rest
1    972
2    238
0    206
3    113
Name: count, dtype: int64
away_days_rest
1    910
0    336
2    208
3     75
Name: count, dtype: int64
(1529, 6)


In [140]:
# setting device on GPU if available, else CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)
print()

x = torch.tensor(training_input_df.values, dtype=torch.float32)
y = torch.tensor(game_result_data_df.values, dtype=torch.float32)

x.to(device)
y.to(device)

model = nn.Sequential(
    nn.Linear(6, 40),
    nn.ReLU(),
    nn.Linear(40, 20),
    nn.ReLU(),
    nn.Linear(20, 1),
    nn.Sigmoid()
)

model.to(device)


loss_fn = nn.BCELoss()  # binary cross entropy
loss_fn.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


Using device: cuda



In [142]:
n_epochs = 500000
batch_size = 100

optimizer = torch.optim.Adam(model.parameters(), lr=0.002)

for epoch in range(n_epochs):
    for i in range(0, len(x), batch_size):
        Xbatch = x[i:i+batch_size]
        y_pred = model(Xbatch.cuda())
        ybatch = y[i:i+batch_size]
        #loss = loss_fn(y_pred, ybatch)
        loss = loss_fn(y_pred.squeeze(), ybatch.cuda())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    if epoch % 1000 == 0:
        print(f'{epoch}, {loss}')


# compute accuracy (no_grad is optional)
# y_pred = model(x.cuda()).cuda()
 

torch.save(model.state_dict(), 'Game_prediciton_model_0427_500k.pt')

0, 0.6637235283851624


KeyboardInterrupt: 

In [112]:
model.load_state_dict(torch.load('Game_prediciton_model_0427_25k.pt'))
model.eval()
model.cpu()

x = torch.tensor(training_input_df.values, dtype=torch.float32)
y = torch.tensor(game_result_data_df.values, dtype=torch.float32)

y_pred = model(x)



# convert the tensor to a numpy array
y_pred = y_pred.detach().numpy()
y = y.detach().numpy()


# accuracy = (y_pred.round() == y).float().mean()\
accuracy_results = []

for i in range(len(y_pred)):
    # print(f"Prediction: {y_pred[i][0]} Actual: {y[i]}")
    if y_pred[i][0].round() == y[i]:
        accuracy_results.append(1)
    else:
        accuracy_results.append(0)

accuracy = sum(accuracy_results) / len(accuracy_results)
print(f"Accuracy {accuracy}")
print(f"log loss {log_loss_results}")
# calculate log loss of predicitons



ValueError: Using a target size (torch.Size([979])) that is different to the input size (torch.Size([979, 1])) is deprecated. Please ensure they have the same size.

In [113]:
val_input_data, val_result_data, original_input = create_input_data_from_csv(r"C:\Users\JoshG\hockey_scraper_data\csvs\nhl_pbp_20212022.csv")

nhl_teams, nhl_teams_predicted_xG, training_input = get_season_xG_weights(original_input, "2021")

# Convert training_input to a dataframe
training_input_df = pd.DataFrame(training_input)
game_result_data_df = training_input_df["game_result"]
training_input_df = training_input_df.drop(columns=['game_result'])

  val_input_data_categorical = val_input_data_categorical.applymap(lambda x: 1 if x == True else 0)


(121546, 78)


In [123]:
model.load_state_dict(torch.load('Game_prediciton_model_0427_25k.pt'))
model.eval()
model.cpu()

x = torch.tensor(training_input_df.values, dtype=torch.float32)
y = torch.tensor(game_result_data_df.values, dtype=torch.float32)

y_pred = model(x)


# convert the tensor to a numpy array
y_pred = y_pred.detach().numpy()
y = y.detach().numpy()


# accuracy = (y_pred.round() == y).float().mean()\
accuracy_results = []

for i in range(len(y_pred)):
    # print(f"Prediction: {y_pred[i][0]} Actual: {y[i]}")
    if y_pred[i][0].round() == y[i]:
        accuracy_results.append(1)
    else:
        accuracy_results.append(0)

accuracy = sum(accuracy_results) / len(accuracy_results)
print(f"Accuracy {accuracy}")


Accuracy 0.5720122574055159


In [131]:
results = y
predictions = y_pred
results2 = results.flatten()
predictions2 = predictions.flatten()


for i in range(len(predictions2)):
    if predictions2[i] < 0.5:
        predictions2[i] = 1 - predictions2[i]
        if results2[i] == False:
            results2[i] = True
        else:
            results2[i] = False


results_tensor = torch.tensor(results2)
predictions_tensor = torch.tensor(predictions2)
predictions_rounded = [int(x) for x in predictions2]
predictions_rounded_tensor = torch.tensor(predictions_rounded)



percentages = [x / 100.0 for x in range(50, 100, 2)]



for i in percentages:

    indices = torch.where((predictions_tensor >= i) & (predictions_tensor <= i + 0.02))
    filtered_predictions_tensor = predictions_tensor[indices]
    filtered_results_tensor = results_tensor[indices]
    filtered_predictions_rounded_tensor = predictions_rounded_tensor[indices]

    # convert filtered_results_tensor from 1/0 to True/False
    filtered_results_tensor = filtered_results_tensor.bool()

    accuracy_tensor = (filtered_results_tensor).float().mean()
    # get length of filtered_results_tensor
    length = len(filtered_results_tensor)
    

    print(f"{i}, {accuracy_tensor}, {len(filtered_results_tensor)}")

0.5, 0.4651162922382355, 43
0.52, 0.5555555820465088, 54
0.54, 0.6000000238418579, 40
0.56, 0.5, 60
0.58, 0.6000000238418579, 60
0.6, 0.5384615659713745, 117
0.62, 0.5957446694374084, 47
0.64, 0.4864864945411682, 37
0.66, 0.5909090638160706, 44
0.68, 0.6666666865348816, 39
0.7, 0.38461539149284363, 39
0.72, 0.5, 38
0.74, 0.7419354915618896, 31
0.76, 0.5862069129943848, 29
0.78, 0.6818181872367859, 22
0.8, 0.692307710647583, 26
0.82, 0.6875, 32
0.84, 0.5652173757553101, 23
0.86, 0.7222222089767456, 18
0.88, 0.6470588445663452, 17
0.9, 0.5714285969734192, 14
0.92, 0.5833333134651184, 12
0.94, 0.6666666865348816, 12
0.96, 0.6428571343421936, 14
0.98, 0.5495495200157166, 111
