In [1]:
import torch
import torch.nn as nn
import os

import numpy as np 
import pandas as pd
import datetime
import pickle
import time
import random

from model import ClassifierModel
from train import train_model

In [2]:
def generate_architecture ():
    """Draw a random "ramp up, plateau, ramp down" list of layer widths.

    Every width is a power of two. Four exponents/counts are drawn with
    ``random.randint`` (in this order): the first exponent (4-5), the peak
    exponent (6-10), the plateau length (1-3) and the last exponent (1-3).

    The widths rise from ``2**first`` up to, but not including, the peak;
    then the peak is repeated ``plateau`` times; then the widths fall from
    the peak down to ``2**last`` inclusive. Because the falling ramp starts
    at the peak as well, the peak width appears ``plateau + 1`` times.

    Returns:
        list[int]: the layer widths, e.g. ``[16, 32, 64, 64, 32, 16, 8]``.
    """
    first_exp = random.randint(4,5)
    peak_exp = random.randint(6,10)
    plateau_len = random.randint(1,3)
    last_exp = random.randint(1,3)

    rising = list(range(first_exp, peak_exp))
    plateau = [peak_exp] * plateau_len
    # The descent includes both the peak and the final exponent.
    falling = list(range(peak_exp, last_exp - 1, -1))

    return [2 ** exponent for exponent in rising + plateau + falling]

def generate_cols (cols, all_cols):
    """Return a new column list with one column randomly added or removed.

    A coin flip (``random.getrandbits(1)``) picks the move:

    * add: append one column drawn uniformly from the entries of
      ``all_cols`` that are not already in ``cols``;
    * remove: return ``len(cols) - 1`` columns sampled from ``cols``
      (``random.sample``, so the order of the survivors may change).

    If the chosen move is impossible (nothing left to add, or nothing to
    remove) the other move is used instead; if neither is possible an
    unchanged copy is returned. This replaces the previous behaviour of
    looping forever when every column was already selected and of raising
    ``ValueError`` on an empty ``cols``.

    Args:
        cols: currently selected column names. Never modified in place.
        all_cols: pool of column names that may be added.

    Returns:
        list: a new list of column names.
    """
    # Computing the addable columns up front avoids the unbounded
    # rejection-sampling loop the original used.
    candidates = [col for col in all_cols if col not in cols]

    add = bool(random.getrandbits(1))
    if add and not candidates:
        add = False
    elif not add and not cols:
        add = True

    if add and candidates:
        # Build a new list so the caller's list is left untouched.
        return list(cols) + [random.choice(candidates)]
    if not add and cols:
        return random.sample(list(cols), len(cols) - 1)
    # Nothing to add and nothing to remove.
    return list(cols)

In [4]:
in_path = os.path.join("data","5_ordinal_mean_tensor")

# Full race table; the first CSV column is used as the index.
entire_df = pd.read_csv(os.path.join(in_path,'all_races.csv'), index_col=0)

# Pool of candidate feature columns (only used by the commented-out random
# feature sampling inside the loop below).
all_cols = ['total_stakes','horse_weight','horse_handicap','horse_odds','horse_rating','horse_import_type','horse_sex','horse_colour',
    'horse_age','horse_origin','horse_gear','days_since_import',
    'draw',
    'jockey_age','jockey_nationality','jockey_wins','jockey_rides','jockey_stakes','jockey_same_race_wins',
    # 'race_location','race_class','race_going','race_distance','race_surface','race_course',
    'total_stakes_rank','horse_weight_rank','horse_handicap_rank','horse_odds_rank','horse_rating_rank','days_since_import_rank',
    'jockey_age_rank','jockey_rides_rank','jockey_stakes_rank','jockey_same_race_wins_rank']

# Leaderboard of trained models and its backup copy.
models_csv = 'models.csv'
models_backup_csv = 'models_copy.csv'
# Header used only when models.csv has to be created from scratch. 'val_acc'
# is required by the sort below; the other names are placeholders --
# TODO confirm they match the project's existing leaderboard files.
default_model_columns = ['model_name', 'layers', 'cols', 'val_loss', 'val_acc']

# Random architecture search: there is no exit condition, so this runs until
# the kernel is interrupted.
while True:
    # cols_to_keep = random.sample(all_cols, random.randint(15, len(all_cols)))
    cols_to_keep = ['total_stakes_rank','horse_weight_rank','horse_handicap_rank','horse_odds_rank','horse_rating_rank','days_since_import_rank',
    'jockey_age_rank','jockey_rides_rank','jockey_stakes_rank','jockey_same_race_wins_rank']
    layers = generate_architecture()

    x_df = entire_df[cols_to_keep]
    y_df = entire_df[['top_3','not_top_3']]

    x = x_df.to_numpy()
    y = y_df.to_numpy()

    x_tensor = torch.from_numpy(x)
    y_tensor = torch.from_numpy(y)

    print('training', cols_to_keep, layers)

    model_name, best_val_loss, best_val_acc = train_model(x_tensor, y_tensor, layers)
    # model_name, best_val_loss, best_val_acc = 'model', 0.1, 0.1

    result_row = [model_name, str(layers), str(cols_to_keep), best_val_loss, best_val_acc]

    # Create the leaderboard on first use instead of raising
    # FileNotFoundError after a finished training run and losing its result.
    if os.path.exists(models_csv):
        models_df = pd.read_csv(models_csv, index_col=0)
        # -1 is a throw-away label; the index is rebuilt right below.
        models_df.loc[-1] = result_row
    else:
        models_df = pd.DataFrame([result_row], columns=default_model_columns)

    # Best validation accuracy first, with a fresh 0..n-1 index.
    models_df = models_df.sort_values('val_acc', ascending=False).reset_index(drop=True)
    models_df.to_csv(models_csv)
    models_df.to_csv(models_backup_csv)

training ['total_stakes_rank', 'horse_weight_rank', 'horse_handicap_rank', 'horse_odds_rank', 'horse_rating_rank', 'days_since_import_rank', 'jockey_age_rank', 'jockey_rides_rank', 'jockey_stakes_rank', 'jockey_same_race_wins_rank'] [16, 32, 64, 128, 256, 512, 512, 256, 128, 64, 32, 16, 8]
2024_01_27_22_24_16_32_64_128_256_512_512_256_128_64_32_16_8_32_353_586 saved best val loss 0.5860604896860303 best val acc 31.22641509433962
training ['total_stakes_rank', 'horse_weight_rank', 'horse_handicap_rank', 'horse_odds_rank', 'horse_rating_rank', 'days_since_import_rank', 'jockey_age_rank', 'jockey_rides_rank', 'jockey_stakes_rank', 'jockey_same_race_wins_rank'] [32, 64, 64, 32, 16, 8]
2024_01_27_22_29_32_64_64_32_16_8_32_252_547 saved best val loss 0.5470032886888996 best val acc 2.742138364779874
training ['total_stakes_rank', 'horse_weight_rank', 'horse_handicap_rank', 'horse_odds_rank', 'horse_rating_rank', 'days_since_import_rank', 'jockey_age_rank', 'jockey_rides_rank', 'jockey_stakes

KeyboardInterrupt: 

In [None]:
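# NOTE: everything in this cell is disabled evaluation code kept for reference.
# Before re-enabling it, note that it uses `device`, which is not defined in
# the cells shown above.
# layers = [8, 16, 8, 4]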

# model = ClassifierModel(35, layers).to(device)
# # model = ConvNet(input_size, hidden_size, output_size).to(device)

# if os.path.exists('model_configs/2024_01_23_19_48_8_16_8_4_32_259_536'):
#     model.load_state_dict(torch.load('model_configs/2024_01_23_19_48_8_16_8_4_32_259_536', map_location="cpu"))
# model.eval()

# in_path = os.path.join("data","5_ordinal_mean_tensor")

# entire_df = pd.read_csv(os.path.join(in_path,'all_races.csv'), index_col=0)

# correct, trios, total = 0, 0, 0

# for index, row in entire_df.iterrows():

#     x_df = row.drop(['race_index','place','finish_time','top_3','not_top_3'])
#     x = x_df.to_numpy(dtype=np.float32)
#     x_tensor = torch.from_numpy(x).unsqueeze(dim=0)

#     pred = model(x_tensor).detach().numpy()[0]

#     if pred[0] > pred[1]:
#         if row['top_3']:
#             print(pred)
#             # print(row['top_3'])
#             # print('correct')
#             correct += 1
#     total += 1

# print(correct/total)