In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import tensorflow_ranking as tfr
# Get the data
runs = pd.read_csv("Data/runs.csv")[:100]
races = pd.read_csv("Data/races.csv")
## quick fix some nan values

runs["horse_country"] = runs["horse_country"].replace(np.nan, runs["horse_country"].mode()[0])
runs["horse_type"] = runs["horse_type"].replace(np.nan, runs["horse_type"].mode()[0])

EXAMPLE_FEATURES = ['horse_age', 'horse_country', 'horse_type', 'horse_rating']
# No context features until the races dataset is incorperated
CONTEXT_FEATURES = ['surface', 'distance', 'going']

In [3]:
def makeTestAndTrain(trainFile, testFile, splitNum):
    testTFRecord(test_file, rowNum)
    trainTFRecord(train_file, rowNum)

def trainTFRecord(output_file, raceIDNum):
    print("Creating training data ...")
    writer = tf.io.TFRecordWriter(output_file)
    """Build an Example proto"""
    for ID in range(raceIDNum):
        race = races.iloc[ID, :]
        # Context columns
        raceContext = tf.train.Features(feature={
            'surface':tf.train.Feature(int64_list=tf.train.Int64List(value=[race["surface"]])),
            'distance':tf.train.Feature(int64_list=tf.train.Int64List(value=[race["distance"]])),
            'going':tf.train.Feature(bytes_list=tf.train.BytesList(value=[race["going"].encode('utf-8')])),
        })
        
        # Sequence data
        raceRuns = runs.loc[runs["race_id"]==ID]
        hAge = []
        hCountry = []
        hType = []
        hRating = []
        hPlacing = []
        for i in range(len(raceRuns)):
            run = raceRuns.iloc[i]
            
            age_feature = tf.train.Feature(int64_list=tf.train.Int64List(value=[run['horse_age']]))
            hAge.append(age_feature)
            
            country_feature = tf.train.Feature(bytes_list=tf.train.BytesList(value=[run['horse_country'].encode("utf-8")]))
            hCountry.append(country_feature)
            
            type_feature = tf.train.Feature(bytes_list=tf.train.BytesList(value=[run['horse_type'].encode("utf-8")]))
            hType.append(type_feature)
            
            rating_feature = tf.train.Feature(int64_list=tf.train.Int64List(value=[run['horse_rating']]))
            hRating.append(rating_feature)
            
            place_feature = tf.train.Feature(int64_list=tf.train.Int64List(value=[run['result']]))
            hPlacing.append(place_feature)
            
        hAges = tf.train.FeatureList(feature=hAge)
        hCountries = tf.train.FeatureList(feature=hCountry)
        hTypes = tf.train.FeatureList(feature=hType)
        hRatings = tf.train.FeatureList(feature=hRating)
        hPlacings = tf.train.FeatureList(feature=hPlacing)
        
        horseInfo = tf.train.FeatureLists(feature_list={
            'horse_age':hAges,
            'horse_country':hCountries,
            'horse_type':hTypes,
            'horse_ratings':hRatings,
            'horse_placing':hPlacings
        })
        horseFeatureList = tf.train.FeatureLists(feature_list=horseInfo)

        example = tf.train.SequenceExample(context=raceContext, feature_lists=horseFeatureList)
        
        # Save the examples
        writer.write(example.SerializeToString())
    
    print("Done creating training data.")
    writer.close()
        
        
def testTFRecord(output_file, backRaceID):
    print("Creating test data ...")
    writer = tf.io.TFRecordWriter(output_file)
    """Build an Example proto"""
    for ID in range(backRaceID, max(races["race_id"])):
        race = races.iloc[ID, :]
        # Context columns
        raceContext = tf.train.Features(feature={
            'surface':tf.train.Feature(int64_list=tf.train.Int64List(value=[race["surface"]])),
            'distance':tf.train.Feature(int64_list=tf.train.Int64List(value=[race["distance"]])),
            'going':tf.train.Feature(bytes_list=tf.train.BytesList(value=[race["going"].encode('utf-8')])),
        })
        
        # Sequence data
        raceRuns = runs.loc[runs["race_id"]==ID]
        hAge = []
        hCountry = []
        hType = []
        hRating = []
        hPlacing = []
        for i in range(len(raceRuns)):
            run = raceRuns.iloc[i]
            
            age_feature = tf.train.Feature(int64_list=tf.train.Int64List(value=[run['horse_age']]))
            hAge.append(age_feature)
            
            country_feature = tf.train.Feature(bytes_list=tf.train.BytesList(value=[run['horse_country'].encode("utf-8")]))
            hCountry.append(country_feature)
            
            type_feature = tf.train.Feature(bytes_list=tf.train.BytesList(value=[run['horse_type'].encode("utf-8")]))
            hType.append(type_feature)
            
            rating_feature = tf.train.Feature(int64_list=tf.train.Int64List(value=[run['horse_rating']]))
            hRating.append(rating_feature)
            
            place_feature = tf.train.Feature(int64_list=tf.train.Int64List(value=[run['result']]))
            hPlacing.append(place_feature)
            
        hAges = tf.train.FeatureList(feature=hAge)
        hCountries = tf.train.FeatureList(feature=hCountry)
        hTypes = tf.train.FeatureList(feature=hType)
        hRatings = tf.train.FeatureList(feature=hRating)
        hPlacings = tf.train.FeatureList(feature=hPlacing)
        
        horseInfo = tf.train.FeatureLists(feature_list={
            'horse_age':hAges,
            'horse_country':hCountries,
            'horse_type':hTypes,
            'horse_rating':hRatings,
            'horse_placing':hPlacings
        })
        
        example = tf.train.SequenceExample(context=raceContext, feature_lists=horseInfo)
        
        # Save the examples
        writer.write(example.SerializeToString())
        
    print("Done creating test data.")
    writer.close()
    
        
train_file = 'Data/train.tfrecord'
test_file = 'Data/test.tfrecord'
rowNum = int(max(races["race_id"]) * 0.8)
makeTestAndTrain(train_file, test_file, rowNum)

Creating test data ...
Done creating test data.
Creating training data ...


TypeError: Argument feature_list is not iterable

In [None]:
def printDat(file):
    raw_dataset = tf.data.TFRecordDataset([file])
    for raw_record in raw_dataset.take(2):
        example = tf.train.Example()
        example.ParseFromString(raw_record.numpy())
        print(example)

In [None]:
printDat(train_file)

In [None]:
featuresDict = {"horse_age":tf.io.FixedLenSequenceFeature([], dtype=tf.int64),
                "horse_country":tf.io.FixedLenSequenceFeature([], dtype=tf.string),
                "horse_type":tf.io.FixedLenSequenceFeature([], dtype=tf.string),
                "horse_age":tf.io.FixedLenSequenceFeature([], dtype=tf.int64),
                "horse_placing":tf.io.FixedLenSequenceFeature([], dtype=tf.int64)
               }
contextDict = {"distance":tf.io.FixedLenFeature([], dtype=tf.int64),
               "surface":tf.io.FixedLenFeature([], dtype=tf.int64),
               "going":tf.io.FixedLenFeature([], dtype=tf.string)
              }

def parse_tfrecord(example):
    context, features = tf.io.parse_single_sequence_example(example, 
                                                         sequence_features=featuresDict, 
                                                         context_features=contextDict)
    distance = context["distance"]
    surface = context["surface"]
    going = context["going"]
    
    age = features["horse_age"]
    #country = tf.io.decode_raw(features["horse_country"], tf.uint8)
    return distance, surface, going, age, #country
    
Dataset = tf.data.TFRecordDataset(train_file)
Dataset = Dataset.map(parse_tfrecord)
#iterator = Dataset.make_one_shot_iterator()

batched_dataset = Dataset.batch(1)

for next_element in batched_dataset:
    tf.print(next_element)
#with tf.Session() as sess:
#    print(sess.run(iterator.get_next()))


In [None]:
tf.__version__

In [3]:
! pip install xgboost
import xgboost as xgb

Collecting xgboost
  Downloading xgboost-1.0.2.tar.gz (821 kB)
[K     |████████████████████████████████| 821 kB 1.3 MB/s eta 0:00:01
Building wheels for collected packages: xgboost
  Building wheel for xgboost (setup.py) ... [?25ldone
[?25h  Created wheel for xgboost: filename=xgboost-1.0.2-cp36-cp36m-macosx_10_9_x86_64.whl size=3463804 sha256=13d8a3c6c63fc5146e1a2c21a0dd38b23d4129ec5f650ba64b2f2f074c22955f
  Stored in directory: /Users/noahrowe/Library/Caches/pip/wheels/06/0a/03/1dd5317e4ad7882450a41265354839831f7094739ee401043c
Successfully built xgboost
Installing collected packages: xgboost
Successfully installed xgboost-1.0.2
You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.6/bin/python3.6 -m pip install --upgrade pip' command.[0m
