In [185]:
import pandas as pd

In [186]:
#### processing each book

## for 0:

def process_zero(df):
    df = df.copy()
    df = df[['handicap', 'odds', 'name']]
    df[['participant_name', 'name']] = df['name'].str.rsplit(' ', 1, expand=True)

    df = df.dropna()
    return df

## for 1: (Pinnacle)

def process_one(df):
    df = df.copy()
    df.dropna()
    return df[['handicap', 'odds', 'participant_name', 'name']]

## for two and six (DraftKings)

def process_two_six(df):
    df = df.copy()
    df = df[['handicap', 'odds', 'name']]

    df[['name', 'participant_name']] = df['name'].str.split(' - ', expand=True)
    df['participant_name'] = df['participant_name'].str.strip()

    df.dropna()

    return df

## for three and five

def process_three_five(df):
    df = df.copy()
    df = df[['handicap', 'odds', 'name', 'description']]

    df[['participant_name', 'description', 'a']] = df['description'].str.rsplit(' ', n=2, expand=True)
    df['participant_name'] = df['participant_name'].str.strip()

    df = df.drop(columns=['description', 'a'])
    df = df.dropna()

    return df


## for seven and eight

def process_seven_eight(df):
    df = df.copy()
    df = df[['handicap', 'odds', 'name']]

    df[['participant_name', 'name', 'a']] = df['name'].str.rsplit(' ', n=2, expand=True)

    df = df.drop(columns=['a'])
    df = df.dropna()

    return df

def process_master(data):
    df = pd.DataFrame.from_dict(data['market']['outcomes'])
    if data['bookie_key'] == "pinnacle":
        df = df.copy()
        df.dropna()
        df = df[['handicap', 'odds', 'participant_name', 'name']]
        return df
    # elif data['bookie_key'] == "fanduel":
    #     df = df.copy()
    #     df = df[['handicap', 'odds', 'name']]
    #     print(df)
    #     df[['participant_name', 'name']] = df['name'].str.rsplit(' ', 1, expand=True)

    #     df = df.dropna()
    #     return df
    elif data['bookie_key'] == "draftkings":
        df = df.copy()
        df = df[['handicap', 'odds', 'name']]

        df[['name', 'participant_name']] = df['name'].str.split(' - ', expand=True)
        df['participant_name'] = df['participant_name'].str.strip()

        df.dropna()
    
        return df
    elif data['bookie_key'] == "betrivers":
        df = df.copy()
        df = df[['handicap', 'odds', 'name']]

        df[['name', 'participant_name']] = df['name'].str.split(' - ', expand=True)
        df['participant_name'] = df['participant_name'].str.strip()

        df.dropna()

        return df

In [187]:
#### for merging all the books

## agg
def agg(df):    
    result_df = df.groupby(['participant_name', 'name']).agg({'handicap': 'first', 'odds': 'median'}).reset_index()
    return result_df

## pivot
def pivot(df):
    df = df.copy()
    pivot_df = pd.pivot_table(df, values='odds', index=['handicap', 'participant_name'], columns='name').reset_index()
    pivot_df.columns = ['line', 'participant_name', 'over_odds', 'under_odds']

    return pivot_df

## calculate odds
## adapted from Ammar Sulmanjee

def calculate_odds(x, y):
    if x >= 0:
        decimal_odds_1 = 1 + x/100
    else: 
        decimal_odds_1 = 1 + 100/abs(x)
        
    if y >= 0:
        decimal_odds_2 = 1 + y/100
    else: 
        decimal_odds_2 = 1 + 100/abs(y)

    imp_prob1 = (1 / decimal_odds_1) * 100
    imp_prob2 = (1 / decimal_odds_2) * 100

    total_implied_prob = round(imp_prob1 + imp_prob2, 4)
    fair_prob1 = round(imp_prob1 / total_implied_prob * 100, 2)
    fair_prob2 = round(imp_prob2 / total_implied_prob * 100, 2)

    ## 47, 53 == 100
    ## 53 - 50 = 3

    return max(fair_prob1, fair_prob2) - 50

In [188]:
bets = pd.DataFrame(columns = ['participant_name', 'ev', 'market', 'line'])

In [189]:
import os
import json

fantasy_directory = 'NBA_DATA/other/2023-10-24'
bookies_directory = 'NBA_DATA/books/2023-10-24'

for market in os.listdir(fantasy_directory):
    for game in os.listdir(os.path.join(fantasy_directory, market)):
        #load the json file
        with open(os.path.join(fantasy_directory, market, game)) as f:
            data = json.load(f)
            # iterate over the data's "fantasy_books" subfield
            ud = None
            for book in data['fantasy_books']:
                if book['bookie_key'] == "underdog":
                    ud = pd.DataFrame.from_dict(book['market']['lines'])
                    ud = ud[['participant_name', 'line']]
                    ud.columns = [ud.columns[0], 'handicap']
                    

                    # get sportsbook odds
                    with open(os.path.join(bookies_directory, market, game.replace("fantasy", "books"))) as f:
                        bookies_data = json.load(f)
                        books = []
                        for bookie in bookies_data['sportsbooks']:
                            if bookie['bookie_key'] == "pinnacle" or bookie['bookie_key'] == "betrivers" or bookie['bookie_key'] == "draftkings":
                                df = process_master(bookie)
                                books.append(df)
            
                        # we need to concat the books

                        new_datasets = []
                        for dataset in books:
                            dataset = pd.merge(dataset, ud, how='inner', on = ['participant_name', 'handicap'])
                            dataset = agg(dataset)
                            if dataset.shape[0] > 0 and dataset.shape[1] > 0:
                                new_datasets.append(pivot(dataset))
                        
                        concat = pd.concat(new_datasets, axis=0)
                        final = concat.copy()

                        # drop rows with null values
                        final = final.dropna()

                        final['ev']= final.apply(lambda row: calculate_odds(row['over_odds'], row['under_odds']), axis = 1)
                        final = final.drop(columns=['over_odds', 'under_odds'])
                        final = final.groupby(['participant_name', 'line']).agg({'ev': 'median'}).reset_index()

                        final = final[final['ev'] >= 4.99] # Minimum 5% edge
                        final = final[final['ev'] <= 25.00] #This is equivalent to dropping all odds whose absolute value are more than 300

                        final['market'] = market

                        final['game'] = game

                        if final.shape[0] > 0: bets = pd.concat([bets, final[['participant_name', 'ev', 'market', 'line', 'game']]])

bets = bets.sort_values(by=['ev'], ascending=False)
bets.dropna(inplace=True)
bets.drop_duplicates(keep='first', inplace=True, ignore_index=True)
print(bets.head(10))



  if final.shape[0] > 0: bets = pd.concat([bets, final[['participant_name', 'ev', 'market', 'line', 'game']]])


    participant_name      ev                     market  line  \
0     Taurean Prince  24.790   player_points_over_under   7.5   
1      Rui Hachimura  24.020   player_points_over_under   9.5   
2    Cade Cunningham  23.065   player_points_over_under  19.5   
3        Eric Gordon  21.595   player_threes_over_under   1.5   
4     Christian Wood  18.625   player_points_over_under   7.5   
5        Dario Saric  17.180   player_threes_over_under   0.5   
6  Spencer Dinwiddie  12.940   player_points_over_under  18.5   
7       Jusuf Nurkic  11.130  player_assists_over_under   2.5   
8       Jusuf Nurkic  11.065   player_points_over_under  12.5   
9      Anthony Davis  10.910   player_threes_over_under   0.5   

                                                game  
0  fantasy__2023-10-24__4622c02f9bd1df188631c86e0...  
1  fantasy__2023-10-24__4622c02f9bd1df188631c86e0...  
2  fantasy__2023-10-24__4622c02f9bd1df188631c86e0...  
3  fantasy__2023-10-24__9b3130e607f80aa4912aa184e...  
4  fantas