In [None]:
!pip install 
!pip install oracledb
!pip install line_profiler
!pip install memory_profiler

In [None]:
# Load the profiling extensions; line_profiler supplies the `%lprun` magic
# that the query cells use.
%load_ext memory_profiler
%load_ext line_profiler

# Reload both extensions as well, so re-running this cell in a live kernel
# picks them up again.
%reload_ext memory_profiler
%reload_ext line_profiler

In [None]:
import os
from pprint import pprint
from urllib.parse import quote_plus

from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi

In [None]:
# Connection settings are read from the environment so real credentials never
# have to be written into the notebook; the fallbacks are placeholder values.
username = os.environ.get('MONGODB_USERNAME', 'admin')
password = os.environ.get('MONGODB_PASSWORD', 'password')
cluster = os.environ.get('MONGODB_CLUSTER', 'cluster')

# Percent-escape the credentials: characters such as '@', ':' or '/' in a
# username or password would otherwise corrupt the connection string.
uri = (
    f"mongodb+srv://{quote_plus(username)}:{quote_plus(password)}"
    f"@{cluster}/?retryWrites=true&w=majority"
)

db = None
# Create a new client and connect to the server
try:
    client = MongoClient(uri, server_api=ServerApi('1'))
    # A ping forces a round trip, so connection problems surface here.
    client.admin.command('ping')
    print("Successfully connected to MongoDB!")
    db = client["PokerDB"]
except Exception as e:
    print(f"Connection failed: {e}")
    # Re-raise so execution stops at this cell instead of failing later
    # when `db` (still None) is indexed.
    raise

In [None]:
def pipeline_queries(pipeline, collection, as_list=True):
    """
    Run an aggregation pipeline on a MongoDB collection.

    Args:
        pipeline (list): Aggregation pipeline stages.
        collection: Object exposing ``aggregate(pipeline)`` (a MongoDB collection).
        as_list (bool): If True (default), consume the cursor and return a list,
            so the results can be iterated more than once. If False, return the
            cursor unchanged.

    Returns:
        list | cursor: Aggregation results.
    """
    cursor = collection.aggregate(pipeline)
    return list(cursor) if as_list else cursor


# Backward-compatible camelCase alias for code that refers to the helper as
# `pipelineQueries`.
pipelineQueries = pipeline_queries

In [None]:
# Handles to the collections of the connected database that the queries use.
players_collection = db["Players"]
tournaments_collection = db["Tournaments"]
winners_collection = db["Winners"]
hands_collection = db["Hands"]
# The per-game queries further down read from `rounds_collection`.
# NOTE(review): the collection name "Rounds" is assumed from the naming pattern
# of the handles above — confirm against the actual database.
rounds_collection = db["Rounds"]

#### How often do players win in a particular position?

In [None]:
pipeline = [
    {
        '$match': {'$expr': {'$gte': [ {'$size': '$players'}, 5 ]}}
    }, {
        '$unwind': {'path': '$winners'}
    }, {
        '$group': { '_id': '$winners.Position','count': { '$sum': 1 }}
    }, {
        '$project': {'count': 1, '_id': 0,'position': '$_id'}
    }, {
        '$sort': { 'position': 1}
    }
]
%lprun -f pipelineQueries winning_positions = pipelineQueries(pipeline, hands_collection)

Timer unit: 1e-09 s

Total time: 0.15356 s
File: /var/folders/g7/zzcmg5nn1pg9xnpy33cm360c0000gn/T/ipykernel_3571/3316437495.py
Function: pipelineQueries at line 1

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
     1                                           def pipelineQueries(pipeline, collection):
     2         1  153559000.0    2e+08    100.0      cursor = collection.aggregate(pipeline)
     3         1       1000.0   1000.0      0.0      return cursor

In [None]:
# Show each position's win count, one result document per line.
for position_stats in winning_positions:
    print(position_stats)

#### Top ten largest pots and the winning cards

In [None]:
pipeline = [
    {
        '$unwind': {'path': '$winners'}
    }, {
        '$sort': { 'winners.Pot': -1 }
    }, {
        '$limit': 10
    }, {
        '$project': { '_id': 0, 'Pot': '$winners.Pot', 'card': '$winners.HandRank'}
    }
]
%lprun -f pipelineQueries top_ten_pots = pipelineQueries(pipeline, hands_collection)

In [None]:
# List the largest pots, one result document per line.
for pot_entry in top_ten_pots:
    print(pot_entry)

#### Top 9 Winning Card Combinations

In [None]:
pipeline = [
    {
        '$unwind': { 'path': '$winners' }
    }, {
        '$group': { '_id': '$winners.HandRank', 'total': { '$sum': 1 } }
    }, {
        '$project': { '_id': 0, 'card_rank': '$_id', 'total': '$total' }
    }, {
        '$sort': { 'total': -1 }
    }
]

%lprun -f pipelineQueries winning_card_combos = pipelineQueries(pipeline, hands_collection)

In [None]:
# Print the win total of every hand rank, one result document per line.
for combo_total in winning_card_combos:
    print(combo_total)

#### Find players who played in the 2023-12-25 tournament

In [None]:
pipeline = [
    {
        '$match': { 'Date': '2023-12-25'}
    }, {
        '$project': {'Player': '$Players'}
    }, {
        '$unwind': { 'path': '$Player'}
    }, {
        '$lookup': {'from': 'Players', 'localField': 'Player', 'foreignField': 'PID', 'as': 'result'
        }
    }, {
        '$unwind': { 'path': '$result'}
    }, {
        '$project': { 'player': '$result.Name', '_id': 0 }
    }
]

%lprun -f pipelineQueries players_in_tournament = pipelineQueries(pipeline, tournaments_collection)

In [None]:
# Print every participant found for the tournament, one result document per line.
for participant in players_in_tournament:
    print(participant)

#### Who won the most money in 2023?

In [None]:
pipeline = [
    {
        '$match': { 'Date': { '$regex': '2023'}}
    }, {
        '$unwind': { 'path': '$Winners' }
    }, {
        '$sort': { 'Winners.Payout': -1}
    }, {
        '$limit': 1
    }, {
        '$project': {'player': '$Winners.Player', '_id': 0 }
    }
]

%lprun -f pipelineQueries most_money_wins_2023 = pipelineQueries(pipeline, tournaments_collection)

In [None]:
# Print the name of the player returned by the query.
for top_winner in most_money_wins_2023:
    winner_name = top_winner['player']
    print(winner_name)

#### What is the average number of hands in a game?

In [None]:
pipeline = [
    {
        '$project': {
            '_id': 0,
            'games': '$Games.game',
            'total_hands': { '$size': '$Games.hands'}
        }
    }, {
        '$group': {
            '_id': 0,
            'total_games': { '$sum': 1 },
            'total_hands': { '$sum': '$total_hands'}
        }
    }, {
        '$project': {
            '_id': 0,
            'average_hands': {'$divide': [ '$total_hands', '$total_games']
            }
        }
    }
]

%lprun -f pipelineQueries game_hands_avg = pipelineQueries(pipeline, rounds_collection)

In [None]:
# Report the average computed by the query.
for item in game_hands_avg:
    message = f"Average number of hands per game: {str(item['average_hands'])}"
    print(message)

#### Countries with the best players (i.e., the players who have won the most games)

In [None]:
pipeline = [
    {
        '$unwind': { 'path': '$Winners'}
    }, {
        '$group': {  '_id': '$Winners.Player_id',  'wins': {'$sum': 1 }}
    }, {
        '$sort': { 'wins': -1}
    }, {
        '$limit': 3
    }, {
        '$lookup': {
            'from': 'Players',
            'localField': '_id',
            'foreignField': 'PID',
            'as': 'result'
        }
    }, {
        '$project': {
            'Player': '$result.Name',
            'Country': '$result.Country',
            'Number of games won': '$wins'
        }
    }
]

%lprun -f pipelineQueries best_player_countries = pipelineQueries(pipeline, tournaments_collection)

Timer unit: 1e-09 s

Total time: 0.113387 s
File: /var/folders/g7/zzcmg5nn1pg9xnpy33cm360c0000gn/T/ipykernel_3571/3316437495.py
Function: pipelineQueries at line 1

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
     1                                           def pipelineQueries(pipeline, collection):
     2         1  113387000.0    1e+08    100.0      cursor = collection.aggregate(pipeline)
     3         1          0.0      0.0      0.0      return cursor

In [None]:
# Print the country value of each of the top players.
for top_player in best_player_countries:
    country = top_player['Country']
    print(country)

#### Most common hand rank in the different stages of the hand after pre-flop

In [None]:
pipeline =[
    {
        '$unwind': { 'path': '$cards' }
    }, {
        '$group': {
            '_id': {
                'Stage': '$cards.Stage',
                'HandRank': '$cards.HandRank'
            },
            'count': { '$sum': 1 }
        }
    }, {
        '$sort': { 'count': -1}
    }, {
        '$group': {
            '_id': '$_id.Stage',
            'handrank': {'$first': '$_id.HandRank'},
            'count': {'$first': '$count'}
        }
    }, {
        '$project': {
            '_id': 0,
            'Stage': '$_id',
            'handrank': '$handrank',
            'count': '$count'
        }
    }
]

%lprun -f pipelineQueries common_card_in_stage = pipelineQueries(pipeline, hands_collection)

Timer unit: 1e-09 s

Total time: 0.236229 s
File: /var/folders/g7/zzcmg5nn1pg9xnpy33cm360c0000gn/T/ipykernel_3571/3316437495.py
Function: pipelineQueries at line 1

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
     1                                           def pipelineQueries(pipeline, collection):
     2         1  236228000.0    2e+08    100.0      cursor = collection.aggregate(pipeline)
     3         1       1000.0   1000.0      0.0      return cursor

In [None]:
# Print the most frequent hand rank of each stage.
for item in common_card_in_stage:
    # The replacement field must be closed before the literal " times";
    # leaving the word inside the braces is a syntax error.
    print(f"{item['Stage']}: {item['handrank']} appears about {str(item['count'])} times")

{'Stage': 'FLOP', 'handrank': 'HC', 'count': 30632}
{'Stage': 'TURN', 'handrank': 'ONEPAIR', 'count': 24472}
{'Stage': 'RIVER', 'handrank': 'ONEPAIR', 'count': 18489}

#### Players with the most aggressive style of betting (largest bets)

In [None]:
pipeline = [
    {
        '$project': {'cards': 1}
    }, {
        '$unwind': {'path': '$cards'}
    }, {
        '$sort': {'cards.Chips': -1}
    }, {
        '$limit': 10
    }, {
        '$project': {'_id': 0,'player': '$cards.Player', 'bet_placed': '$cards.Chips'}
    }
]


%lprun -f pipelineQueries aggressive_bets = pipelineQueries(pipeline, hands_collection)

Timer unit: 1e-09 s

Total time: 0.156331 s
File: /var/folders/g7/zzcmg5nn1pg9xnpy33cm360c0000gn/T/ipykernel_3571/3316437495.py
Function: pipelineQueries at line 1

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
     1                                           def pipelineQueries(pipeline, collection):
     2         1  156331000.0    2e+08    100.0      cursor = collection.aggregate(pipeline)
     3         1          0.0      0.0      0.0      return cursor

In [None]:
# `pd.DataFrame` and the Styler API used below come from pandas.
import pandas as pd

# Column-oriented view of the largest bets. The dict is bound to `result`,
# the name the DataFrame constructor below reads.
result = {
    'Player': [x['player'] for x in aggressive_bets],
    'Bet': [x['bet_placed'] for x in aggressive_bets]
}

df = pd.DataFrame(result)
# Centre the cell text and add an HTML caption before rendering the table.
df1 = df.style.set_properties(**{'text-align': 'center'}).set_caption('<h2 style="font-weight: bold;">Largest Bets</h2>')
display(df1)

#### Most common bet on two aces preflop

In [None]:
pipeline = [
    {
        '$unwind': {'path': '$hole'}
    }, {
        '$match': { '$expr': { '$setIsSubset': [ '$hole.Cards', ['ad', 'ah', 'ac', 'as']] } }
    }, {
        '$project': {'hole': 1}
    }, {
        '$group': {'_id': '$hole.Action', 'count': {'$sum': 1} }
    }, {
        '$sort': {'count': -1}
    }, {
        '$limit': 1
    }
]

%lprun -f pipelineQueries common_bet_aces_preflop = pipelineQueries(pipeline, hands_collection)

In [None]:
# Report the most frequent action and how often it was made.
for item in common_bet_aces_preflop:
    summary = f"{item['_id']} is the most common bet: Made {str(item['count'])} times"
    print(summary)

#### Identify the combinations that win the most (per player)

In [None]:
pipeline = [
    {
        '$unwind': { 'path': '$winners' }
    }, {
        '$group': {
            '_id': {
                'player': '$winners.Player',
                'card_combo': '$winners.HandRank'
            },
            'count': { '$sum': 1 }
        }
    }, {
        '$sort': { 'count': -1 }
    }, {
        '$group': {
            '_id': '$_id.player',
            'card': { '$first': '$_id.card_combo' },
            'max': { '$first': '$count' }
        }
    }, {
        '$project': {
            '_id': 0,
            'player': '$_id',
            'rank': '$card',
            'max': '$max'
        }
    }
]
%lprun -f pipelineQueries most_winning_ranks_by_player = pipelineQueries(pipeline, hands_collection)

In [None]:
# Build the table column by column from the per-player query results.
result = dict()
result['Player'] = [row['player'] for row in most_winning_ranks_by_player]
result['Rank'] = [row['rank'] for row in most_winning_ranks_by_player]
result['Number of times won'] = [row['max'] for row in most_winning_ranks_by_player]

# Render with centred cell text and an HTML caption.
centered_cells = {'text-align': 'center'}
caption_html = '<h2 style="font-weight: bold;">Player Most Winning Card</h2>'
df = pd.DataFrame(result)
df1 = df.style.set_properties(**centered_cells).set_caption(caption_html)
display(df1)

#### Players who won the Longest Games

In [None]:
pipeline = [
    {
        '$unwind': { 'path': '$Games' }
    }, {
        '$addFields': {
            'Games.numHands': {
                '$size': '$Games.hands'
            }
        }
    }, {
        '$sort': { 'Games.numHands': -1 }
    }, {
        '$limit': 15
    }, {
        '$project': {
            '_id': 0,
            'numHands': '$Games.numHands',
            'last_hand': {
                '$arrayElemAt': [ '$Games.hands', -1 ]
            }
        }
    }, {
        '$lookup': {
            'from': 'Hands',
            'localField': 'last_hand.hand_id',
            'foreignField': 'hand_id',
            'as': 'details'
        }
    }, {
        '$project': {
            'winners': '$details.winners',
            'hands': '$numHands'
        }
    }, {
        '$unwind': { 'path': '$winners' }
    }, {
        '$project': {
            'player': {
                '$arrayElemAt': [
                    '$winners', -1
                ]
            },
            'hands': '$hands'
        }
    }, {
        '$project': {
            'player': '$player.Player',
            'hands': '$hands'
        }
    }
]

%lprun -f pipelineQueries longest_game_winners = pipelineQueries(pipeline, rounds_collection)

In [None]:
# Build the table columns, skipping result documents that carry no 'player' field.
result = dict()
result['Player'] = [row['player'] for row in longest_game_winners if 'player' in row]
result['Hands in Game'] = [row['hands'] for row in longest_game_winners if 'player' in row]

# NOTE(review): the caption says "Top 10" while the pipeline that produces
# `longest_game_winners` limits to 15 games — confirm which is intended.
centered_cells = {'text-align': 'center'}
caption_html = '<h2 style="font-weight: bold;">Top 10 Longest Game winners</h2>'
df = pd.DataFrame(result)
df1 = df.style.set_properties(**centered_cells).set_caption(caption_html)
display(df1)

In [None]:
# Close the MongoClient created in the connection cell now that all queries have run.
client.close()