In [None]:
import ast
import re
import os
import itertools
import random
from collections import Counter
import pandas as pd
import numpy as np
import shutil

In [None]:
def hand_rank(hand):
    """Return a comparable ranking tuple for a poker hand.

    Args:
        hand: iterable of two-character card strings, rank first and suit
            second (for example ``'Ah'`` or ``'Td'``).

    Returns:
        A tuple ``(category, tiebreak)``. ``category`` goes from 0 (high card)
        to 8 (straight flush). ``tiebreak`` is a list of rank indices (0 for
        '2' up to 12 for 'A') ordered by multiplicity first and rank second,
        so that comparing two tuples with ``>``/``>=`` picks the stronger hand.
        An empty hand yields ``(0, [])``.

    Note:
        The category tests are written for five-card hands. When more cards
        are passed, the result is computed over all of them at once and is not
        the best five-card selection.
    """
    ranks = '23456789TJQKA'
    rank_counts = Counter(r for r, s in hand).most_common()
    suits = Counter(s for r, s in hand)

    if not rank_counts:
        return (0, [])

    # Tie-break order: most frequent rank first (pairs/trips/quads decide
    # before kickers), then higher rank first.
    by_count_then_rank = sorted(
        ((count, ranks.index(rank)) for rank, count in rank_counts),
        reverse=True,
    )
    sorted_ranks = [value for count, value in by_count_then_rank]

    # A straight is five distinct ranks spanning exactly five values; use
    # max/min so the result does not depend on the order the cards were given.
    five_distinct = len(rank_counts) == 5
    is_straight = five_distinct and (max(sorted_ranks) - min(sorted_ranks) == 4)
    if sorted_ranks == [12, 3, 2, 1, 0]:
        # Wheel (A-2-3-4-5): the ace plays low, so it ranks below a 6-high straight.
        is_straight = True
        sorted_ranks = [3, 2, 1, 0, -1]
    is_flush = any(count == 5 for count in suits.values())

    top_count = rank_counts[0][1]
    second_count = rank_counts[1][1] if len(rank_counts) > 1 else 0

    if is_straight and is_flush:
        return (8, sorted_ranks)
    elif top_count == 4:
        return (7, sorted_ranks)
    elif top_count == 3 and second_count == 2:
        return (6, sorted_ranks)
    elif is_flush:
        return (5, sorted_ranks)
    elif is_straight:
        return (4, sorted_ranks)
    elif top_count == 3:
        return (3, sorted_ranks)
    elif top_count == 2 and second_count == 2:
        return (2, sorted_ranks)
    elif top_count == 2:
        return (1, sorted_ranks)
    else:
        return (0, sorted_ranks)

def evaluate_hand(hand):
    """Score a hand by delegating to ``hand_rank`` and returning its result unchanged."""
    return hand_rank(hand)

def monte_carlo_equity(hole_cards, community_cards, num_opponents, gameid,  num_simulations=2000):
    """Estimate by simulation how often the given hole cards win or tie.

    Args:
        hole_cards: list of card strings such as ``['Kd', 'Qh']``, or one of
            the placeholder lists ``['folded']``, ``['mucked']``, ``['won']``,
            ``['mucked-']``, ``['mucked+']`` when the cards are unknown.
        community_cards: list of board cards already dealt (may be empty).
        num_opponents: number of random two-card opponent hands dealt per run.
        gameid: identifier used only in the diagnostic message printed when a
            known card is not found in the deck.
        num_simulations: number of random deals.

    Returns:
        ``0`` when ``hole_cards`` is a placeholder; otherwise the fraction of
        simulations in which the player's score is greater than or equal to
        every opponent's score (ties count as wins).

    Raises:
        ZeroDivisionError: if ``num_simulations`` is 0 and the hole cards are
            not a placeholder.
    """
    # Placeholder hole cards: nothing to simulate.
    if hole_cards in [['folded'], ['mucked'], ['won'], ['mucked-'], ['mucked+']]:
        return 0

    # Keep only well-formed two-character card strings.
    cleaned_hole_cards = [card for card in hole_cards if isinstance(card, str) and len(card) == 2]
    cleaned_community_cards = [card for card in community_cards if isinstance(card, str) and len(card) == 2]

    # Standard 52-card deck (four suits), minus the cards already known.
    deck = [r + s for r in '23456789TJQKA' for s in 'hdcs']
    for card in cleaned_hole_cards + cleaned_community_cards:
        try:
            deck.remove(card)
        except ValueError:
            # Unknown or duplicated card: report it and carry on with the rest.
            print('carta non trovata:', card, hole_cards + community_cards, 'gameid:', gameid)

    board_start = num_opponents * 2
    board_end = board_start + 5 - len(cleaned_community_cards)

    wins = 0
    for _ in range(num_simulations):
        # Shuffle, deal two cards to each opponent, then complete the board.
        random.shuffle(deck)
        opponents_hands = [deck[i * 2:(i + 1) * 2] for i in range(num_opponents)]
        all_community = cleaned_community_cards + deck[board_start:board_end]

        player_best_hand = evaluate_hand(cleaned_hole_cards + all_community)
        opponents_best_hands = [evaluate_hand(hand + all_community) for hand in opponents_hands]

        if all(player_best_hand >= opponent_hand for opponent_hand in opponents_best_hands):
            wins += 1

    # Equity is the share of simulated deals won or tied.
    return wins / num_simulations

def calculate_relative_position(button_seat, player_seat, num_players):
    """Return how many seats the player sits after the button.

    If either seat is the string ``'Unknown'`` the result is ``'Unknown'``.
    Otherwise the seat difference is returned, with ``num_players`` added once
    when the difference is negative.
    """
    if 'Unknown' in (button_seat, player_seat):
        return 'Unknown'
    offset = player_seat - button_seat
    return offset + num_players if offset < 0 else offset

def BaselineEquityComputer(community_cards, known_HCs, num_opponents, num_simulations):
    """Average the equity of randomly drawn hole cards against the given situation.

    Calls ``simulate_equity`` 50 times, each with freshly drawn hole cards,
    and returns the mean of the results.

    Args:
        community_cards: board cards already dealt.
        known_HCs: hole cards of an opponent whose hand is known (may be empty).
        num_opponents: number of opponents forwarded to ``simulate_equity``.
        num_simulations: overall simulation budget; each draw runs
            ``num_simulations // 100`` simulations, but never fewer than one.

    Returns:
        The mean simulated equity as a float.
    """
    num_draws = 50
    # Clamp to 1 so a budget below 100 does not lead to a division by zero
    # inside simulate_equity.
    sims_per_draw = max(1, num_simulations // 100)

    equities = [
        simulate_equity(known_HCs, community_cards, num_opponents, sims_per_draw)
        for _ in range(num_draws)
    ]
    return sum(equities) / len(equities)

def simulate_equity(known_HCs, community_cards, num_opponents, num_simulations):
    """Estimate the equity of one randomly drawn pair of hole cards.

    Two hole cards are drawn at random from the cards not already used by
    ``known_HCs`` or ``community_cards``. They are then played
    ``num_simulations`` times against the opponents and a randomly completed
    board.

    Args:
        known_HCs: hole cards of one opponent whose hand is known; when empty,
            every opponent gets random cards.
        community_cards: board cards already dealt.
        num_opponents: total number of opponents, including the known one.
        num_simulations: number of random deals.

    Returns:
        Fraction of deals in which the drawn hand scores greater than or equal
        to every opponent (ties count as wins).

    Raises:
        ZeroDivisionError: if ``num_simulations`` is 0.
    """
    used_cards = known_HCs + community_cards
    deck = [r + s for r in '23456789TJQKA' for s in 'hdcs' if r + s not in used_cards]

    # Draw the hero's hole cards and take them out of the deck so they cannot
    # be dealt again to an opponent or to the board.
    simulated_hole_cards = random.sample(deck, 2)
    deck = [card for card in deck if card not in simulated_hole_cards]

    # Only a non-empty known hand counts as an opponent.
    fixed_opponents = [known_HCs] if known_HCs else []
    num_random = max(num_opponents - len(fixed_opponents), 0)
    board_start = num_random * 2
    board_end = board_start + 5 - len(community_cards)

    wins = 0
    for _ in range(num_simulations):
        random.shuffle(deck)
        opponents_hands = fixed_opponents + [deck[i * 2:(i + 1) * 2] for i in range(num_random)]
        all_community = community_cards + deck[board_start:board_end]

        player_best_hand = evaluate_hand(simulated_hole_cards + all_community)
        opponents_best_hands = [evaluate_hand(hand + all_community) for hand in opponents_hands]

        if all(player_best_hand >= opponent_hand for opponent_hand in opponents_best_hands):
            wins += 1

    return wins / num_simulations

NUOVO APPROCCIO

In [None]:
def split_poker_hands(text):
    """Split a hand-history dump into individual hands.

    Leading and trailing whitespace is stripped first; hands are then
    separated wherever two newlines occur with only whitespace between them.
    """
    return re.split(r'\n\s*\n', text.strip())

def process_poker_file_to_df(file_path, processed_files_dir, output_csv_path):
    """Parse one hand-history text file, append its rows to a CSV and archive the file.

    Args:
        file_path: path of the ``.txt`` hand-history file to read (UTF-8).
        processed_files_dir: directory the file is moved to once processed;
            created if it does not exist.
        output_csv_path: CSV file the parsed rows are appended to. The header
            row is written only when the file is missing or empty.

    Returns:
        None. The function works through its side effects (CSV append, file
        move, progress messages on stdout).
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        file_content = file.read()

    # One text chunk per hand.
    hands = split_poker_hands(file_content)

    # Collect the per-action records of every hand.
    all_hand_info = []
    for index, hand in enumerate(hands, start=1):
        all_hand_info.extend(parse_hand_for_all_players_with_position(hand))
        print('hand', index, 'of', len(hands), 'processed')

    df = pd.DataFrame(all_hand_info)

    # Skip empty frames: writing one to a new file would create a CSV without
    # column names, and later appends never add the header.
    if not df.empty:
        write_header = not os.path.exists(output_csv_path) or os.path.getsize(output_csv_path) == 0
        df.to_csv(output_csv_path, mode='a', header=write_header, index=False)

    # Archive the processed file.
    os.makedirs(processed_files_dir, exist_ok=True)
    shutil.move(file_path, os.path.join(processed_files_dir, os.path.basename(file_path)))

def process_poker_files_in_folder(folder_path, processed_files_dir, output_csv_path):
    """Run ``process_poker_file_to_df`` on every ``.txt`` entry listed in ``folder_path``.

    A confirmation line is printed after each file has been handled.
    """
    for filename in os.listdir(folder_path):
        if not filename.endswith('.txt'):
            continue
        process_poker_file_to_df(
            os.path.join(folder_path, filename),
            processed_files_dir,
            output_csv_path,
        )
        print(f"Processed and moved: {filename}")


In [None]:
# Matches a seat line of the hand header such as "Seat 1: <player> ($579)", capturing the
# seat number, the player id and the stack; an optional ", is sitting out" suffix is
# consumed but not captured.
player_pattern = re.compile(r"Seat (\d+): (.+?) \(\$([\d,.]+)\)(?:, is sitting out)?")

def parse_hand_for_all_players_with_position(hand_text):
    """Parse one Full Tilt Poker hand history into one record per recorded action.

    The header (everything before ``*** HOLE CARDS ***``) provides the game id,
    table, blinds, button seat and the seated players. The show-down and
    summary sections provide hole cards and winnings. Every action line
    (folds, calls, checks, bets, raises to, collected) of a seated player then
    produces one record, except folds made during the HOLE CARDS phase.

    Args:
        hand_text: the full text of a single hand.

    Returns:
        A list of dicts, one per recorded action, with the keys ``Reward``,
        ``Equity``, ``BaselineEquity``, ``HoleCards``, ``Won``, ``TotalAmount``,
        ``TotalAmount_player``, ``PlayerID``, ``Phase``, ``Action``, ``Amount``,
        ``CommunityCards``, ``PlayerStack``, ``RelativePosition``,
        ``ActivePlayers``, ``Seat``, the per-phase ``*_Actions`` and
        ``*_Aggression`` fields, ``Blind``, ``GameID``, ``Table`` and
        ``Players``. ``Reward`` is filled in retroactively as winnings minus
        the player's total recorded amount. The list is empty when no action
        is recorded.
    """
    # ---- Header: everything before the hole cards are dealt ----
    preliminary_section = hand_text.split("*** HOLE CARDS ***")[0]

    # Blind sizes come from the table description, e.g. "... - $3/$6 - ...".
    stakes_match = re.search(r'Table .+? - \$(\d+)/\$(\d+) -', preliminary_section)
    if stakes_match:
        small_amount = int(stakes_match.group(1))
        big_amount = int(stakes_match.group(2))
    else:
        print("Informazioni sugli importi non trovate")
        small_amount, big_amount = 0, 0

    # Table names can contain characters such as '/' or '+', so match any
    # run of non-space characters rather than word characters only.
    game_id_match = re.search(r"Full Tilt Poker Game #(\d+): Table (\S+ \(.+?\))", preliminary_section)
    game_id, table = game_id_match.groups() if game_id_match else ("Unknown", "Unknown")
    button_seat_match = re.search(r"The button is in seat #(\d+)", preliminary_section)
    button_seat = int(button_seat_match.group(1)) if button_seat_match else "Unknown"

    # Identify who posted the small and the big blind.
    small_blind_player = None
    big_blind_player = None
    small_blind_match = re.search(r"(\S+) posts the small blind", preliminary_section)
    big_blind_match = re.search(r"(\S+) posts the big blind", preliminary_section)
    if small_blind_match:
        small_blind_player = small_blind_match.group(1).strip()
        print("small blind found", small_blind_player)
    if big_blind_match:
        big_blind_player = big_blind_match.group(1).strip()
        print("big blind found", big_blind_player)

    # One state record per seated player, built in a single pass so that each
    # player gets their own seat, stack and button flag.
    player_seats = player_pattern.findall(preliminary_section)
    num_players = len(player_seats)
    player_map = {}
    for seat_str, player_id, stack_str in player_seats:
        seat = int(seat_str)
        player_map[player_id] = {
            "oldAction": None,
            "oldPhase": "pre-START",
            "HC_Actions": [],
            "FLOP_Actions": [],
            "TURN_Actions": [],
            "RIVER_Actions": [],
            "HC_Aggression": 0,
            "FLOP_Aggression": 0,
            "TURN_Aggression": 0,
            "RIVER_Aggression": 0,
            "Seat": seat,
            "WinAmount": 0,
            "TotalAmount": 0,
            "TotalAmount_player": 0,
            # Strip thousands separators so the stack can be parsed as a number later.
            "Stack": stack_str.replace(",", ""),
            "HoleCards": [],
            "Folded": False,
            "Blind": "no",
            "RelativePosition": calculate_relative_position(button_seat, seat, num_players),
            "Button": "yes" if seat == button_seat else "no",
        }

    print(small_blind_player, big_blind_player)
    if small_blind_player in player_map:
        player_map[small_blind_player]["Blind"] = "small"
        player_map[small_blind_player]["TotalAmount"] += small_amount
        player_map[small_blind_player]["TotalAmount_player"] += small_amount
    if big_blind_player in player_map:
        player_map[big_blind_player]["Blind"] = "big"
        player_map[big_blind_player]["TotalAmount"] += big_amount
        player_map[big_blind_player]["TotalAmount_player"] += big_amount

    # ---- Uncalled bet returned to a player (searched between HOLE CARDS and SUMMARY) ----
    uncalled = False
    amount_uncalled = 0.0
    player_id_uncalled = None
    parts = hand_text.split("*** HOLE CARDS ***")
    if len(parts) > 1:
        game_section = parts[1].split("*** SUMMARY ***")[0].strip()
        # Amounts may carry thousands separators and up to two decimals.
        uncalled_search = re.finditer(r"Uncalled bet of \$(\d[\d,]*(?:\.\d{1,2})?) returned to (\S+)", game_section)
        for match in uncalled_search:
            amount_uncalled = float(match.group(1).replace(",", ""))
            player_id_uncalled = match.group(2)
            print("Uncalled bet found!!")
            uncalled = True

    dataset = []
    current_phase = "START"
    community_cards = []
    hole_cards = []
    known_HCs = []

    # Action lines look like "<player> raises to $18"; the amount may contain
    # thousands separators and decimals (separators are removed before conversion).
    action_pattern = re.compile(r"^(.+) (folds|calls|checks|bets|raises to|collected)(?: \[(.*?)\])?(?: \$(\d[\d,]*(?:\.\d+)?))?")
    action_values = {'checks': 0, 'calls': 1, 'raises to': 3, 'bets': 3}

    # ---- SHOW DOWN section: shown hole cards and mucks ----
    show_down_section = re.search(r"\*\*\* SHOW DOWN \*\*\*([\s\S]*)", hand_text)
    if show_down_section:
        show_down_text = show_down_section.group(1)
        for line in show_down_text.split("\n"):
            show_match = re.search(r"(.+) shows \[([\S\s]+)\]", line)
            if show_match:
                player_id, hole_cards_str = show_match.groups()
                hole_cards = hole_cards_str.split(' ')
                if player_id in player_map:
                    player_map[player_id]["HoleCards"] = hole_cards
            muck_match = re.search(r"(.+) mucks", line)
            if muck_match:
                player_id = muck_match.group(1)
                if player_id in player_map:
                    player_map[player_id]["HoleCards"] = ['mucked-']

    # ---- SUMMARY section: sitting-out players, winners and amounts won ----
    summary_section_search = re.search(r"\*\*\* SUMMARY \*\*\*([\s\S]*)", hand_text)
    if summary_section_search:
        summary_text = summary_section_search.group(1)
        sitting_out_search = re.finditer(r"Seat \d+: (.+?)(?: \(button\)| \(small blind\)| \(big blind\))? is sitting out", summary_text)
        for match in sitting_out_search:
            player_id = match.group(1).strip()
            if player_id in player_map:
                player_map[player_id]["Folded"] = True
                player_map[player_id]["HoleCards"] = ['folded']

        # Players who showed their cards and won.
        showed_cards_search = re.finditer(r"Seat \d+: (.+?)\s*(?:\(button\)|\(small blind\)|\(big blind\))?\s*showed \[([KQJTA2-9hdc s]+)\] and won \(\$(.+?)\)", summary_text)
        for match in showed_cards_search:
            player_id, hole_cards_str, win_amount_str = match.groups()
            win_amount = float(win_amount_str.replace(",", ""))
            hole_cards = hole_cards_str.split(' ')
            if player_id in player_map:
                player_map[player_id]["HoleCards"] = hole_cards
                player_map[player_id]["WinAmount"] = win_amount
                # The winner's cards are the "known" hand used by the baseline equity.
                known_HCs = hole_cards
            else:
                print(f"Giocatore {player_id} non trovato nel pre-summary. Game ID: {game_id}")

        # Players who won without showing their cards.
        mucked_win_search = re.finditer(r"Seat \d+: (.+?)\s*(?:\(small blind\)|\(big blind\)|\(button\))?\s*collected \(\$(.+?)\), mucked", summary_text)
        for match in mucked_win_search:
            player_id, win_amount_str = match.groups()
            win_amount = float(win_amount_str.replace(",", ""))
            if player_id in player_map:
                if not player_map[player_id]["HoleCards"]:
                    player_map[player_id]["HoleCards"] = ['mucked+']
                player_map[player_id]["WinAmount"] = win_amount
            else:
                print(f"Giocatore {player_id} non trovato nel pre-summary. Game ID: {game_id}")

    # Anyone whose cards are still unknown is treated as folded.
    for player_id, info in player_map.items():
        if not info["HoleCards"]:
            info["HoleCards"] = ['folded']
    preliminary_info = {
        "GameID": game_id,
        "Table": table,
        "Players": num_players,
        }

    active_players = num_players - sum(1 for player in player_map.values() if player["Folded"])
    BL_Equity = "NotFound"

    # Maps the phase of a player's previous action to the prefix of the
    # *_Actions / *_Aggression fields that the action is booked under.
    phase_prefix = {"HOLE CARDS": "HC", "FLOP": "FLOP", "TURN": "TURN", "RIVER": "RIVER"}

    # ---- Walk the hand line by line ----
    for line in hand_text.split("\n"):
        if "***" in line:
            # Phase marker: refresh the active-player count, the phase name and the board.
            active_players = num_players - sum(1 for player in player_map.values() if player["Folded"])
            phase_match = re.search(r"\*\*\* ([A-Z ]+) \*\*\*", line)
            if phase_match:
                current_phase = phase_match.group(1)
                community_cards = []
                for card_group in re.findall(r"\[([^\]]+)\]", line):
                    community_cards.extend(card_group.split(' '))
            else:
                print(f"Pattern not found in line: {line}")
            # Baseline equity for the current phase.
            BL_Equity = round(BaselineEquityComputer(community_cards, known_HCs, active_players, num_simulations = 1000),5)
            continue

        action_match = action_pattern.match(line)
        if not action_match:
            continue
        player_id, action, _, amount_str = action_match.groups()
        if player_id not in player_map:
            continue
        state = player_map[player_id]

        if action == "folds":
            state["Folded"] = True
            # Pre-flop folds are not recorded in the dataset.
            if current_phase == 'HOLE CARDS':
                continue

        # NOTE(review): "raises to $X" adds the full X even if the player had
        # already put chips in during this betting round — confirm this is intended.
        if amount_str is not None:
            amount = float(amount_str.replace(",", ""))
            state["TotalAmount"] += amount
            state["TotalAmount_player"] += amount
        else:
            amount = ""
        seat = state["Seat"]
        hole_cards = state["HoleCards"]
        equity = monte_carlo_equity(hole_cards, community_cards, active_players, game_id)

        # Book the player's PREVIOUS action under the phase in which it was made,
        # so each record only carries history that precedes the current action.
        prefix = phase_prefix.get(state["oldPhase"])
        if prefix is not None:
            state[f"{prefix}_Actions"].append(state["oldAction"])
            # Actions without an aggression weight (e.g. folds) count as 0.
            state[f"{prefix}_Aggression"] += action_values.get(state["oldAction"], 0)

        dataset.append({
            "Reward": 0,
            "Equity": round(equity, 5),
            "BaselineEquity": BL_Equity,
            "HoleCards": hole_cards,
            "Won": state["WinAmount"],
            "TotalAmount": state["TotalAmount"],
            "TotalAmount_player": state["TotalAmount_player"],
            "PlayerID": player_id,
            "Phase": current_phase,
            "Action": action,
            "Amount": amount,
            "CommunityCards": community_cards,
            "PlayerStack": state["Stack"],
            "RelativePosition": calculate_relative_position(button_seat, seat, num_players),
            "ActivePlayers": active_players,
            "Seat": seat,
            "HC_Actions": state["HC_Actions"].copy(),
            "FLOP_Actions": state["FLOP_Actions"].copy(),
            "TURN_Actions": state["TURN_Actions"].copy(),
            "RIVER_Actions": state["RIVER_Actions"].copy(),
            "HC_Aggression": state["HC_Aggression"],
            "FLOP_Aggression": state["FLOP_Aggression"],
            "TURN_Aggression": state["TURN_Aggression"],
            "RIVER_Aggression": state["RIVER_Aggression"],
            "Blind": state["Blind"],
            **preliminary_info
        })

        state["oldAction"] = action
        state["oldPhase"] = current_phase

    # ---- Uncalled bet adjustment ----
    # Walk the records backwards up to the last record of the player who got the bet back.
    # NOTE(review): "TotalAmount" is reduced on every record visited, including other
    # players' trailing records — confirm whether that is intended.
    if uncalled:
        for entry in reversed(dataset):
            entry["TotalAmount"] -= amount_uncalled
            if entry["PlayerID"] == player_id_uncalled:
                print("Uncalled placer found!!")
                entry["TotalAmount_player"] -= amount_uncalled
                break

    # ---- Rewards: winnings minus amount put in, assigned to all of a player's records ----
    for player_id, state in player_map.items():
        reward = round(state["WinAmount"] - state["TotalAmount_player"], 3)
        if uncalled and player_id == player_id_uncalled:
            # The returned bet was never actually lost.
            reward += amount_uncalled
        for entry in dataset:
            if entry["PlayerID"] == player_id:
                entry["Reward"] = reward

    return dataset


In [None]:
# Folder with the raw hand-history .txt files, folder that receives processed files,
# and CSV file that collects the parsed rows.
# NOTE(review): absolute, machine-specific Windows paths — adjust before running elsewhere.
folder_path = r'C:\Users\giaco\Desktop\bet\FTP-2009-07-01_2009-07-23_600NLH_OBFU'
processed_files_dir = r'C:\Users\giaco\Desktop\bet\FTP-2009-07-01_2009-07-23_600NLH_OBFU\used'
output_csv_path = r'C:\Users\giaco\Desktop\bet\final_data\POKERDATA_noearlyfolds.txt'
# Parse every .txt file in the folder, append to the CSV and move the file away.
process_poker_files_in_folder(folder_path, processed_files_dir, output_csv_path)

In [None]:
# Sample hand history used to try the parser on a single hand.
text = """Full Tilt Poker Game #26262281666: Table /DSOYkjhdkynD0Vfu3J6Xw (6 max) - $3/$6 - $180 Cap No Limit Hold'em - 0:00:22 ET - 2009/07/01
Seat 1: Bq4FfTBlqodWz9Fqq4pJTA ($579)
Seat 2: SkzmQSV891XvhLVNZ4xY0g ($653.55)
Seat 3: 4YtoARF1A+6a+XmneBB2LA ($409.80)
Seat 4: EBxYwwEFb4Iu6X7XI2vJMw ($211.55)
Seat 5: dSu2SGssP3d5YGeED7IA+g ($513)
Seat 6: tTriEl+vbdR/T3B/n7t3ew ($698.95)
dSu2SGssP3d5YGeED7IA+g posts the small blind of $3
tTriEl+vbdR/T3B/n7t3ew posts the big blind of $6
The button is in seat #4
*** HOLE CARDS ***
Bq4FfTBlqodWz9Fqq4pJTA folds
SkzmQSV891XvhLVNZ4xY0g raises to $18
4YtoARF1A+6a+XmneBB2LA folds
EBxYwwEFb4Iu6X7XI2vJMw calls $18
dSu2SGssP3d5YGeED7IA+g folds
tTriEl+vbdR/T3B/n7t3ew folds
*** FLOP *** [7d 9h 4s]
SkzmQSV891XvhLVNZ4xY0g checks
EBxYwwEFb4Iu6X7XI2vJMw checks
*** TURN *** [7d 9h 4s] [Qs]
SkzmQSV891XvhLVNZ4xY0g bets $30
EBxYwwEFb4Iu6X7XI2vJMw raises to $162, and is capped
SkzmQSV891XvhLVNZ4xY0g calls $132, and is capped
EBxYwwEFb4Iu6X7XI2vJMw shows [Qc 5c]
SkzmQSV891XvhLVNZ4xY0g shows [Kd Qh]
*** RIVER *** [7d 9h 4s Qs] [6h]
EBxYwwEFb4Iu6X7XI2vJMw shows a pair of Queens
SkzmQSV891XvhLVNZ4xY0g shows a pair of Queens
SkzmQSV891XvhLVNZ4xY0g wins the pot ($366) with a pair of Queens
*** SUMMARY ***
Total pot $369 | Rake $3
Board: [7d 9h 4s Qs 6h]
Seat 1: Bq4FfTBlqodWz9Fqq4pJTA didn't bet (folded)
Seat 2: SkzmQSV891XvhLVNZ4xY0g showed [Kd Qh] and won ($366) with a pair of Queens
Seat 3: 4YtoARF1A+6a+XmneBB2LA didn't bet (folded)
Seat 4: EBxYwwEFb4Iu6X7XI2vJMw (button) showed [Qc 5c] and lost with a pair of Queens
Seat 5: dSu2SGssP3d5YGeED7IA+g (small blind) folded before the Flop
Seat 6: tTriEl+vbdR/T3B/n7t3ew (big blind) folded before the Flop"""

# Parse the sample hand into per-action records.
out = parse_hand_for_all_players_with_position(text)

# Show the records as a table.
df = pd.DataFrame(out)
print(df)

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from keras.models import Sequential
from keras.layers import Dense
from keras.utils import to_categorical
import matplotlib.pyplot as plt
import tensorflow as tf
from keras.callbacks import EarlyStopping
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt


In [None]:


# Read the CSV file into a DataFrame
POKERDATA = pd.read_csv("C:/Users/giaco/Desktop/bet/final_data/POKERDATA_noearlyfolds.txt")
POKERDATA['PlayerStack'] = pd.to_numeric(POKERDATA['PlayerStack'], errors='coerce')

# Count how many times each suit letter occurs in the CommunityCards value
POKERDATA['NumHearts'] = POKERDATA['CommunityCards'].apply(lambda x: x.count('h'))
POKERDATA['NumDiamonds'] = POKERDATA['CommunityCards'].apply(lambda x: x.count('d'))
POKERDATA['NumClubs'] = POKERDATA['CommunityCards'].apply(lambda x: x.count('c'))
POKERDATA['NumSpades'] = POKERDATA['CommunityCards'].apply(lambda x: x.count('s'))

# Same idea for every rank character
# NOTE(review): CommunityCards is presumably read back from the CSV as a string, in which
# case `for card in x` iterates over single characters rather than cards — confirm.
for val in '23456789TJQKA':
    POKERDATA[f'Num{val}'] = POKERDATA['CommunityCards'].apply(lambda x: sum(card.startswith(val) for card in x))



# Keep an untouched copy of the DataFrame
backup = POKERDATA.copy()

# Replace every HoleCards value that is not one of the placeholders with the marker "['cards']"
POKERDATA['HoleCards'] = POKERDATA['HoleCards'].apply(lambda x: "['cards']" if x not in ["['mucked-']", "['mucked+']", "['folded']"] else x)

# Rows with known hole cards vs. rows with a placeholder, both taken from the untouched copy
datacards = backup[POKERDATA['HoleCards'] == "['cards']"]
datanocards = backup[POKERDATA['HoleCards'] != "['cards']"]


# NOTE: this binds a second name to the same object; it is not an independent copy.
backup_POKERDATA = POKERDATA


print(datacards.head())

In [None]:
# Continue with the rows whose hole cards are known.
POKERDATA = datacards.copy()

# Missing amounts become 0; the other columns are coerced to numbers (invalid values become NaN).
POKERDATA['Amount'] = POKERDATA['Amount'].fillna(0)
POKERDATA['Won'] = pd.to_numeric(POKERDATA['Won'], errors='coerce')
POKERDATA['BaselineEquity'] = pd.to_numeric(POKERDATA['BaselineEquity'], errors='coerce')
POKERDATA['RelativePosition'] = pd.to_numeric(POKERDATA['RelativePosition'], errors='coerce')

# Find the columns that contain at least one NaN value
columns_with_na = POKERDATA.columns[POKERDATA.isna().any()].tolist()

# Print the columns with NaN values
print("Colonne con valori NaN:", columns_with_na)

# Optionally, print the number of NaN values for each of those columns
for col in columns_with_na:
    print(f"{col}: {POKERDATA[col].isna().sum()} valori NaN")
# Drop the identifier, raw-card and action-history columns
POKERDATA = POKERDATA.drop(["Table", "GameID", "PlayerID", "CommunityCards", 'PlayerStack', 'HC_Actions','FLOP_Actions','TURN_Actions','RIVER_Actions'], axis=1)
print(POKERDATA.columns.tolist())
# Identify the non-numeric columns
non_numeric_columns = POKERDATA.select_dtypes(exclude=['int64', 'float64']).columns
print(non_numeric_columns)

# Drop rows lacking Phase, BaselineEquity or RelativePosition, then re-check for NaN
POKERDATA = POKERDATA.dropna(axis=0, subset=['Phase', "BaselineEquity", "RelativePosition"])
columns_with_na = POKERDATA.columns[POKERDATA.isna().any()].tolist()

# Print the columns that still contain NaN values
print("Colonne con valori NaN:", columns_with_na)

In [None]:
# One-hot encode every non-numeric column except HoleCards,
# then turn the resulting boolean columns into integers.
POKERDATA = pd.get_dummies(POKERDATA, columns=non_numeric_columns[non_numeric_columns != "HoleCards"])
POKERDATA = POKERDATA.astype({col: 'int' for col in POKERDATA.columns if POKERDATA[col].dtype == 'bool'})

print(POKERDATA.head())
print(POKERDATA.shape)

In [None]:
# Save the formatted DataFrame, overwriting any previous file.
# NOTE(review): absolute, machine-specific Windows path.
output_csv_path2 = r'C:\Users\giaco\Desktop\bet\final_data\POKERDATA_noearlyfolds_formatted.txt'
POKERDATA.to_csv(output_csv_path2, mode='w', header=True, index=False)

In [None]:
# Display (rows, columns) of the formatted DataFrame.
POKERDATA.shape

In [None]:
# Reload the formatted dataset and derive the binary label (1 when something was won).
POKERDATA = pd.read_csv("C:/Users/giaco/Desktop/bet/final_data/POKERDATA_noearlyfolds_formatted.txt")
POKERDATA['Label'] = (POKERDATA['Won'] > 0).astype(int)
POKERDATA['Amount'] = POKERDATA['Amount'].fillna(0)

# The formatted file is written after CommunityCards has been dropped and already carries
# the Num* columns; recompute them only when the raw column is actually present, instead
# of failing with a KeyError.
if 'CommunityCards' in POKERDATA.columns:
    POKERDATA['NumHearts'] = POKERDATA['CommunityCards'].apply(lambda x: x.count('h'))
    POKERDATA['NumDiamonds'] = POKERDATA['CommunityCards'].apply(lambda x: x.count('d'))
    POKERDATA['NumClubs'] = POKERDATA['CommunityCards'].apply(lambda x: x.count('c'))
    POKERDATA['NumSpades'] = POKERDATA['CommunityCards'].apply(lambda x: x.count('s'))

    for val in '23456789TJQKA':
        POKERDATA[f'Num{val}'] = POKERDATA['CommunityCards'].apply(lambda x: sum(card.startswith(val) for card in x))

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import silhouette_score


# Keep the int64/float64 columns and leave out 'Won' and 'Equity' before scaling.
data_numeric = POKERDATA.select_dtypes(include=['int64', 'float64'])
data_scaled_fork = data_numeric.drop(['Won', "Equity"], axis = 1)
# Standardize the data
scaler = StandardScaler()
data_scaled = scaler.fit_transform(data_scaled_fork)

# 

In [None]:

# Find the columns that contain at least one NaN value
columns_with_na = POKERDATA.columns[POKERDATA.isna().any()].tolist()

# Print the columns with NaN values
print("Colonne con valori NaN:", columns_with_na)

# Optionally, print the number of NaN values for each of those columns
for col in columns_with_na:
    print(f"{col}: {POKERDATA[col].isna().sum()} valori NaN")

# Drop every row that has a NaN in any column
POKERDATA = POKERDATA.dropna()

columns_with_na = POKERDATA.columns[POKERDATA.isna().any()].tolist()

# Print the columns that still contain NaN values, and the new shape
print("Colonne con valori NaN:", columns_with_na)
print(POKERDATA.shape)

In [None]:
# List the column names currently in the DataFrame.
print(POKERDATA.columns.tolist())

In [None]:
# Make sure 'Won', 'BaselineEquity' and 'Equity' are numeric (invalid values become NaN)
POKERDATA['Won'] = pd.to_numeric(POKERDATA['Won'], errors='coerce')
POKERDATA['BaselineEquity'] = pd.to_numeric(POKERDATA['BaselineEquity'], errors='coerce')
POKERDATA['Equity'] = pd.to_numeric(POKERDATA['Equity'], errors='coerce')

from sklearn.preprocessing import StandardScaler


# Re-select the numeric columns from the CURRENT DataFrame so the scaled matrix
# reflects the rows kept so far, instead of reusing a stale selection from an earlier cell.
data_numeric = POKERDATA.select_dtypes(include=['int64', 'float64'])
data_scaled_fork = data_numeric.drop(['Won', "Equity"], axis = 1)
# Standardize the data
scaler = StandardScaler()
data_scaled = scaler.fit_transform(data_scaled_fork)

In [None]:
# Reload the formatted dataset from disk.
POKERDATA = pd.read_csv("C:/Users/giaco/Desktop/bet/final_data/POKERDATA_noearlyfolds_formatted.txt")
# Position shifted by 3 and divided by the number of players.
# NOTE(review): the reason for the +3 offset is not visible here — confirm.
POKERDATA['RelativePosition_corrected'] = (POKERDATA['RelativePosition'].astype(int) + 3) / POKERDATA['Players']
#POKERDATA['BaselineEquity'], POKERDATA['Equity'] = 2*POKERDATA['BaselineEquity'], 2*POKERDATA['Equity']

# All rows are used (the HoleCards filter is commented out).
datacards = POKERDATA#[POKERDATA['HoleCards'] == "['cards']"]
# Target: equity minus baseline equity. Features: everything except HoleCards and the two equity columns.
y = pd.to_numeric(datacards['Equity']) - pd.to_numeric(datacards['BaselineEquity'])
X = datacards.drop(columns=['HoleCards', 'Equity', 'BaselineEquity'])

In [None]:
#print(X.head())
print(y.head(20))

# Convert features and target to NumPy arrays (all columns of X are kept).
X = X.values
y = y.values


In [None]:
from sklearn.preprocessing import MinMaxScaler

# Rescale every feature with MinMaxScaler.
# NOTE(review): the scaler is fitted on the full X before the train/validation/test split
# done in the next cell, so held-out rows influence the scaling — consider fitting on the
# training split only.
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

In [None]:
# 60/20/20 train/validation/test split: 20% held out for test, then 25% of the rest for validation.
# NOTE(review): no random_state is set, so the split differs between runs.
X_temp, X_test, y_temp, y_test = train_test_split(X_scaled, y, test_size=0.2)
X_train, X_val, y_train, y_val = train_test_split(X_temp, y_temp, test_size=0.25)  # 0.25 x 0.8 = 0.2

# Cast everything to float32.
X_train = X_train.astype('float32')
y_train = y_train.astype('float32')
X_val = X_val.astype('float32')
y_val = y_val.astype('float32')
X_test = X_test.astype('float32')
y_test = y_test.astype('float32')

In [None]:
from keras.layers import LSTM, Dense, Dropout, GRU
from keras.optimizers import Adam, SGD
# Sequential and EarlyStopping are used below; importing them here means the
# cell does not depend on an import made in some other cell.
from keras.models import Sequential
from keras.callbacks import EarlyStopping
import numpy as np

# Fully connected regressor: two 64-unit ReLU layers, each followed by 20%
# dropout, and a single linear output unit.
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dropout(0.2),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='linear')
])

# SGD with Nesterov momentum.
optimizer = SGD(learning_rate=0.1, momentum=0.8, nesterov=True)

# Mean squared error is both the loss and the reported metric.
model.compile(optimizer=optimizer,
              loss='mean_squared_error',
              metrics=['mean_squared_error'])

# Stop once val_loss has not improved for 30 epochs and restore the best weights.
early_stopping = EarlyStopping(monitor='val_loss', patience=30, restore_best_weights=True)

history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=100,
    batch_size=32,
    callbacks=[early_stopping],
)

In [None]:
# pyplot is needed for the curves below; importing it here keeps the cell
# runnable on a fresh kernel instead of relying on an import in a later cell.
import matplotlib.pyplot as plt

# Evaluate the model on the test set.
test_loss, test_mse = model.evaluate(X_test, y_test, verbose=2)
print('\nTest mean squared error:', test_mse)
print(history.history.keys())

# One figure per tracked quantity, each showing the training curve and its
# validation counterpart (Keras stores the latter under the 'val_' prefix).
for curve in ('mean_squared_error', 'loss'):
    plt.plot(history.history[curve])
    plt.plot(history.history['val_' + curve])
    plt.title('model ' + curve)
    plt.ylabel(curve)
    plt.xlabel('epoch')
    plt.legend(['train', 'val'], loc='upper left')
    # Explicit show() so each quantity gets its own figure, including the last one.
    plt.show()

In [None]:
# The network is a regressor with a single linear output unit, so predict()
# yields one value per sample. Taking argmax over that single column would
# always return 0; the predictions are flattened to a 1D array instead.
predicted_labels = model.predict(X_test).ravel()

# Print each prediction next to its true target value.
for predicted, actual in zip(predicted_labels, y_test):
    print(f"Predicted: {predicted}, True: {actual}")

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Linear regression baseline: build the estimator and fit it on the training split.
linear_model = LinearRegression().fit(X_train, y_train)

# Predict on the validation split and report the mean squared error.
y_pred_linear = linear_model.predict(X_val)
mse_linear = mean_squared_error(y_val, y_pred_linear)
print(f"MSE for Linear Regression: {mse_linear}")


In [None]:
from sklearn.tree import DecisionTreeRegressor

# Decision-tree regressor; max_depth caps the tree depth and can be tuned to
# control model complexity. Built and fitted on the training split.
tree_model = DecisionTreeRegressor(max_depth=5).fit(X_train, y_train)

# Predict on the validation split and report the mean squared error.
y_pred_tree = tree_model.predict(X_val)
mse_tree = mean_squared_error(y_val, y_pred_tree)
print(f"MSE for Decision Tree Regressor: {mse_tree}")


In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

# Random forest: 1000 trees, depth capped at 100, fixed seed, all CPU cores,
# progress messages enabled.
rf_model = RandomForestRegressor(
    n_estimators=1000,
    max_depth=100,
    random_state=42,
    verbose=1,
    n_jobs=-1,
)
rf_model.fit(X_train, y_train)

# Validation-set predictions and their mean squared error.
y_pred_rf = rf_model.predict(X_val)
mse_rf = mean_squared_error(y_val, y_pred_rf)
print(f"MSE for Random Forest Regressor: {mse_rf}")

# Actual vs. predicted values, plotted against the sample index.
fig, ax = plt.subplots(figsize=(10, 5))
sample_index = range(len(y_val))
ax.scatter(sample_index, y_val, color='blue', label='Actual values')
ax.scatter(range(len(y_pred_rf)), y_pred_rf, color='red', alpha=0.5, label='Predicted values')
ax.set_title('Comparison of Actual and Predicted Values')
ax.legend()
plt.show()


In [None]:
# Test-set predictions from the fitted random forest and their mean squared error.
# Note that y_pred_rf and mse_rf are rebound here and no longer hold the
# validation-set results.
y_pred_rf = rf_model.predict(X_test)
mse_rf = mean_squared_error(y_test, y_pred_rf)
print(f"MSE for Random Forest Regressor: {mse_rf}")

# Actual vs. predicted values, plotted against the sample index.
fig, ax = plt.subplots(figsize=(10, 5))
ax.scatter(range(len(y_test)), y_test, color='blue', label='Actual values')
ax.scatter(range(len(y_pred_rf)), y_pred_rf, color='red', alpha=0.5, label='Predicted values')
ax.set_title('Comparison of Actual and Predicted Values')
ax.legend()
plt.show()

In [None]:
import pandas as pd
import re

def clean_cell(cell):
    """Strip square brackets from a string cell.

    Non-string values are returned unchanged. A string that is empty once the
    '[' and ']' characters are removed is replaced by the literal 'none'.
    """
    if not isinstance(cell, str):
        return cell
    stripped = cell.replace('[', '').replace(']', '')
    return stripped or 'none'

# Read the dataset and strip the square brackets from every cell in one pass.
df = pd.read_csv(
    "C:/Users/giaco/Desktop/bet/final_data/POKERDATA_noearlyfolds_gae.txt"
).map(clean_cell)
df.head()

In [None]:
# Write the bracket-free frame to disk, with a header row and without the index column.
output_csv = r'C:\Users\giaco\Desktop\bet\final_data\POKERDATA_noearlyfolds_no[].txt'
df.to_csv(output_csv, index=False)

In [None]:
import os
from PyPDF2 import PdfMerger
from PIL import Image
import fitz  # PyMuPDF

def get_pdf_files_sorted_by_date(directory):
    """Return the PDF files in `directory`, oldest modification time first.

    The extension check is case-insensitive, so 'scan.PDF' is picked up as
    well as 'scan.pdf'. Only regular files are returned; a sub-directory whose
    name happens to end in '.pdf' is skipped.

    Args:
        directory: Path of the folder to scan (not recursive).

    Returns:
        A list of paths, each built as os.path.join(directory, name), sorted
        by ascending modification time.
    """
    candidates = (os.path.join(directory, name) for name in os.listdir(directory))
    pdf_paths = [
        path
        for path in candidates
        if path.lower().endswith('.pdf') and os.path.isfile(path)
    ]
    return sorted(pdf_paths, key=os.path.getmtime)

def merge_pdfs(pdf_files, output_path):
    """Concatenate the given PDF files, in order, into a single PDF.

    Args:
        pdf_files: Iterable of input PDF paths; they are appended in iteration order.
        output_path: Path of the merged PDF to write.
    """
    merger = PdfMerger()
    try:
        for pdf in pdf_files:
            merger.append(pdf)
        merger.write(output_path)
    finally:
        # Close the merger even if appending or writing fails.
        merger.close()

def convert_pdfs_to_images(pdf_files, output_directory):
    """Render every page of every PDF to a PNG file.

    Pages are numbered continuously across all input files and saved as
    'page_<n>.png' (n starting at 1) inside `output_directory`, which is
    created if it does not exist yet.

    Args:
        pdf_files: Iterable of PDF paths, processed in iteration order.
        output_directory: Folder that receives the PNG files.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_directory, exist_ok=True)
    page_counter = 1
    for pdf_file in pdf_files:
        doc = fitz.open(pdf_file)
        try:
            for page_num in range(len(doc)):
                page = doc.load_page(page_num)  # zero-based page index
                pix = page.get_pixmap()
                # Build a PIL image from the pixmap's raw sample bytes.
                img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
                img.save(os.path.join(output_directory, f"page_{page_counter}.png"))
                page_counter += 1
        finally:
            # Close the document even if rendering a page fails.
            doc.close()


# Folder holding the source PDFs, plus the output locations.
pdf_directory = r'C:\Users\giaco\Desktop\uni\Deep Learning\schemi'
output_pdf_path = r'C:\Users\giaco\Desktop\uni\Deep Learning\schemi\merged.pdf'
output_image_directory = r'C:\Users\giaco\Desktop\uni\Deep Learning\schemi'

# Collect the PDFs ordered by modification time. The merged file is written
# into the same folder, so it is excluded here; otherwise a re-run would merge
# (and rasterise) the previous output together with the sources.
_merged_key = os.path.normcase(os.path.abspath(output_pdf_path))
pdf_files = [
    path
    for path in get_pdf_files_sorted_by_date(pdf_directory)
    if os.path.normcase(os.path.abspath(path)) != _merged_key
]

# Merge the PDFs into a single file.
merge_pdfs(pdf_files, output_pdf_path)
print(f"I file PDF sono stati uniti in {output_pdf_path}")

# Render every page of the source PDFs to PNG images.
convert_pdfs_to_images(pdf_files, output_image_directory)
print(f"I file PDF sono stati convertiti in immagini nella cartella {output_image_directory}")