![title](./img/UNIBET.png)

In [11]:
# %run ./PProcess0.ipynb
# driver = webdriver.Chrome(ChromeDriverManager().install())

In [2]:
import requests
import pandas as pd
import re
import unicodedata

# --- Helper: Strip Accents ---
def strip_accents(text):
    if not isinstance(text, str): return text
    return ''.join(c for c in unicodedata.normalize('NFD', text)
                   if unicodedata.category(c) != 'Mn')

# --- 1. Use your existing df_mapping logic ---
# (Assumes df_mapping is already created from the code you provided)

# --- 2. Fetch Data ---
url = "https://www.unibet.fr/zones/v3/sportnode/markets.json?nodeId=2243762&filter=R%C3%A9sultat&marketname=R%C3%A9sultat%20du%20match"
headers = {"User-Agent": "Mozilla/5.0"}

try:
    response = requests.get(url, headers=headers, timeout=10)
    if response.status_code == 200:
        json_data = response.json()
        raw_events = []
        
        # Access the API structure
        markets = json_data.get("marketsByType", [])
        if markets:
            for day, d in enumerate(markets[0].get("days", [])):
                for e in d.get("events", []):
                    raw_events.append({
                        "Day": day,
                        "Match": e.get("eventName"),
                        "EventId": e.get("eventId"),
                        "competitionName": e.get("competitionName"),
                        "clean_comp": strip_accents(e.get("competitionName", ""))
                    })
        
        dfRaw = pd.DataFrame(raw_events)

        # --- 3. Refined Matching using df_mapping ---
        all_matched_dfs = []

        print("--- üîé PROCESSING MATCHES ---")
        # Iterate directly through your mapping DataFrame
        for _, row in df_mapping.iterrows():
            pattern = row['Regex_Pattern']
            code = row['Comp_Code']
            country = row['Country']
            
            if pattern == "No Pattern Defined":
                continue
            
            # Use search instead of match for better flexibility with API names
            regex = re.compile(pattern, re.IGNORECASE)
            mask = dfRaw["clean_comp"].apply(lambda x: bool(regex.search(str(x))))
            
            if mask.any():
                # Extract the matches and tag them with the mapping info
                temp_df = dfRaw[mask].copy()
                temp_df['Comp_Code'] = code
                temp_df['Country'] = country
                all_matched_dfs.append(temp_df)
                print(f"‚úÖ {code} ({country}): Found {len(temp_df)} matches")

        # --- 4. Final Data Assembly ---
        if all_matched_dfs:
            dfGames = pd.concat(all_matched_dfs, ignore_index=True)
            
            # Split Match into Home and Away
            df_split = dfGames['Match'].str.split(' - ', n=1, expand=True)
            dfGames['Home'] = df_split[0]
            dfGames['Away'] = df_split[1]
            
            # Select final columns
            dfGames = dfGames[['Day', 'Comp_Code', 'Country', 'Home', 'Away', 'EventId', 'competitionName']]
            
            # Optional: Remove duplicates if a game matches two different patterns
            dfGames = dfGames.drop_duplicates(subset=['EventId', 'Comp_Code'])
            
            print("\nüèÅ SUCCESS: dfGames ready with", len(dfGames), "total matches.")
        else:
            print("\nüö® No matches found for the mapped competitions.")

except Exception as e:
    print(f"üö® SCRIPT ERROR: {str(e)}")

--- üîé PROCESSING MATCHES ---
‚úÖ fra.1 (France): Found 16 matches
‚úÖ fra.2 (France): Found 8 matches

üèÅ SUCCESS: dfGames ready with 24 total matches.


In [146]:
# a = pd.DataFrame(row_data1)
# a = a['competitionName'].unique()
# b = dfGames['competitionName'].unique()
# list(set(a) ^ set(b))

In [147]:
import datetime
import requests
import json
import time
import random
from tqdm import tqdm
tqdm._instances.clear()
import pandas as pd

# --- Initialization ---
row_data0 = []
session = requests.Session()
# Headers to look like a real browser
session.headers.update({
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    "Accept": "application/json",
    "Referer": "https://www.unibet.fr/sport/football"
})

# --- The Loop ---
# Using a slightly slower pace to avoid 503 errors
for i, idgame in enumerate(tqdm(dfGames["EventId"])):
    idgame_str = str(idgame)
    url = f"https://www.unibet.fr/zones/event.json?eventId={idgame_str}"
    
    # 1. ANTI-BOT DELAY: Random pause between 1 to 2 seconds
    time.sleep(random.uniform(1.0, 2.0))
    
    # 2. CHUNKING: Take a longer break every 15 matches to reset server tracking
    if i % 15 == 0 and i > 0:
        time.sleep(5)

    try:
        response = session.get(url, timeout=15)
        
        # 3. RETRY LOGIC: If 503 occurs, wait and try one last time
        if response.status_code == 503:
            time.sleep(10) 
            response = session.get(url, timeout=20)

        if response.status_code == 200:
            json_data = response.json()
            
            # Extract basic info safely
            header = json_data.get("eventHeader", {})
            competitionName = header.get('competitionName', 'Unknown')
            
            start_date = header.get('eventStartDate')
            DateTime = datetime.datetime.fromtimestamp(start_date / 1000).strftime('%Y-%m-%d %H:%M:%S') if start_date else "N/A"

            # Locate the markets
            market_classes = json_data.get('marketClassList', [])
            if not market_classes:
                continue

            # # --- NEW: Print all marketNames found in this match ---
            # found_markets = [m.get('marketName') for m in market_classes]
            # print(f"\nüîç Markets found for {idgame_str}: {found_markets}")
            # # ------------------------------------------------------
            
            # Identify Home/Away from the main match result market
            try:
                duel = market_classes[0]['marketList'][0]['selections']
                home = strip_accents(duel[0]['name'])
                away = strip_accents(duel[2]['name'])
            except (IndexError, KeyError):
                home, away = "Unknown", "Unknown"

            # 4. PARSE SPECIFIC MARKETS
            # target_markets = [
            #     'Total de buts', 
            #     'Combo chance double & Total de buts',
            #     'But pour les 2 √©quipes',
            #     'R√©sultat du match',
            #     'Chance double',
            #     'Combo chance double & les 2 √©quipes marquent'
            # ]
            target_markets = [
                'R√©sultat du match',
                'Double chance',
                'Total de buts',
                'But pour les 2 √©quipes',
                'Combo r√©sultat du match & Total de buts',
                'Combo r√©sultat du match & Les 2 √©quipes marquent',
                'Combo double chance & Total de buts',
                'Combo double chance & les 2 √©quipes marquent',
                # 'Score exact',
                # 'Mi-temps / Fin de match',
                # 'Buteur',
                # 'Nombre total de tirs cadr√©s'
            ]

            for m in market_classes:
                if m.get('marketName') in target_markets:
                    for m_list in m.get('marketList', []):
                        for j in m_list.get('selections', []):
                            try:
                                # Safe Odd Calculation
                                up = float(j['currentPriceUp'])
                                down = float(j['currentPriceDown'])
                                odd = round(1 + (up / down), 2)
                                
                                row_data0.append({
                                    "IdGame": idgame_str,
                                    "DateTime": DateTime,
                                    "Competition": competitionName,
                                    "Home": home,
                                    "Away": away,
                                    "Bet": j.get('name'),
                                    "Odd": odd
                                })
                            except (ZeroDivisionError, KeyError, TypeError):
                                continue 
        else:
            print(f"Skipping {idgame_str}: Received Status {response.status_code}")

    except Exception as e:
        print(f"Connection Error for {idgame_str}: {e}")
        continue

# --- 5. Final Output ---
dfOdds = pd.DataFrame(row_data0)
if not dfOdds.empty:
    print(f"‚úÖ Successfully extracted {len(dfOdds)} odds.")
    # display(dfOdds.head(15))
else:
    print("‚ùå No odds were extracted. Check if market names match the API.")

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 25/25 [00:43<00:00,  1.73s/it]

‚úÖ Successfully extracted 1765 odds.





In [148]:
import pandas as pd
import numpy as np
from unidecode import unidecode
import re

# --- 1. DEFINE ALL DICTIONARIES ---
# Paste the full content of your MatchTeamNames here to ensure they are in memory
DictTeam = {
    "VfB Stuttgart": "Stuttgart", "FC Augsburg": "Augsbourg", "Karlsruher SC": "Karlsruhe",
    "SC Paderborn 07": "Paderborn", "1. FC Heidenheim": "Heidenheim", "SSV Jahn Regensburg": "Jahn Regensburg",
    "FC St. Pauli": "St. Pauli", "VfL Bochum": "Bochum", "Bayern Munich": "Bayern Munich",
    "RB Leipzig": "RB Leipzig", "Hertha Berlin": "Hertha BSC", "M'gladbach": "Borussia M'Gladbach",
    "Bayer Leverkusen": "Bayer Leverkusen", "VfL Wolfsburg": "Wolfsbourg", "Hambourg": "Hamburger SV",
    "Eintracht Frankfurt": "Eintracht Francfort", "1. FC Union Berlin": "Union Berlin",
    "Schalke 04": "Schalke 04", "Borussia Dortmund": "Dortmund", "Werder Bremen": "Werder Breme",
    "Paris Saint-Germain ": "Paris SG", "AS Monaco": "Monaco", "AC Milan": "AC Milan",
    "Internazionale": "Inter Milan", "Juventus Turin": "Juventus", "AS Rome": "AS Roma"
    # ... (Include all other teams from your list here)
}

# Auto-generate the bidirectional dictionary
Dict_teams = {}
for key, value in DictTeam.items():
    if len(key) <= len(value):
        Dict_teams[value] = key
    else:
        Dict_teams[key] = value

special_dict0 = {"1 FC Nuremberg": "Nurnberg"}

special_dict = {
    "FK Autriche Wien": "Austria Vienne", "FC Seville": "Sevilla", "Palerme FC": "Palerme",
    "Venezia FC": "Venezia", "FC Nuremberg": "Nurnberg", "Atalanta BC": "Atalanta",
    "Girona FC": "Gerone", "Watford FC": "Watford", "Toulouse FC": "Toulouse"
    # ... (Include other special mappings)
}

# --- 2. INITIALIZE DATAFRAME ---
OddGames = pd.DataFrame(row_data0)

# Normalize text (Remove accents and ensure strings)
for col in ['Bet', 'Home', 'Away']:
    OddGames[col] = OddGames[col].apply(lambda x: unidecode(str(x)) if x and str(x) != 'nan' else x)

# --- 3. APPLY TEAM MAPPINGS & MARKERS (H/A) ---
# Apply special dicts first
for d in [special_dict0, special_dict]:
    OddGames["Bet"] = OddGames["Bet"].replace(d, regex=True)
    OddGames["Away"] = OddGames["Away"].replace(d, regex=True)
    OddGames["Home"] = OddGames["Home"].replace(d, regex=True)

inverted_dict = {v: k for k, v in Dict_teams.items()}

# Function to replace specific team names with 'H' or 'A' in the Bet string
def replace_team_markers(row):
    res = str(row["Bet"])
    h_re = re.escape(str(row["Home"]))
    a_re = re.escape(str(row["Away"]))
    res = re.sub(a_re, 'A', res)
    res = re.sub(h_re, 'H', res)
    return res

# Apply dictionaries and then convert names to H/A markers
for d in [inverted_dict, Dict_teams]:
    OddGames["Away"] = OddGames["Away"].replace(d)
    OddGames["Home"] = OddGames["Home"].replace(d)
    OddGames["Bet"] = OddGames["Bet"].replace(d)

OddGames["Bet"] = OddGames.apply(replace_team_markers, axis=1)

# --- 4. STANDARDIZE BET NAMES ---
replacements = {
    r'Moins de ': 'M', r'Plus de ': 'P', r' ou ': '/',
    r'under ': 'M', r'over ': 'P', r' Or ': '/', r' or ': '/',
    r'Match nul': 'D', r'match nul': 'D', r'Match Nul': 'D',
    r'Draw': 'D', r'draw': 'D', r'Egalite': 'D',
    r'Oui': 'LDEM', r'Non': 'LDEMP', r'Yes': 'LDEM', r'No': 'LDEMP',
    r'\s+': '' # Remove all whitespace
}

for old, new in replacements.items():
    OddGames["Bet"] = OddGames["Bet"].str.replace(old, new, regex=True)

# Convert dots to commas for your specific requirements
OddGames['Bet'] = OddGames['Bet'].str.replace('.', ',', regex=False)

# --- 5. PIVOT TABLE ---
Unibet_Wide = OddGames.pivot_table(
    index=['IdGame', 'DateTime', 'Home', 'Away', 'Competition'],
    columns='Bet', values='Odd', aggfunc='first'
).reset_index()

# --- 6. FINAL COLUMN ALIGNMENT ---
Cols = [
    'IdGame', 'DateTime', 'Home', 'Away', 'Competition', 
    'A', 'D', 'H', 'H/D', 'D/A', 'H/A', 
    'H/D&M1,5', 'H/D&M2,5', 'H/D&M3,5', 'H/D&M4,5',
    'H/D&P1,5', 'H/D&P2,5', 'H/D&P3,5', 'H/D&P4,5', 
    'D/A&M1,5', 'D/A&M2,5', 'D/A&M3,5', 'D/A&M4,5',
    'D/A&P1,5', 'D/A&P2,5', 'D/A&P3,5', 'D/A&P4,5', 
    'H/A&M1,5', 'H/A&M2,5', 'H/A&M3,5', 'H/A&M4,5',
    'H/A&P1,5', 'H/A&P2,5', 'H/A&P3,5', 'H/A&P4,5', 
    'LDEM', 'LDEMP', 
    'H/D&LDEM', 'H/D&LDEMP', 'D/A&LDEM', 'D/A&LDEMP', 'H/A&LDEM', 'H/A&LDEMP', 
    'M0,5', 'M1,5', 'M2,5', 'M3,5', 'M4,5', 'M5,5',
    'P0,5', 'P1,5', 'P2,5', 'P3,5', 'P4,5', 'P5,5'
]

# Ensure all standard columns exist, filling missing ones with NaN
for col in Cols:
    if col not in Unibet_Wide.columns:
        Unibet_Wide[col] = np.nan

Unibet = Unibet_Wide[Cols].sort_values(by='DateTime')

# Final Verification
mask = ~OddGames['Bet'].isin(Cols)
print(f"‚úÖ Success: {len(Unibet)} matches processed.")
if len(OddGames[mask]['Bet'].unique()) > 0:
    print(f"üö© Unmapped bets: {OddGames[mask]['Bet'].unique()}")

# display(Unibet.head())

‚úÖ Success: 25 matches processed.
üö© Unmapped bets: <ArrowStringArray>
[       'HFoot/D',   'HFoot/StadeA',         'H&M1,5',         'H&P1,5',
         'D&M1,5',         'D&P1,5',         'A&M1,5',         'A&P1,5',
         'H&M2,5',         'H&P2,5',
 ...
     'D/A29&M4,5',     'D/A29&P4,5', 'ParisFc/D&M1,5', 'ParisFc/D&P1,5',
 'ParisFc/D&M2,5', 'ParisFc/D&P2,5', 'ParisFc/D&M3,5', 'ParisFc/D&P3,5',
 'ParisFc/D&M4,5', 'ParisFc/D&P4,5']
Length: 208, dtype: str


Bet,IdGame,DateTime,Home,Away,Competition,A,D,H,H/D,D/A,...,"M2,5","M3,5","M4,5","M5,5","P0,5","P1,5","P2,5","P3,5","P4,5","P5,5"
7,3368697_1,2026-02-14 14:00:00,Grenoble,Reims,Ligue 2 BKT¬Æ,1.79,3.45,4.35,,1.17,...,1.66,1.24,1.07,1.01,1.03,1.24,1.79,2.82,6.1,15.2
8,3368847_1,2026-02-14 14:00:00,Montpellier,Le Mans,Ligue 2 BKT¬Æ,3.15,3.08,2.28,1.29,1.46,...,1.49,1.16,1.04,1.01,1.05,1.33,2.04,3.32,8.5,23.0
2,3364905_1,2026-02-14 17:00:00,Marseille,Strasbourg,Ligue 1 McDonald's¬Æ,3.92,4.05,1.86,1.22,1.85,...,2.04,1.42,1.15,1.04,1.02,1.16,1.54,2.3,4.05,8.5
1,3364903_1,2026-02-14 19:00:00,Lille,Brest,Ligue 1 McDonald's¬Æ,6.0,4.3,1.56,1.11,,...,1.78,1.29,1.09,1.02,1.03,1.22,1.73,2.75,5.4,12.5
9,3368851_1,2026-02-14 20:00:00,Guingamp,Saint-Etienne,Ligue 2 BKT¬Æ,2.75,3.55,2.36,1.39,1.47,...,1.85,1.33,1.11,1.03,1.03,1.22,1.72,2.72,5.15,12.0


In [138]:
import os

# --- Save UNIBET odds ---
file_path = "Odds_Match.csv"

# 1. Check if the file exists
if os.path.exists(file_path):
    csv_df = pd.read_csv(file_path)
    # Ensure IdGame is treated as a string for comparison
    csv_df['IdGame'] = csv_df['IdGame'].astype(str)
else:
    # Create an empty DataFrame with the correct columns if file is missing
    csv_df = pd.DataFrame(columns=Unibet.columns)

# 2. Identify matches NOT already in the CSV
# We convert IdGame to string to ensure the comparison works perfectly
unibet_ids = Unibet['IdGame'].astype(str)
csv_ids = csv_df['IdGame'].unique()

is_new_match = ~unibet_ids.isin(csv_ids)

# 3. Combine and Save
if is_new_match.any():
    # Filter only the new matches
    new_data = Unibet[is_new_match]
    
    # Concatenate
    updated_csv = pd.concat([csv_df, new_data], ignore_index=True)
    
    # Standardize dates and sort
    updated_csv['DateTime'] = pd.to_datetime(updated_csv['DateTime'])
    updated_csv = updated_csv.sort_values(by='DateTime')
    
    # Save to CSV
    updated_csv.to_csv(file_path, index=False, encoding='utf-8')
    print(f"‚úÖ Added {len(new_data)} new matches to {file_path}")
else:
    print("‚ÑπÔ∏è No new matches found. CSV is already up to date.")
    

‚ÑπÔ∏è No new matches found. CSV is already up to date.


None