#### Web Scraping Bets

The following code scraps bets from "Caliente.com". It retrieves the "momios" of the Mexican Soccer league only.  

In [2]:
from bs4 import BeautifulSoup
import urllib,re
from lxml import etree
import pandas as pd
import numpy as np
from math import floor
from statistics import stdev,mean as sd,mean
import Bets as bt

Call the file with all the functions

This project will use BeautifulSoup to web scrap the web page, as the page structure isn't complex in order to use another libraries.


In [3]:
# Assing the host link
link='https://sports.caliente.mx/es_MX'
# Create a request object with headers
req = urllib.request.Request(link, headers={"User-Agent": "Mozilla/5.0"})
# Open the URL and read the HTML content
html = urllib.request.urlopen(req).read()
# Create a BeautifulSoup object to parse the HTML content
soup=BeautifulSoup(html,"html.parser")
# Scrapping the tags "div" where they class is equal to "fragment expander coupon-for-type"
Bets=soup.find_all('div',attrs={'class':"fragment expander coupon-for-type"})

In [4]:
dic={}
for Bet in Bets:
    # Parsing the HTML per each league
    parsed=etree.HTML(str(Bet))
    # League title
    League=str(parsed.xpath('//h4/text()')[0]).strip()
    # Teams per Bet
    teams=parsed.xpath('//td[contains(@class,"seln")]/div/button/@title')
    # "Momios" per Bet
    bets=parsed.xpath("//td[contains(@class,'seln')]/div/button/span/span[@class='price us']/text()")
    
    lenminimum=min(len(teams),len(bets))
    auxiliar=floor(lenminimum/3)   
    dfteams=np.array(teams[0:(auxiliar*3)]).reshape(auxiliar,3)
    dfbets=np.array(bets[0:(auxiliar*3)]).reshape(auxiliar,3)
    #Assigning the berts per league
    dic[League]=dic.get(League,{"Teams":dfteams,"Bets":dfbets})
    
keys=list(dic.keys())

In [7]:
keys

['México - Liga MX',
 'Mundial 2026 - Mundial 2026 - Clasificatorios CONMEBOL',
 'Amistosos',
 'CONCACAF Liga de Naciones - Nations League Partidos',
 'Mexico - Liga de Expansion MX',
 'La Liga',
 'Inglaterra - Premier League',
 'Italia - Serie A',
 'Bundesliga 1',
 'Ligue 1']

In [13]:
# The current analysis is interested in the Mexican soccer league. However, we could do it for the other leagues. 
league=dic['México - Liga MX']
teams=league['Teams']
bets=league['Bets']

momios=bt.transform(bets)

In [26]:
teams

array([['Atlas ', 'Empate ', 'Mazatlán FC '],
       ['FC Juárez ', 'Empate ', 'Pachuca '],
       ['Puebla ', 'Empate ', 'Chivas de Guadalajara '],
       ['León ', 'Empate ', 'Toluca '],
       ['América ', 'Empate ', 'Santos Laguna '],
       ['Querétaro FC ', 'Empate ', 'Tijuana Xolos de Caliente '],
       ['Tigres UANL ', 'Empate ', 'Cruz Azul '],
       ['Pumas UNAM ', 'Empate ', 'Monterrey '],
       ['Atlético San Luis ', 'Empate ', 'Necaxa '],
       ['León ', 'Empate ', 'Atlas '],
       ['Monterrey ', 'Empate ', 'Tijuana Xolos de Caliente '],
       ['FC Juárez ', 'Empate ', 'Atlético San Luis '],
       ['Necaxa ', 'Empate ', 'Pumas UNAM '],
       ['Mazatlán FC ', 'Empate ', 'Querétaro FC '],
       ['Cruz Azul ', 'Empate ', 'León '],
       ['Pachuca ', 'Empate ', 'Puebla '],
       ['Chivas de Guadalajara ', 'Empate ', 'Tigres UANL '],
       ['Monterrey ', 'Empate ', 'América '],
       ['Toluca ', 'Empate ', 'Atlético San Luis '],
       ['Santos Laguna ', 'Empate ', 

Creating a dictionary that will have as keys the match and the values contained are the simulations. 

In [15]:
Bet_Simulation={}

In [16]:
for item in range(len(teams)):

    teamnames=list(teams[item])
    teammomio=momios[item]
    #Function
    proportions=bt.bet(teammomio)
    gains=proportions["Gains"]
    numbers=proportions["Numbers"]
    #Selected is an index array that have the bets which have at least two positive results
    selected=bt.select(gains)
    selectedgains=[]
    selectednumbers=[]
    
    # Pass the dictonary into a dataframe
    for ite in selected:
        selectedgains.append(gains[ite])
        selectednumbers.append(numbers[ite])
    selectednumbers=pd.DataFrame(selectednumbers)
    selectednumbers.columns=["Percentage " +x for x in list(teams[item])]    
    
    #Considering all the scenarios from the select function
    all_results=bt.addmaxmin(selectedgains,teamnames)
    all_results["Sd"]=pd.DataFrame(gains).apply(lambda x: np.std(x),axis=1)
    all_results["Average"]=pd.DataFrame(gains).apply(lambda x: mean(x),axis=1)

    result=pd.concat([selectednumbers,all_results],axis=1)
    result=bt.minsort(result)

    # Scenario where the abs(maximum)>abs(minimum)
    abs_result=pd.DataFrame(selectedgains).apply(lambda x: bt.greater_than_abs_minimum(x),axis=1 )
    abs_result=pd.concat([selectednumbers,abs_result],axis=1).dropna()
    

    joined_teams=str('- '.join(list(teams[item])))
    print("Done "+ joined_teams)
    Bet_Simulation[joined_teams]=Bet_Simulation.get(joined_teams,{})
    Bet_Simulation[joined_teams]["All_Results"]=result
    Bet_Simulation[joined_teams]["Abs_Results"]=result.iloc[abs_result.index]


Done Atlas - Empate - Mazatlán FC 
Done FC Juárez - Empate - Pachuca 
Done Puebla - Empate - Chivas de Guadalajara 
Done León - Empate - Toluca 
Done América - Empate - Santos Laguna 
Done Querétaro FC - Empate - Tijuana Xolos de Caliente 
Done Tigres UANL - Empate - Cruz Azul 
Done Pumas UNAM - Empate - Monterrey 
Done Atlético San Luis - Empate - Necaxa 
Done León - Empate - Atlas 
Done Monterrey - Empate - Tijuana Xolos de Caliente 
Done FC Juárez - Empate - Atlético San Luis 
Done Necaxa - Empate - Pumas UNAM 
Done Mazatlán FC - Empate - Querétaro FC 
Done Cruz Azul - Empate - León 
Done Pachuca - Empate - Puebla 
Done Chivas de Guadalajara - Empate - Tigres UANL 
Done Monterrey - Empate - América 
Done Toluca - Empate - Atlético San Luis 
Done Santos Laguna - Empate - FC Juárez 
Done Tijuana Xolos de Caliente - Empate - Atlas 


The code only retrieves the top 100 according to the minsort function. All the documentation is included in BetsFunctions_Documentation

In [17]:
Bet_Simulation.keys()

dict_keys(['Atlas - Empate - Mazatlán FC ', 'FC Juárez - Empate - Pachuca ', 'Puebla - Empate - Chivas de Guadalajara ', 'León - Empate - Toluca ', 'América - Empate - Santos Laguna ', 'Querétaro FC - Empate - Tijuana Xolos de Caliente ', 'Tigres UANL - Empate - Cruz Azul ', 'Pumas UNAM - Empate - Monterrey ', 'Atlético San Luis - Empate - Necaxa ', 'León - Empate - Atlas ', 'Monterrey - Empate - Tijuana Xolos de Caliente ', 'FC Juárez - Empate - Atlético San Luis ', 'Necaxa - Empate - Pumas UNAM ', 'Mazatlán FC - Empate - Querétaro FC ', 'Cruz Azul - Empate - León ', 'Pachuca - Empate - Puebla ', 'Chivas de Guadalajara - Empate - Tigres UANL ', 'Monterrey - Empate - América ', 'Toluca - Empate - Atlético San Luis ', 'Santos Laguna - Empate - FC Juárez ', 'Tijuana Xolos de Caliente - Empate - Atlas '])

In [18]:
Bet_Simulation['Atlas - Empate - Mazatlán FC '].keys()

dict_keys(['All_Results', 'Abs_Results'])

In [25]:
Bet_Simulation['Atlas - Empate - Mazatlán FC ']['Abs_Results'].sort_values(by="Sd",ascending=False).sort_values(by="Average",ascending=False)

Unnamed: 0,Percentage Atlas,Percentage Empate,Percentage Mazatlán FC,Atlas,Empate,Mazatlán FC,Min,Max,Sd,Average,Max_Min
21628,0.271129,0.444193,0.284678,-0.552813,0.865609,0.494561,-0.552813,0.865609,2.400311,0.740654,0.312796
46412,0.347150,0.358223,0.294627,-0.427428,0.504535,0.546794,-0.427428,0.546794,2.447224,0.740598,0.119366
11257,0.120993,0.363331,0.515675,-0.800440,0.525991,1.707296,-0.800440,1.707296,2.364178,0.737486,0.906856
42220,0.087745,0.574920,0.337336,-0.855278,1.414662,0.771013,-0.855278,1.414662,2.405333,0.737334,0.559384
18130,0.429663,0.277479,0.292858,-0.291335,0.165412,0.537506,-0.291335,0.537506,2.331611,0.732857,0.246170
...,...,...,...,...,...,...,...,...,...,...,...
26807,0.529136,0.269093,0.201771,-0.127269,0.130191,0.059297,-0.127269,0.130191,0.730612,-0.426784,0.002922
34448,0.549320,0.256531,0.194149,-0.093978,0.077430,0.019280,-0.093978,0.077430,0.736676,-0.428430,-0.016548
28033,0.217193,0.260549,0.522259,-0.641773,0.094304,1.741858,-0.641773,1.741858,0.740393,-0.429574,1.100085
14806,0.374995,0.297918,0.327086,-0.381501,0.251257,0.717203,-0.381501,0.717203,0.752060,-0.436745,0.335702


In [51]:
Bet_Simulation['Syria - Empate - Kuwait ']['All_Results']

In [56]:
def absolute_value_selection(simulation):
    positive_numbers = [num for num in simulation if num > 0]
    negative_numbers=[num for num in simulation if num < 0]
    if len(positive_numbers)==2 and all(num > abs(negative_numbers[0]) for num in positive_numbers):
        return(simulation)


In [68]:
def negative_minimum(simulation):
    negative_numbers=[num for num in simulation if num < 0]
    minimum=min(np.abs(simulation))
    if minimum == np.abs(negative_numbers):
        return(simulation)
    else:
        None
