In [1]:

# imports
import os
import sys
import types
import json
import base64

# figure size/format
# fig_width / fig_height become matplotlib's default figure.figsize below
fig_width = 7
fig_height = 5
# handed to set_matplotlib_formats(); also compared against 'pdf' further
# down to decide whether pandas should use its LaTeX repr
fig_format = 'retina'
# becomes matplotlib's figure.dpi
fig_dpi = 96
# when non-empty, assigned to InteractiveShell.ast_node_interactivity
interactivity = ''
# not read by the visible setup code (the shiny section further down is
# gated on a literal False instead)
is_shiny = False
# gates the itables / altair dashboard configuration
is_dashboard = False
# True selects plotly's "notebook_connected" renderer, False "notebook"
plotly_connected = True

# matplotlib defaults / format
# Best effort: if matplotlib or the IPython helper cannot be imported (or
# rejects a value) the remaining steps are skipped silently.
try:
  import matplotlib.pyplot as plt
  # applied one key at a time, in this order
  for rc_key, rc_value in (
    ('figure.figsize', (fig_width, fig_height)),
    ('figure.dpi', fig_dpi),
    ('savefig.dpi', "figure"),
  ):
    plt.rcParams[rc_key] = rc_value
  from IPython.display import set_matplotlib_formats
  set_matplotlib_formats(fig_format)
except Exception:
  pass

# plotly use connected mode
# Best effort: skipped entirely when plotly is not installed.
try:
  import plotly.io as pio
  pio.renderers.default = "notebook_connected" if plotly_connected else "notebook"
  # give every registered template the same tight margins
  for template_name in pio.templates.keys():
    pio.templates[template_name].layout.margin = {'t': 30, 'r': 0, 'b': 0, 'l': 0}
except Exception:
  pass

# disable itables paging for dashboards
if is_dashboard:
  try:
    from itables import options
    # (attribute, value) pairs written onto itables' options object,
    # in this order
    for option_name, option_value in (
      ('dom', 'fiBrtlp'),
      ('maxBytes', 1024 * 1024),
      ('language', dict(info = "Showing _TOTAL_ entries")),
      ('classes', "display nowrap compact"),
      ('paging', False),
      ('searching', True),
      ('ordering', True),
      ('info', True),
      ('lengthChange', False),
      ('autoWidth', False),
      ('responsive', True),
      ('keys', True),
      ('buttons', []),
    ):
      setattr(options, option_name, option_value)
  except Exception:
    # itables is optional; leave everything untouched when it is missing
    pass
  
  try:
    import altair as alt
    # By default, dashboards will have container sized
    # vega visualizations which allows them to flow reasonably
    theme_sentinel = '_quarto-dashboard-internal'
    def make_theme(name):
        """Wrap the theme registered as ``name`` with dashboard defaults.

        Returns a callable that invokes the wrapped theme and then fills in
        container sizing and default font sizes. Only keys the theme left
        unset are written; values the theme already provides are kept.
        """
        wrapped = alt.themes._plugins[name]

        # (config section, key inside that section, default font size),
        # applied in this order
        font_defaults = (
            ('axis', 'labelFontSize', 12),
            ('axis', 'titleFontSize', 12),
            ('legend', 'labelFontSize', 12),
            ('legend', 'titleFontSize', 12),
            ('header', 'labelFontSize', 13),
            ('header', 'titleFontSize', 13),
            ('title', 'fontSize', 15),
            ('mark', 'fontSize', 12),
        )
        # mark tooltips are switched off; flip to True to default them on
        tooltip = False

        def patch_theme(*args, **kwargs):
            # arguments are accepted but ignored; the wrapped theme is
            # always called without any
            spec = wrapped()

            # let the visualization size itself to its container
            spec.setdefault('height', 'container')
            spec.setdefault('width', 'container')

            config = spec.setdefault('config', dict())
            for section, key, size in font_defaults:
                config.setdefault(section, dict()).setdefault(key, size)

            if tooltip:
                config['mark'].setdefault('tooltip', dict(content="encoding"))

            return spec

        return patch_theme

    # We can only do this once per session
    # (the sentinel theme registered at the end marks that the wrapping
    # below has already happened)
    if theme_sentinel not in alt.themes.names():
      # re-register every known theme under its own name, wrapped by
      # make_theme
      for name in alt.themes.names():
        alt.themes.register(name, make_theme(name))
      
      # register a sentinel theme so we only do this once
      # NOTE(review): make_theme('default') runs after the loop above has
      # re-registered 'default', so the sentinel presumably wraps the already
      # wrapped theme -- confirm against altair's registry behaviour
      alt.themes.register(theme_sentinel, make_theme('default'))
      alt.themes.enable('default')

  except Exception:
    pass

# enable pandas latex repr when targeting pdfs
# Best effort: any failure (pandas missing, option rejected) is ignored.
# pandas is imported even when fig_format is not 'pdf', and that import
# happens before the sys.modules scan that builds kernel_deps below.
try:
  import pandas as pd
  if fig_format == 'pdf':
    pd.set_option('display.latex.repr', True)
except Exception:
  pass

# interactivity
# An empty string leaves IPython's setting alone; any other value is assigned
# to the class attribute InteractiveShell.ast_node_interactivity.
if interactivity:
  from IPython.core.interactiveshell import InteractiveShell
  InteractiveShell.ast_node_interactivity = interactivity

# NOTE: the kernel_deps code is repeated in the cleanup.py file
# (we can't easily share this code b/c of the way it is run).
# If you edit this code also edit the same code in cleanup.py!

# output kernel dependencies
# Maps the source path of every loaded module to its modification time and
# prints the mapping as one JSON line.
kernel_deps = {}
for loaded in tuple(sys.modules.values()):
  # Some modules play games with sys.modules (e.g. email/__init__.py
  # in the standard library), and occasionally this can cause strange
  # failures in getattr.  Only ordinary module objects are inspected.
  if isinstance(loaded, types.ModuleType):
    source = getattr(loaded, "__file__", None)
    if source:
      # map compiled files back to the .py path by dropping the last char
      if source.endswith((".pyc", ".pyo")):
        source = source[:-1]
      if os.path.exists(source):
        kernel_deps[source] = os.stat(source).st_mtime
print(json.dumps(kernel_deps))

# set run_path if requested
# (an empty string means "stay in the current working directory")
run_path = 'QzpcVXNlcnNcYWxpdW1cRGVza3RvcFxTVEFUXFNUQVQtNDY4LUVsby1Db250cmlidXRpb24tTWV0cmlj'
if run_path:
  # base64-decode the path (the value above is base64 text, not hex) and
  # make it the working directory
  run_path = base64.b64decode(run_path.encode("utf-8")).decode("utf-8")
  os.chdir(run_path)

# reset state
# IPython magic that clears the interactive namespace, so names bound by the
# setup code above (fig_width, kernel_deps, run_path, ...) are not defined
# below this line.
%reset

# shiny
# Checking for shiny by using False directly because we're after the %reset. We don't want
# to set a variable that stays in global scope.
if False:
  try:
    import htmltools as _htmltools
    import ast as _ast

    _htmltools.html_dependency_render_mode = "json"

    # This decorator will be added to all function definitions
    def _display_if_has_repr_html(x):
      """Display ``x`` as HTML when it has ``_repr_html_``; always return ``x``."""
      try:
        # IPython 7.14 preferred import
        from IPython.display import display, HTML
      except ImportError:
        # older IPython: fall back to the previous module location
        from IPython.core.display import display, HTML

      if hasattr(x, '_repr_html_'):
        display(HTML(x._repr_html_()))
      return x

    # ideally we would undo the call to ast_transformers.append
    # at the end of this block whenever an error occurs, we do
    # this for now as it will only be a problem if the user
    # switches from shiny to not-shiny mode (and even then likely
    # won't matter)
    import builtins
    # published on builtins so the bare name inserted by the transformer
    # below resolves in any namespace
    builtins._display_if_has_repr_html = _display_if_has_repr_html

    class _FunctionDefReprHtml(_ast.NodeTransformer):
      """Prepend ``_display_if_has_repr_html`` to function decorator lists.

      The visitors return the node without visiting its children, so
      definitions nested inside a function body are left untouched.
      """

      def _prepend_decorator(self, node):
        # position 0 is the outermost decorator, so it receives the result
        # of every decorator the user wrote
        node.decorator_list.insert(
          0,
          _ast.Name(id="_display_if_has_repr_html", ctx=_ast.Load())
        )
        return node

      def visit_FunctionDef(self, node):
        return self._prepend_decorator(node)

      def visit_AsyncFunctionDef(self, node):
        return self._prepend_decorator(node)

    ip = get_ipython()
    ip.ast_transformers.append(_FunctionDefReprHtml())

  except Exception:
    # best effort, like the other optional integrations in this setup code
    pass

def ojs_define(**kwargs):
  """Display the keyword arguments as JSON inside a ``<script type="ojs-define">`` element.

  Each keyword argument becomes one ``{"name": ..., "value": ...}`` entry
  under ``"contents"``. A pandas Series or DataFrame is first converted to
  a ``{column: [values, ...]}`` mapping; any other value is handed to
  ``json.dumps`` as is, so it must be JSON-serializable.

  Every ``<`` in the JSON text is written as the escape ``\\u003c``. The
  parsed data is unchanged, but a string value such as ``"</script>"`` can
  no longer terminate the surrounding script element early.
  """
  import json
  try:
    # IPython 7.14 preferred import
    from IPython.display import display, HTML
  except ImportError:
    from IPython.core.display import display, HTML

  # do some minor magic for convenience when handling pandas
  # dataframes
  def convert(v):
    try:
      import pandas as pd
    except ModuleNotFoundError: # don't do the magic when pandas is not available
      return v
    # exact type checks: subclasses of Series/DataFrame are passed through
    if type(v) == pd.Series:
      v = pd.DataFrame(v)
    if type(v) == pd.DataFrame:
      # after the transpose, "index" holds the column names and "data"
      # holds one list of values per column
      j = json.loads(v.T.to_json(orient='split'))
      return dict(zip(j["index"], j["data"]))
    else:
      return v

  contents = [dict(name=key, value=convert(value)) for (key, value) in kwargs.items()]
  # "<" can only occur inside JSON strings, where \u003c is an equivalent escape
  payload = json.dumps(dict(contents=contents)).replace("<", "\\u003c")
  display(HTML('<script type="ojs-define">' + payload + '</script>'), metadata=dict(ojs_define = True))
# bind the function under its own name in the global namespace
globals()["ojs_define"] = ojs_define
# set the global __spec__ entry to None
# NOTE(review): the reason is not visible in this file -- confirm before removing
globals()["__spec__"] = None



In [2]:
#Libraries used in the project
#Basic libraries
import numpy as np
import pandas as pd
from datetime import datetime
from io import StringIO # in-memory text stream
#Data visualization libraries
# NOTE: star import -- every public lets_plot name (LetsPlot among them)
# lands directly in the notebook namespace
from lets_plot import *
LetsPlot.setup_html()
from great_tables import GT
import arviz as az
#Web scraping libraries
import selenium as se
# NOTE: the name `requests` is bound to curl_cffi's module here; the
# separate `requests` package is imported further down as `req`
from curl_cffi import requests
from bs4 import BeautifulSoup
import time
#For database connection and manipulation
import duckdb
#For Hierarchical modeling
import pymc as pm
import xarray as xr
#For Shiny for Python
import shiny
# the `requests` package under the alias `req` (see the note on curl_cffi above)
import requests as req
import vetiver
import pins
import logging 
import json
import plotly.express as px

In [3]:
# Load the two season result files and stack them into a single frame.
season_23_24 = pd.read_csv('data/2023-24.csv')
season_24_25 = pd.read_csv('data/2024-25.csv')
frames = [season_23_24, season_24_25]
# NOTE(review): concat is called without ignore_index, so `full` presumably
# keeps each file's own row labels (labels would repeat across seasons) --
# confirm this is intended before selecting rows by label.
# The trailing empty fields in the preview suggest some columns exist in only
# one of the two files -- verify.
full = pd.concat(frames)
# preview the first five rows as a great_tables table
GT(full.head(5))

Div,Date,Time,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,HTR,Referee,HS,AS,HST,AST,HF,AF,HC,AC,HY,AY,HR,AR,B365H,B365D,B365A,BWH,BWD,BWA,IWH,IWD,IWA,PSH,PSD,PSA,WHH,WHD,WHA,VCH,VCD,VCA,MaxH,MaxD,MaxA,AvgH,AvgD,AvgA,B365>2.5,B365<2.5,P>2.5,P<2.5,Max>2.5,Max<2.5,Avg>2.5,Avg<2.5,AHh,B365AHH,B365AHA,PAHH,PAHA,MaxAHH,MaxAHA,AvgAHH,AvgAHA,B365CH,B365CD,B365CA,BWCH,BWCD,BWCA,IWCH,IWCD,IWCA,PSCH,PSCD,PSCA,WHCH,WHCD,WHCA,VCCH,VCCD,VCCA,MaxCH,MaxCD,MaxCA,AvgCH,AvgCD,AvgCA,B365C>2.5,B365C<2.5,PC>2.5,PC<2.5,MaxC>2.5,MaxC<2.5,AvgC>2.5,AvgC<2.5,AHCh,B365CAHH,B365CAHA,PCAHH,PCAHA,MaxCAHH,MaxCAHA,AvgCAHH,AvgCAHA,BFH,BFD,BFA,1XBH,1XBD,1XBA,BFEH,BFED,BFEA,BFE>2.5,BFE<2.5,BFEAHH,BFEAHA,BFCH,BFCD,BFCA,1XBCH,1XBCD,1XBCA,BFECH,BFECD,BFECA,BFEC>2.5,BFEC<2.5,BFECAHH,BFECAHA
E0,11/08/2023,20:00,Burnley,Man City,0,3,A,0,2,A,C Pawson,6,17,1,8,11,8,6,5,0,0,1,0,8.0,5.5,1.33,8.75,5.25,1.34,8.0,5.5,1.35,8.58,5.51,1.37,8.0,5.0,1.25,9.5,5.25,1.33,9.5,5.68,1.39,9.02,5.35,1.35,1.67,2.2,1.68,2.29,1.71,2.4,1.65,2.27,1.5,1.86,2.07,1.86,2.07,1.93,2.09,1.85,2.03,9.0,5.25,1.33,8.75,5.25,1.33,8.5,5.25,1.35,9.62,5.81,1.33,7.5,4.6,1.29,10.5,5.25,1.3,10.5,5.81,1.36,9.27,5.45,1.33,1.67,2.2,1.65,2.35,1.73,2.45,1.64,2.28,1.5,1.95,1.98,1.95,1.97,,,1.92,1.95,,,,,,,,,,,,,,,,,,,,,,,,,,
E0,12/08/2023,12:30,Arsenal,Nott'm Forest,2,1,H,2,0,H,M Oliver,15,6,7,2,12,12,8,3,2,2,0,0,1.18,7.0,15.0,1.17,7.5,15.5,1.2,7.25,14.0,1.18,7.86,15.67,1.12,6.5,12.0,1.14,7.5,17.0,1.21,8.5,17.5,1.18,7.64,15.67,1.44,2.75,1.42,2.93,1.45,2.98,1.42,2.85,-2.0,1.88,2.02,1.88,2.01,1.91,2.06,1.87,1.99,1.18,7.0,15.0,1.18,7.0,14.5,1.2,7.0,14.0,1.19,8.0,16.0,1.12,6.5,12.0,1.22,7.0,13.0,1.22,8.4,19.0,1.19,7.43,15.98,1.5,2.63,1.49,2.65,1.52,2.79,1.49,2.63,-2.0,1.95,1.98,1.93,1.97,2.01,2.09,1.95,1.92,,,,,,,,,,,,,,,,,,,,,,,,,,
E0,12/08/2023,15:00,Bournemouth,West Ham,1,1,D,0,0,D,P Bankes,14,16,5,3,9,14,10,4,1,4,0,0,2.7,3.4,2.55,2.65,3.4,2.55,2.7,3.45,2.6,2.7,3.47,2.71,2.62,3.2,2.3,2.63,3.3,2.63,2.8,3.62,2.75,2.69,3.44,2.64,1.9,2.0,1.9,1.99,1.95,2.03,1.88,1.94,0.0,1.95,1.95,1.95,1.95,1.98,1.99,1.94,1.92,2.63,3.5,2.6,2.65,3.5,2.5,2.6,3.5,2.6,2.75,3.6,2.63,2.5,3.2,2.45,2.63,3.5,2.6,2.88,3.67,2.7,2.7,3.53,2.59,1.73,2.1,1.76,2.18,1.83,2.23,1.74,2.12,0.0,2.02,1.91,2.01,1.92,2.06,1.96,1.96,1.91,,,,,,,,,,,,,,,,,,,,,,,,,,
E0,12/08/2023,15:00,Brighton,Luton,4,1,H,1,0,H,D Coote,27,9,12,3,11,12,6,7,2,2,0,0,1.33,5.5,9.0,1.32,5.5,9.0,1.35,5.25,8.5,1.33,5.65,9.61,1.25,4.6,8.5,1.29,5.25,10.0,1.36,6.0,10.5,1.33,5.52,9.61,1.62,2.3,1.62,2.4,1.65,2.45,1.61,2.34,-1.5,1.95,1.95,1.95,1.95,1.98,2.0,1.93,1.93,1.25,6.5,11.0,1.26,6.0,11.0,1.3,5.5,9.5,1.27,6.36,11.36,1.22,5.5,9.0,1.25,5.75,13.0,1.34,6.59,13.0,1.28,5.99,10.91,1.53,2.5,1.56,2.54,1.62,2.66,1.55,2.48,-1.75,2.01,1.92,2.0,1.91,2.14,1.93,2.0,1.86,,,,,,,,,,,,,,,,,,,,,,,,,,
E0,12/08/2023,15:00,Everton,Fulham,0,1,A,0,0,D,S Attwell,19,9,9,2,12,6,10,4,0,2,0,0,2.2,3.4,3.3,2.2,3.4,3.25,2.25,3.4,3.25,2.27,3.45,3.35,2.1,3.2,3.0,2.2,3.3,3.25,2.3,3.57,3.45,2.24,3.43,3.3,2.01,1.89,2.0,1.89,2.04,1.92,1.97,1.86,-0.25,1.93,1.97,1.95,1.95,1.97,2.0,1.92,1.93,2.3,3.2,3.2,2.35,3.2,3.1,2.35,3.25,3.15,2.39,3.32,3.3,2.1,3.2,3.0,2.38,3.2,3.2,2.42,3.42,3.56,2.32,3.28,3.27,2.1,1.73,2.23,1.72,2.26,1.81,2.17,1.71,-0.25,2.06,1.87,2.04,1.88,2.08,1.99,1.98,1.88,,,,,,,,,,,,,,,,,,,,,,,,,,


In [4]:
# Match index: per the preview, each row carries team_name, team_id, season,
# season_id, match_id, date and competition.
# NOTE(review): `date` shows up as text such as "Aug 14, 2023" and is not
# parsed here -- convert it before any date arithmetic.
Match_info = pd.read_csv('data/team_match_ids_with_dates_and_comps.csv')
# preview the first five rows as a great_tables table
GT(Match_info.head(5))

team_name,team_id,season,season_id,match_id,date,competition
Wolverhampton Wanderers FC,459,season-2023-2024,68731,3009117,"Aug 14, 2023",EPL
Wolverhampton Wanderers FC,459,season-2023-2024,68731,3009127,"Aug 19, 2023",EPL
Wolverhampton Wanderers FC,459,season-2023-2024,68731,3009134,"Aug 26, 2023",EPL
Wolverhampton Wanderers FC,459,season-2023-2024,68731,3131390,"Aug 29, 2023",FLC
Wolverhampton Wanderers FC,459,season-2023-2024,68731,3009143,"Sep 3, 2023",EPL
