In [331]:
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import numpy as np
import json
import os
import requests
from bs4 import BeautifulSoup
import re
import unidecode

In [333]:
pd.set_option('display.max_rows', None)

In [335]:
url = "https://data.ncaa.com/casablanca/game/6348229/pbp.json"

In [337]:
response = requests.get(url)

In [339]:
data = response.json()

In [341]:
home = data['meta']['teams'][0]['shortName']
away = data['meta']['teams'][1]['shortName']

In [343]:
events = []
score = '0-0'
for period in data['periods']:
    for play in period['playStats']:
        if play['score'] == '':
            score = score
        else:
            score = play['score']
        time = play['time']
        if play['visitorText']:
            team = 1
            event = play['visitorText']
        else:
            team = 0
            event = play['homeText']

        event_details = {
            'Score': score,
            'Time': time,
            'Event': event,
            'Team': team
        }
        events.append(event_details)

In [345]:
df = pd.DataFrame(events)

In [347]:
def clean_name(name):
    if not name:  # Check for None or empty string
        return None
    if ', ' in name:
        name = ' '.join(name.split(', ')[::-1])  # Reverse names if in "Last, First" format
    name = unidecode.unidecode(name)  # Remove accents and special characters
    name = name.strip().title()  # Strip extra spaces and standardize capitalization
    return name

In [351]:
# Define a function to categorize events
def categorize_event(event):
    if 'Goal by' in event:
        return 'Goal'
    elif 'Shot by' in event:
        return 'Shot'
    elif 'Foul' in event:
        return 'Foul'
    elif 'Corner kick' in event:
        return 'Corner Kick'
    elif 'Offside against' in event:
        return 'Offside'
    else:
        return 'Other'
        
def extract_player(event):
    # Patterns to capture both "Lastname, Firstname" and "Firstname Lastname"
    pattern = r'\b[A-Z][a-z]+,?\s*[A-Z][a-z]+'
    matches = re.findall(pattern, event)
    return matches[0] if matches else None

    
df['Name'] = df['Event'].apply(extract_player)  # Rename Player to Name
df['Name'] = df['Name'].apply(clean_name)
df['Event_Type'] = df['Event'].apply(categorize_event)
df['Team'] = df['Team'].apply(lambda x: home if x == 0 else away)
df['IsFoul'] = df['Event'].str.contains('Foul', case=False)

foul_df = df[df['IsFoul']]

foul_summary = foul_df.groupby(['Name', 'Team']).size().reset_index(name='Fouls')


In [353]:
foul_summary

Unnamed: 0,Name,Team,Fouls
0,Aleksandar Vukovic,Vermont,1
1,Alex Bamford,Vermont,3
2,David Ismail,Vermont,1
3,Haruhi Taneda,Vermont,1
4,Lineker Santos,Vermont,2
5,Rai Pinto,Vermont,1
6,Ryan Holmes,Vermont,1
7,Schulze Solano,Vermont,1
8,Sydney Wathuta,Vermont,1
9,Tarik Pannholzer,Vermont,1
