In [7]:
# Import necessary libraries
import pandas as pd  # For data processing
import numpy as np  # For numerical calculations
import json  # For handling JSON format data
# Plotting
import matplotlib.pyplot as plt  # For generating charts
from mplsoccer import VerticalPitch  # For drawing football-related charts
# Statistical model fitting
import statsmodels.api as sm  # Provides a wide range of statistical models
import statsmodels.formula.api as smf  # Fits statistical models using formulas
# Opening data files
import os  # For operating system-related functions, such as path management
import pathlib  # For higher-level path operations
import warnings  # For controlling the display of warnings

# Turn off pandas' chained-assignment check so it emits no warnings.
pd.set_option("mode.chained_assignment", None)
# Install a catch-all filter that hides every Python warning from here on.
# NOTE(review): this also hides deprecation warnings raised by the imported
# libraries; consider narrowing it to specific categories.
warnings.simplefilter('ignore')


In [8]:
# Load events_England.json into the `data` DataFrame.
# NOTE(review): this path is absolute and machine-specific. Point it at your own
# copy of the file (for example <working dir>/data/events/events_England.json)
# before running the notebook on another machine.
path = "/Users/max/data/events/events_England.json"

# Parse the JSON file and build the DataFrame in one step, so `data` is only ever
# bound to a DataFrame and the parsed JSON is not kept alive under another name.
# The encoding is pinned so the read does not depend on the platform default.
with open(path, encoding="utf-8") as f:
    data = pd.DataFrame(json.load(f))

# Copy the row index into a regular 'index' column.
data['index'] = data.index

In [5]:
# Load teams.json into the `teams` DataFrame.
# NOTE(review): this path is absolute and machine-specific; adjust it to your
# own copy of the data before running the notebook on another machine.
teampath = "/Users/max/data/teams.json"

# Parse the JSON file and build the DataFrame in one step, so `teams` is only
# ever bound to a DataFrame. The encoding is pinned so the read does not depend
# on the platform default.
with open(teampath, encoding="utf-8") as f:
    teams = pd.DataFrame(json.load(f))

In [9]:
# Load players.json into the `engplayers` DataFrame.
# NOTE(review): this path is absolute and machine-specific; adjust it to your
# own copy of the data before running the notebook on another machine.
# The variable name `playespath` (sic) is kept in case other cells refer to it.
playespath = "/Users/max/data/players.json"

# Parse the JSON file and build the DataFrame in one step, so `engplayers` is
# only ever bound to a DataFrame. The encoding is pinned so the read does not
# depend on the platform default.
with open(playespath, encoding="utf-8") as f:
    engplayers = pd.DataFrame(json.load(f))

In [14]:
# Identifiers used to pick out shots and to recognise goals.
SHOT_EVENT_ID = 10      # eventId of the rows kept in `shotsA` (eventName 'Shot' in the output)
GOAL_TAG = {'id': 101}  # tag entry treated as the goal marker

# Take an explicit copy of the shot rows, so the new column below is written to an
# independent frame and does not rely on chained-assignment warnings being silenced.
shotsA = data.loc[data['eventId'] == SHOT_EVENT_ID].copy()

# Goal is 1 when the goal tag appears in the event's tag list, otherwise 0.
# The column is deliberately kept as object dtype, as in the original cell.
shotsA["Goal"] = shotsA["tags"].apply(lambda tags: 1 if GOAL_TAG in tags else 0).astype(object)
shotsA

Unnamed: 0,eventId,subEventName,tags,playerId,positions,matchId,eventName,teamId,matchPeriod,eventSec,subEventId,id,index,Goal
46,10,Shot,"[{'id': 101}, {'id': 402}, {'id': 201}, {'id':...",25413,"[{'y': 41, 'x': 88}, {'y': 0, 'x': 0}]",2499719,Shot,1609,1H,94.595788,100,177959212,46,1
62,10,Shot,"[{'id': 401}, {'id': 201}, {'id': 1211}, {'id'...",26150,"[{'y': 52, 'x': 85}, {'y': 100, 'x': 100}]",2499719,Shot,1631,1H,179.854785,100,177959247,62,0
91,10,Shot,"[{'id': 101}, {'id': 403}, {'id': 201}, {'id':...",14763,"[{'y': 52, 'x': 96}, {'y': 100, 'x': 100}]",2499719,Shot,1631,1H,254.745027,100,177959280,91,1
128,10,Shot,"[{'id': 401}, {'id': 201}, {'id': 1215}, {'id'...",7868,"[{'y': 33, 'x': 81}, {'y': 0, 'x': 0}]",2499719,Shot,1609,1H,425.824035,100,177959289,128,0
249,10,Shot,"[{'id': 402}, {'id': 201}, {'id': 1205}, {'id'...",7868,"[{'y': 30, 'x': 75}, {'y': 0, 'x': 0}]",2499719,Shot,1609,1H,815.462015,100,177959429,249,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
642945,10,Shot,"[{'id': 401}, {'id': 1212}, {'id': 1802}]",8561,"[{'y': 45, 'x': 72}, {'y': 0, 'x': 0}]",2500098,Shot,1633,2H,1972.969422,100,251596053,642945,0
643023,10,Shot,"[{'id': 101}, {'id': 402}, {'id': 201}, {'id':...",41174,"[{'y': 33, 'x': 86}, {'y': 0, 'x': 0}]",2500098,Shot,1633,2H,2193.887080,100,251596096,643023,1
643051,10,Shot,"[{'id': 402}, {'id': 201}, {'id': 1201}, {'id'...",7879,"[{'y': 62, 'x': 88}, {'y': 100, 'x': 100}]",2500098,Shot,1623,2H,2377.197700,100,251596357,643051,0
643055,10,Shot,"[{'id': 402}, {'id': 201}, {'id': 1206}, {'id'...",145692,"[{'y': 38, 'x': 92}, {'y': 100, 'x': 100}]",2500098,Shot,1623,2H,2381.481625,100,251596359,643055,0


In [16]:
# wyId values of the five players of interest (names as shown in the cell output).
top5_ids = [
    38021,   # K. De Bruyne
    120353,  # Mohamed Salah
    54,      # C. Eriksen
    26150,   # R. Mahrez
    8422,    # W. Zaha
]

# Keep only the player rows whose wyId is one of the ids above.
is_top5 = engplayers['wyId'].isin(top5_ids)
top5_2018_england = engplayers[is_top5]
top5_2018_england

Unnamed: 0,passportArea,weight,firstName,middleName,lastName,currentTeamId,birthDate,height,role,birthArea,wyId,foot,shortName,currentNationalTeamId
9,"{'name': 'Denmark', 'id': '208', 'alpha3code':...",76,Christian,,Dannemann Eriksen,1624,1992-02-14,180,"{'code2': 'MD', 'code3': 'MID', 'name': 'Midfi...","{'name': 'Denmark', 'id': '208', 'alpha3code':...",54,right,C. Eriksen,7712.0
508,"{'name': 'Belgium', 'id': '56', 'alpha3code': ...",68,Kevin,,De Bruyne,1625,1991-06-28,181,"{'code2': 'MD', 'code3': 'MID', 'name': 'Midfi...","{'name': 'Belgium', 'id': '56', 'alpha3code': ...",38021,right,K. De Bruyne,5629.0
1719,"{'name': 'Egypt', 'id': '818', 'alpha3code': '...",72,Mohamed,,Salah Ghaly,1612,1992-06-15,175,"{'code2': 'FW', 'code3': 'FWD', 'name': 'Forwa...","{'name': 'Egypt', 'id': '818', 'alpha3code': '...",120353,left,Mohamed Salah,16129.0
3018,"{'name': 'England', 'id': 0, 'alpha3code': 'XE...",66,Wilfried,,Zaha,1628,1992-11-10,180,"{'code2': 'MD', 'code3': 'MID', 'name': 'Midfi...","{'name': 'C\u00f4te d'Ivoire', 'id': 384, 'alp...",8422,right,W. Zaha,
3037,"{'name': 'Algeria', 'id': 12, 'alpha3code': 'D...",62,Riyad,,Mahrez,1625,1991-02-21,179,"{'code2': 'MD', 'code3': 'MID', 'name': 'Midfi...","{'name': 'France', 'id': 250, 'alpha3code': 'F...",26150,left,R. Mahrez,16009.0


In [18]:
# Count goals per team for the teams referenced by the selected players.
# Inputs: `shotsA` (shot events with a 0/1 'Goal' column) and
# `top5_2018_england` (the selected player rows).
# NOTE(review): teams are selected through `currentTeamId`, presumably the
# player's current club, which can differ from the team a player appears for in
# the event data. For example, player 26150 has currentTeamId 1625 but appears
# with teamId 1631 in the `shotsA` output. Confirm this is the intended selection.

# Team ids taken from the selected players' currentTeamId.
team_ids = top5_2018_england['currentTeamId'].unique()

# Shots taken by those teams.
team_shots = shotsA[shotsA['teamId'].isin(team_ids)]

# Number of shots flagged as goals, per team. Teams without any such shot are
# absent from the result rather than listed with 0.
team_goals = team_shots[team_shots['Goal'] == 1].groupby('teamId').size()
team_goals

teamId
1612    78
1624    67
1625    94
1628    35
dtype: int64