In [5]:
import pandas as pd

In [6]:
matches_detailed = pd.read_csv("../CleanedDatasets/Cleaning/matches_detailed.csv")
matches_detailed

Unnamed: 0.1,Unnamed: 0,date,time,comp,round,day,venue,result,gf,ga,...,notes,sh,sot,dist,fk,pk,pkatt,season,team,year
0,0,2024-08-17,21:30,La Liga,Matchweek 1,Sat,Away,W,2.0,1.0,...,,17.0,5.0,18.6,1.0,1,1,2025,Barcelona,2024
1,1,2024-08-24,19:00,La Liga,Matchweek 2,Sat,Home,W,2.0,1.0,...,,13.0,5.0,16.6,0.0,0,0,2025,Barcelona,2024
2,2,2024-08-27,21:30,La Liga,Matchweek 3,Tue,Away,W,2.0,1.0,...,,22.0,5.0,19.3,1.0,0,0,2025,Barcelona,2024
3,3,2024-08-31,17:00,La Liga,Matchweek 4,Sat,Home,W,7.0,0.0,...,,23.0,11.0,13.7,1.0,0,0,2025,Barcelona,2024
4,4,2024-09-15,16:15,La Liga,Matchweek 5,Sun,Away,W,4.0,1.0,...,,20.0,9.0,19.1,0.0,0,0,2025,Barcelona,2024
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9013,48,2020-07-05,17:00,La Liga,Matchweek 34,Sun,Home,L,0.0,1.0,...,,7.0,2.0,18.9,1.0,0,0,2019,Espanyol,2020
9014,49,2020-07-08,22:00,La Liga,Matchweek 35,Wed,Away,L,0.0,1.0,...,,11.0,2.0,18.1,1.0,0,0,2019,Espanyol,2020
9015,50,2020-07-12,14:00,La Liga,Matchweek 36,Sun,Home,L,0.0,2.0,...,,14.0,3.0,20.8,3.0,0,0,2019,Espanyol,2020
9016,51,2020-07-16,21:00,La Liga,Matchweek 37,Thu,Away,L,0.0,1.0,...,,18.0,6.0,21.3,3.0,0,0,2019,Espanyol,2020


# Financial Analysis Based on Attendance Data
Analysis of team financial performance using attendance metrics from match data.
**Data Source:** `matches_detailed.csv`
## Attendance Metrics
**Calculated Metrics:**
- **Average Attendance** = Mean(attendance) per team
- **Total Attendance** = Sum(attendance) per team
- **Maximum Attendance** = Highest single-match attendance
- **Minimum Attendance** = Lowest single-match attendance
- **Matches with Attendance** = Count of matches with recorded data
- **Home Attendance** = Mean(attendance) for home matches
- **Away Attendance** = Mean(attendance) for away matches

In [7]:
# Attendance metrics
df = matches_detailed.dropna(subset=["attendance"]).copy()
df["date"] = pd.to_datetime(df["date"], errors="coerce")
df["year"] = df["date"].dt.year

# attendance stats
attendance_metrics = df.groupby("team").agg(
    AvgAttendance=("attendance", "mean"),
    TotalAttendance=("attendance", "sum"),
    MaxAttendance=("attendance", "max"),
    MinAttendance=("attendance", "min"),
    MatchesWithAttendance=("attendance", "count")
).reset_index().rename(columns={"team": "Team"})

# Home / Away attendance
venue_att = (
    df.groupby(["team", "venue"])["attendance"]
    .mean()
    .unstack()
    .reset_index()
    .rename(columns={"team": "Team", "Home": "HomeAttendance", "Away": "AwayAttendance"})
)
attendance_metrics = attendance_metrics.merge(venue_att, on="Team", how="left")
attendance_metrics.head()

Unnamed: 0,Team,AvgAttendance,TotalAttendance,MaxAttendance,MinAttendance,MatchesWithAttendance,AwayAttendance,HomeAttendance
0,Alavés,20686.783394,5730239.0,68954.0,2896.0,277,25974.223022,15361.028986
1,Almeria,21140.276316,3213322.0,92605.0,7558.0,152,29353.947368,12926.605263
2,Athletic Club,35119.560563,12467444.0,84817.0,3518.0,355,27954.24581,42407.011364
3,Atletico Madrid,42692.260623,15070368.0,80965.0,5401.0,353,28782.775862,56213.212291
4,Barcelona,44449.269122,15690592.0,95745.0,5711.0,353,30596.350282,58380.897727


## About Revenue Potential
An average ticket price of **€50** is assumed for La Liga matches, reflecting typical pricing across most clubs and standard seating areas.
Home matches per season = 19

Formula: Estimated Matchday Revenue = Avg Attendance × €50 × 19

In [8]:
print("Estimating revenue potential...")
avg_ticket_price = 50
home_matches_per_season = 19
attendance_metrics["EstimatedMatchdayRevenue"] = (
        attendance_metrics["AvgAttendance"] *
        avg_ticket_price *
        home_matches_per_season
)
attendance_metrics.head()

Estimating revenue potential...


Unnamed: 0,Team,AvgAttendance,TotalAttendance,MaxAttendance,MinAttendance,MatchesWithAttendance,AwayAttendance,HomeAttendance,EstimatedMatchdayRevenue
0,Alavés,20686.783394,5730239.0,68954.0,2896.0,277,25974.223022,15361.028986,19652440.0
1,Almeria,21140.276316,3213322.0,92605.0,7558.0,152,29353.947368,12926.605263,20083260.0
2,Athletic Club,35119.560563,12467444.0,84817.0,3518.0,355,27954.24581,42407.011364,33363580.0
3,Atletico Madrid,42692.260623,15070368.0,80965.0,5401.0,353,28782.775862,56213.212291,40557650.0
4,Barcelona,44449.269122,15690592.0,95745.0,5711.0,353,30596.350282,58380.897727,42226810.0


## Performance Normalization
Attendance Per Match = Total Attendance / Matches with Attendance

In [9]:
attendance_metrics["AttendancePerMatch"] = (attendance_metrics["TotalAttendance"] /attendance_metrics["MatchesWithAttendance"]
)
attendance_metrics.head()

Unnamed: 0,Team,AvgAttendance,TotalAttendance,MaxAttendance,MinAttendance,MatchesWithAttendance,AwayAttendance,HomeAttendance,EstimatedMatchdayRevenue,AttendancePerMatch
0,Alavés,20686.783394,5730239.0,68954.0,2896.0,277,25974.223022,15361.028986,19652440.0,20686.783394
1,Almeria,21140.276316,3213322.0,92605.0,7558.0,152,29353.947368,12926.605263,20083260.0,21140.276316
2,Athletic Club,35119.560563,12467444.0,84817.0,3518.0,355,27954.24581,42407.011364,33363580.0,35119.560563
3,Atletico Madrid,42692.260623,15070368.0,80965.0,5401.0,353,28782.775862,56213.212291,40557650.0,42692.260623
4,Barcelona,44449.269122,15690592.0,95745.0,5711.0,353,30596.350282,58380.897727,42226810.0,44449.269122


## Financial Score
Composite 0–100 index.
- Attendance Score (50%) = Avg Attendance / max(Avg Attendance) × 50
- Revenue Score (50%) = Estimated Revenue / max(Estimated Revenue) × 50
- Financial Score = Attendance Score + Revenue Score

In [10]:
print("Calculating financial score...")

find = attendance_metrics.copy()

max_att = find["AvgAttendance"].max()
find["AttendanceScore"] = (find["AvgAttendance"] / max_att * 50).fillna(0)

max_revenue = find["EstimatedMatchdayRevenue"].max()
find["RevenueScore"] = (find["EstimatedMatchdayRevenue"] / max_revenue * 50).fillna(0)

find["FinancialScore"] = (
        find["AttendanceScore"]
        + find["RevenueScore"]
)

financial_scores = find
financial_scores.head()

Calculating financial score...


Unnamed: 0,Team,AvgAttendance,TotalAttendance,MaxAttendance,MinAttendance,MatchesWithAttendance,AwayAttendance,HomeAttendance,EstimatedMatchdayRevenue,AttendancePerMatch,AttendanceScore,RevenueScore,FinancialScore
0,Alavés,20686.783394,5730239.0,68954.0,2896.0,277,25974.223022,15361.028986,19652440.0,20686.783394,22.607655,22.607655,45.215311
1,Almeria,21140.276316,3213322.0,92605.0,7558.0,152,29353.947368,12926.605263,20083260.0,21140.276316,23.103257,23.103257,46.206515
2,Athletic Club,35119.560563,12467444.0,84817.0,3518.0,355,27954.24581,42407.011364,33363580.0,35119.560563,38.380588,38.380588,76.761177
3,Atletico Madrid,42692.260623,15070368.0,80965.0,5401.0,353,28782.775862,56213.212291,40557650.0,42692.260623,46.656452,46.656452,93.312904
4,Barcelona,44449.269122,15690592.0,95745.0,5711.0,353,30596.350282,58380.897727,42226810.0,44449.269122,48.576607,48.576607,97.153215


In [11]:
attendance_metrics.to_csv('../CleanedDatasets/Financial/attendance_metrics.csv', index=False)
financial_scores.to_csv('../CleanedDatasets/Financial/financial_scores.csv', index=False)

In [12]:
print("Financial Analysis Complete!")
cols = [
    "Team", "FinancialScore",
    "AttendanceScore", "RevenueScore"
]
financial_scores[cols].sort_values("FinancialScore", ascending=False).head(10)

Financial Analysis Complete!


Unnamed: 0,Team,FinancialScore,AttendanceScore,RevenueScore
21,Real Madrid,100.0,50.0,50.0
4,Barcelona,97.153215,48.576607,48.576607
3,Atletico Madrid,93.312904,46.656452,46.656452
20,Real Betis,83.615293,41.807646,41.807646
2,Athletic Club,76.761177,38.380588,38.380588
24,Valencia,71.990096,35.995048,35.995048
23,Sevilla,68.684486,34.342243,34.342243
22,Real Sociedad,64.858518,32.429259,32.429259
13,Las Palmas,58.058517,29.029258,29.029258
25,Valladolid,53.705296,26.852648,26.852648


In [13]:
financial_scores.to_csv('../CleanedDatasets/Financial/financial_scoresOnly.csv', index=False)