# Sudanese Football Player Win/Loss Stats

This notebook parses historical match data from `previous-matches.html` and produces win, loss, and draw counts for every player. It is ready to run in Google Colab: upload the project (or this notebook together with the HTML file) and execute the cells in order.


In [None]:
!pip install -q beautifulsoup4 lxml arabic-reshaper python-bidi

import pandas as pd
from bs4 import BeautifulSoup
from pathlib import Path
import arabic_reshaper
from bidi.algorithm import get_display

# Candidate locations for the match export: the /content path (checked first)
# and a path relative to the current working directory.
DEFAULT_HTML = Path("previous-matches.html")
COLAB_HTML = Path("/content/previous-matches.html")

# First candidate that exists on disk, or None when neither does.
PREVIOUS_MATCHES_PATH = next(
    (candidate for candidate in (COLAB_HTML, DEFAULT_HTML) if candidate.exists()),
    None,
)

if PREVIOUS_MATCHES_PATH is None:
    # Nothing on disk yet: fall back to the Colab upload prompt.
    try:
        from google.colab import files  # type: ignore

        print("previous-matches.html not found. Please upload it from your computer.")
        uploaded = files.upload()
        if "previous-matches.html" not in uploaded:
            raise FileNotFoundError("Upload cancelled or file renamed. Expected previous-matches.html.")
        PREVIOUS_MATCHES_PATH = DEFAULT_HTML
    except ModuleNotFoundError as exc:
        # The google.colab import is not available, so no upload prompt can be shown.
        raise FileNotFoundError(
            "previous-matches.html not found. Place it beside the notebook or adjust the path in cell 1."
        ) from exc

# Final guard: the chosen path must actually exist before later cells read it.
if PREVIOUS_MATCHES_PATH is None or not PREVIOUS_MATCHES_PATH.exists():
    raise FileNotFoundError(
        "previous-matches.html not found even after upload. Confirm the filename/path and rerun cell 1."
    )


In [None]:
# Arabic font support (run once per session in Colab)
!wget -q -O /usr/local/share/fonts/Tajawal-Regular.ttf https://github.com/google/fonts/raw/main/ofl/tajawal/Tajawal-Regular.ttf
!wget -q -O /usr/local/share/fonts/Tajawal-Bold.ttf https://github.com/google/fonts/raw/main/ofl/tajawal/Tajawal-Bold.ttf

import matplotlib.pyplot as plt
import matplotlib.font_manager as fm

# Register both downloaded Tajawal font files with matplotlib's font manager
# so the family name can be selected through rcParams.
for font_path in (
    "/usr/local/share/fonts/Tajawal-Regular.ttf",
    "/usr/local/share/fonts/Tajawal-Bold.ttf",
):
    fm.fontManager.addfont(font_path)

# Use Tajawal as the default family and turn off the Unicode minus sign.
plt.rcParams.update(
    {
        "font.family": "Tajawal",
        "axes.unicode_minus": False,
    }
)


In [None]:
# Maps a player name as it appears in the source data to the spelling that
# should be used instead. normalize_player_name looks names up here and leaves
# any name without an entry unchanged.
PLAYER_ALIASES = {
    "محمدعوض": "محمد عوض",
    "احمد": "احمد بشير",
}

# Single shared reshaper instance, configured with ligature support enabled;
# format_arabic uses it for every string it processes.
reshaper = arabic_reshaper.ArabicReshaper({"support_ligatures": True})

def format_arabic(text: str) -> str:
    """Reshape and reorder a string for display.

    String input is stripped, run through the module-level ``reshaper`` and
    then through the bidi ``get_display`` function.  Any value that is not a
    ``str`` is returned unchanged.
    """
    if isinstance(text, str):
        shaped = reshaper.reshape(text.strip())
        return get_display(shaped)
    return text


def normalize_player_name(name: str) -> str:
    """Return the canonical spelling of a player's name.

    Leading and trailing whitespace is removed and every internal run of
    whitespace is collapsed to a single space, so spacing variations of the
    same name are not treated as different players and still match their
    alias entry.  The cleaned name is then looked up in ``PLAYER_ALIASES``;
    a name without an alias is returned as cleaned.

    Args:
        name: Player name as read from the source data.

    Returns:
        The aliased name if one is configured, otherwise the cleaned name.
    """
    # str.split() with no argument splits on any whitespace run and drops
    # empty leading/trailing pieces, so this both strips and collapses.
    cleaned = " ".join(name.split())
    return PLAYER_ALIASES.get(cleaned, cleaned)


def _required_text(card, selector: str, card_number: int) -> str:
    """Return the stripped text of the first element in *card* matching *selector*.

    Raises:
        ValueError: If the card contains no element matching *selector*.
    """
    node = card.select_one(selector)
    if node is None:
        raise ValueError(
            f"Match card #{card_number} has no element matching {selector!r}"
        )
    return node.get_text(strip=True)


def _required_score(card, selector: str, card_number: int) -> int:
    """Return the integer score found at *selector* inside *card*.

    Raises:
        ValueError: If the element is missing or its text is not an integer.
    """
    raw = _required_text(card, selector, card_number)
    try:
        return int(raw)
    except ValueError as exc:
        raise ValueError(
            f"Match card #{card_number} has a non-numeric score {raw!r} at {selector!r}"
        ) from exc


def extract_matches(path: "str | Path") -> list:
    """Parse every ``.match-card`` element of an HTML file into a match record.

    Args:
        path: Location of the HTML file; a string or a ``Path``.

    Returns:
        A list with one dict per match card, in document order.  Each dict has
        the keys ``date``, ``venue``, ``red_team``, ``yellow_team`` (strings),
        ``red_score``, ``yellow_score`` (ints) and ``red_players``,
        ``yellow_players`` (lists of names passed through
        ``normalize_player_name``).

    Raises:
        FileNotFoundError: If *path* does not exist.
        ValueError: If a card lacks one of the required elements or carries a
            score that is not an integer.  The message names the card's
            1-based position and the selector involved.
    """
    html_path = Path(path)
    if not html_path.exists():
        raise FileNotFoundError(f"Could not find {html_path.resolve()}")

    soup = BeautifulSoup(html_path.read_text(encoding="utf-8"), "lxml")
    matches = []

    # Cards are numbered from 1 purely so error messages can point at the
    # offending card in the document.
    for card_number, card in enumerate(soup.select(".match-card"), start=1):
        date = _required_text(card, ".match-date", card_number)
        venue = _required_text(card, ".match-venue", card_number)
        red_team = _required_text(card, ".team-name.red-team", card_number)
        yellow_team = _required_text(card, ".team-name.yellow-team", card_number)
        red_score = _required_score(card, ".score-row .red-score", card_number)
        yellow_score = _required_score(card, ".score-row .yellow-score", card_number)

        # A card without player elements simply yields empty rosters.
        red_players = [
            normalize_player_name(span.get_text(strip=True))
            for span in card.select(".red-team-column .player-name")
        ]
        yellow_players = [
            normalize_player_name(span.get_text(strip=True))
            for span in card.select(".yellow-team-column .player-name")
        ]

        matches.append(
            {
                "date": date,
                "venue": venue,
                "red_team": red_team,
                "yellow_team": yellow_team,
                "red_score": red_score,
                "yellow_score": yellow_score,
                "red_players": red_players,
                "yellow_players": yellow_players,
            }
        )

    return matches


# Parse the HTML export once; keep the raw records and a DataFrame built from them.
matches = extract_matches(PREVIOUS_MATCHES_PATH)
df_matches = pd.DataFrame(matches)
print("Loaded {} matches from {}".format(len(matches), PREVIOUS_MATCHES_PATH))
# Last expression of the cell: show the match table.
df_matches


In [None]:
def expand_player_results(df):
    """Expand match rows into one record per player appearance.

    Args:
        df: DataFrame with one row per match and the columns ``red_score``,
            ``yellow_score``, ``red_players``, ``yellow_players``, ``date``
            and ``venue``.

    Returns:
        DataFrame with the columns ``player``, ``team``, ``result``, ``date``
        and ``venue``.  ``team`` is ``"red"`` or ``"yellow"`` and ``result``
        is ``"win"``, ``"loss"`` or ``"draw"`` from that team's point of view.
        Within a match, red players come before yellow players.  The columns
        are present even when there are no records, so callers can always
        refer to them.
    """
    columns = ["player", "team", "result", "date", "venue"]
    records = []

    for _, row in df.iterrows():
        red_score, yellow_score = row["red_score"], row["yellow_score"]
        if red_score > yellow_score:
            match_outcome = {"red": "win", "yellow": "loss"}
        elif yellow_score > red_score:
            match_outcome = {"red": "loss", "yellow": "win"}
        else:
            match_outcome = {"red": "draw", "yellow": "draw"}

        # Both rosters are handled identically; only the team label differs.
        for team in ("red", "yellow"):
            for player in row[f"{team}_players"]:
                records.append(
                    {
                        "player": normalize_player_name(player),
                        "team": team,
                        "result": match_outcome[team],
                        "date": row["date"],
                        "venue": row["venue"],
                    }
                )

    # Passing the columns explicitly keeps the schema stable when `records`
    # is empty (no matches, or matches without listed players).
    return pd.DataFrame(records, columns=columns)

# One row per player appearance, derived from the match table.
player_results = expand_player_results(df_matches)
# Preview the first rows as the cell output.
player_results.head()


In [None]:
# Count rows per (player, result) pair, then pivot the result labels into
# columns; pairs that never occur are filled with 0.
summary = player_results.groupby(["player", "result"]).size().unstack(fill_value=0)
summary = summary.reset_index()

# A result that never occurs for any player has no column after the pivot,
# so add it as all zeros.
for col in ("win", "loss", "draw"):
    if col not in summary.columns:
        summary[col] = 0

# Total appearances per player.
summary["matches"] = summary.loc[:, ["win", "loss", "draw"]].sum(axis="columns")

# Most wins first; ties are broken by matches played, also descending.
summary = summary.sort_values(by=["win", "matches"], ascending=False)
summary = summary.reset_index(drop=True)
summary.head(10)


In [None]:
import matplotlib.pyplot as plt

# Only the axis labels go through format_arabic; the "player" column keeps
# the raw names.
summary_plot = summary.assign(
    player_display=summary["player"].apply(format_arabic)
)

# Long format: one row per (player, result) with its count.
summary_melted = summary_plot.melt(
    id_vars=["player", "player_display", "matches"],
    value_vars=["win", "loss", "draw"],
    var_name="result",
    value_name="count"
)

fig, ax = plt.subplots(figsize=(12, 8))

# Stack the segments: each result's bar starts where the previous one ended.
# Drawing every result from x=0 on the same rows would make the bars overlap
# and hide each other.  Every result group lists the players in the same
# order (the order of summary_plot), so positional offsets line up.
bar_start = 0
for result, data in summary_melted.groupby("result"):
    counts = data["count"].to_numpy()
    ax.barh(
        data["player_display"],
        counts,
        left=bar_start,
        label=result.capitalize()
    )
    bar_start = bar_start + counts

ax.set_xlabel("Matches")
ax.set_title("Player Win/Loss Record")
ax.legend()
# Put the first row of the summary (most wins) at the top of the chart.
ax.invert_yaxis()
plt.tight_layout()
plt.show()


In [None]:
# Show the complete summary table (the cell that builds it previews only the first 10 rows).
summary


In [None]:
# Work on a copy so the presentation columns never leak into `summary`.
summary_table = summary.copy()

# Wins as a percentage of decided matches (wins + losses; draws are left out).
# A player with no wins and no losses gets 0 instead of a division by zero.
summary_table["Win/Loss Ratio (%)"] = summary_table.apply(
    lambda row: 0 if not (row["win"] + row["loss"]) else (row["win"] / (row["win"] + row["loss"])) * 100,
    axis=1,
)

# Switch to presentation headers and keep only the columns shown in the table.
summary_table = summary_table.rename(
    columns={
        "player": "Player",
        "matches": "Matches Played",
        "win": "Wins",
        "loss": "Losses",
    }
)[["Player", "Matches Played", "Wins", "Losses", "Win/Loss Ratio (%)"]]

# Player names are passed through format_arabic for display.
summary_table["Player"] = summary_table["Player"].map(format_arabic)
summary_table.round({"Win/Loss Ratio (%)": 1})
