In [None]:
import pandas as pd 
import numpy as np 

from datasets import load_dataset

In [None]:
class UnionFind:
    """Disjoint-set forest over a fixed collection of hashable elements.

    ``find`` applies path compression and ``union`` merges by rank.
    Elements that were not supplied to the constructor are not tracked;
    looking them up raises ``KeyError``.
    """

    def __init__(self, elements):
        """Place every element in its own singleton set.

        Args:
            elements: Iterable of hashable items. It is consumed in a single
                pass, so one-shot iterators (e.g. generators) are supported.
        """
        self.parent = {}
        self.rank = {}
        # One pass fills both tables; iterating `elements` twice would leave
        # `rank` empty whenever a one-shot iterator is passed in.
        for element in elements:
            self.parent[element] = element
            self.rank[element] = 0

    def find(self, x):
        """Return the representative of the set containing ``x``.

        Every node visited on the way to the root is re-pointed directly at
        the root (path compression).

        Raises:
            KeyError: If ``x`` was not among the constructor's elements.
        """
        # First pass: walk up to the root.
        root = x
        while self.parent[root] != root:
            root = self.parent[root]

        # Second pass: re-point every node on the path at the root.
        node = x
        while node != root:
            next_node = self.parent[node]
            self.parent[node] = root
            node = next_node
        return root

    def union(self, x, y):
        """Merge the sets containing ``x`` and ``y`` (no-op if already merged)."""
        root_x = self.find(x)
        root_y = self.find(y)

        if root_x == root_y:
            return  # Already in the same set

        # Union by rank: make root_x the root of higher (or equal) rank so the
        # other tree is always attached beneath it.
        if self.rank[root_x] < self.rank[root_y]:
            root_x, root_y = root_y, root_x
        self.parent[root_y] = root_x
        # Only a merge of equal-rank trees increases the rank of the new root.
        if self.rank[root_x] == self.rank[root_y]:
            self.rank[root_x] += 1

def partition_data(pairs):
    """Group pairs into partitions of transitively linked elements.

    Two pairs end up in the same partition when their elements are linked,
    either directly or through a chain of other pairs.

    Args:
        pairs: Iterable of 2-item ``(x, y)`` pairs of hashable elements. It is
            copied into a list first, so one-shot iterables such as
            generators are accepted.

    Returns:
        A list of partitions. Each partition is a list of ``(x, y)`` tuples in
        their original relative order.
    """
    # The input is walked three times below; materialising it keeps a
    # generator from being exhausted after the first pass (which would
    # silently produce an empty result).
    pairs = list(pairs)

    # Collect every distinct element that appears on either side of a pair.
    unique_elements = set()
    for x, y in pairs:
        unique_elements.update((x, y))

    # Link the two sides of every pair.
    uf = UnionFind(unique_elements)
    for x, y in pairs:
        uf.union(x, y)

    # Bucket pairs by the representative of x; y shares the same
    # representative because the pair was unioned above.
    partitions = {}
    for x, y in pairs:
        partitions.setdefault(uf.find(x), []).append((x, y))

    return list(partitions.values())


In [None]:
# Fetch the rankings dataset via `datasets.load_dataset` and convert its
# 'train' split into a pandas DataFrame.
dataset_name = "icc-test-championship-rankings-2023-2025-cycle"
dataset = load_dataset(f"konan-kun/{dataset_name}")
df = dataset["train"].to_pandas()

In [None]:
preference_data = []
matchday_dict = {}    # matchday key -> (team, opponent, match_status)
unique_matches = []   # raw match strings, one per distinct matchday key
# Row index in `df` -> team name.
# NOTE(review): assumes the dataframe rows appear in exactly this order — confirm.
team_dict = {0: 'South Africa', 1: 'Australia', 2: 'India', 3: 'New Zealand', 4: 'England', 5: 'Sri Lanka', 6: 'Bangladesh', 7: 'West Indies', 8: 'Pakistan'}

for idx, team_matches in enumerate(df['Matches']):
    for mat in team_matches:
        # Key = first comma-separated field without its leading token, followed
        # by the second comma-separated field.
        comma_fields = mat.split(',')
        matchday = ' '.join(comma_fields[0].split(' ')[1:]) + comma_fields[1]
        if matchday in matchday_dict:
            # A match with this key has already been recorded.
            continue
        unique_matches.append(mat)

        # The leading token of the string is the match status ('D' is handled as a draw).
        match_status = mat.split(' ')[0]
        team_name = team_dict[idx]
        after_vs = mat.split('vs')[-1]
        # Same string with every occurrence of this row's team name masked out.
        masked = mat.replace(team_name, '_')

        if match_status == 'D':
            # Opponent is the text between 'vs' and 'Match drawn'.
            opponent = after_vs.split('Match drawn')[0].strip()
        elif masked.count('_') == 2:
            # Team name occurs twice: the opponent is whatever sits between
            # 'vs' and the next occurrence of the team name.
            opponent = masked.split('vs')[-1].split('_')[0].strip()
        else:
            # Otherwise take the words between 'vs' and 'won': with exactly two
            # words the opponent is the first one, else it is the first two joined.
            words_before_won = after_vs.split('won')[0].strip().split(' ')
            if len(words_before_won) == 2:
                opponent = words_before_won[0].strip()
            else:
                opponent = ' '.join(words_before_won[:2])

        matchday_dict[matchday] = (team_name, opponent, match_status)

In [None]:
# Keep only matches where both sides belong to the selected teams.
# Names must use the same spelling as `team_dict` ('New Zealand'); the previous
# 'Newzealand' could never equal the first tuple entry, so New Zealand's
# matches were silently filtered out.
team = ["South Africa", "India", "New Zealand", "Sri Lanka", "England", "Australia"]
# Rebuilt from scratch instead of appended to, so re-running this cell does not
# duplicate rows in `preference_data`.
preference_data = [
    value
    for value in matchday_dict.values()
    if value[0] in team and value[1] in team
]

In [None]:
# Reference ordering taken from the 'Team' column in dataframe row order.
# NOTE(review): presumably the rows are already sorted by final standing — confirm.
gold_rankings = df['Team'].tolist()
gold_rankings

In [None]:
# Display the filtered (team, opponent, match_status) tuples.
preference_data

In [None]:
import random 

# For each recorded match keep the text after the last comma, cut at the first
# 'by', with surrounding whitespace removed.
llm_preference_data = [
    match_text.rsplit(',', 1)[-1].partition('by')[0].strip()
    for match_text in unique_matches
]
# Shuffle in place (unseeded, so the order differs between runs), then emit one
# entry per line.
random.shuffle(llm_preference_data)
user_text = '\n'.join(llm_preference_data)

In [None]:
# Print the shuffled, newline-joined result strings.
print(user_text)

In [None]:
import json 
import random  # imported here too so this cell does not rely on an earlier cell having run
import sys 
sys.path.append("../")
from utils import process_nfl_dataset

season = 2018
dataset = pd.read_csv("../data/nfl_mahomes_era_games.csv")
# Context managers close the JSON files instead of leaking the handles.
with open("../data/names.json", "r") as names_file:
    names = json.load(names_file)
# Translate the team columns through the `names` mapping.
dataset["home_team"] = dataset["home_team"].map(names)
dataset["away_team"] = dataset["away_team"].map(names)

preference_data, team_identifier, _ = process_nfl_dataset(dataset, season)
# names = json.load(open("./data/names.json", "r"))
# gold_rankings = [names[id] for id in json.load(open("./data/ranks.json", "r"))[str(season)]]
with open("../data/ranks.json", "r") as ranks_file:
    gold_rankings = json.load(ranks_file)[str(season)]

# One line per game: "<first> vs <second>, <winner> won" or "..., Match Drawn".
# Status 'D' is a draw; status 'L' credits the second entry as the winner and
# any other status credits the first.
# NOTE(review): presumably each tuple is (team, opponent, status) — confirm
# against process_nfl_dataset.
llm_preference_data = [
    f"{tup[0]} vs {tup[1]}, {'Match Drawn' if tup[2] == 'D' else tup[int(tup[2] == 'L')] + ' won'}"
    for tup in preference_data
]
# Unseeded shuffle: the line order differs between runs.
random.shuffle(llm_preference_data)
formatted_preference_data = '\n'.join(llm_preference_data)

In [None]:
# Print the shuffled game-result lines, one per line.
print(formatted_preference_data)

In [74]:
# Top-half / bottom-half split that the cells below compare against `gold_rankings`.
# NOTE(review): presumably pasted from a model response — confirm its origin.
teams = dict(
    top=[
        "Los Angeles Rams",
        "New Orleans Saints",
        "Dallas Cowboys",
        "New England Patriots",
        "Los Angeles Chargers",
        "Houston Texans",
        "Kansas City Chiefs",
        "Chicago Bears",
        "Minnesota Vikings",
        "Baltimore Ravens",
        "Seattle Seahawks",
        "Washington Commanders",
        "Tennessee Titans",
        "New York Giants",
    ],
    bottom=[
        "Green Bay Packers",
        "Miami Dolphins",
        "Philadelphia Eagles",
        "Indianapolis Colts",
        "Cleveland Browns",
        "Carolina Panthers",
        "Detroit Lions",
        "San Francisco 49ers",
        "Jacksonville Jaguars",
        "Las Vegas Raiders",
        "Buffalo Bills",
        "Denver Broncos",
        "Tampa Bay Buccaneers",
        "New York Jets",
        "Arizona Cardinals",
        "Cincinnati Bengals",
    ],
)


# {
# "top": ["Los Angeles Rams", "New Orleans Saints", "New England Patriots", "Houston Texans", "Los Angeles Chargers", "Chicago Bears", "Dallas Cowboys", "Kansas City Chiefs", "Philadelphia Eagles", "New York Giants", "Seattle Seahawks", "Indianapolis Colts", "Miami Dolphins", "Minnesota Vikings", "Tennessee Titans", "Washington Commanders"],
# "bottom": ["Green Bay Packers", "Baltimore Ravens", "Cincinnati Bengals", "New York Jets", "Pittsburgh Steelers", "Carolina Panthers", "Cleveland Browns", "Atlanta Falcons", "Denver Broncos", "Buffalo Bills", "Las Vegas Raiders", "Tampa Bay Buccaneers", "Arizona Cardinals", "Detroit Lions", "Jacksonville Jaguars", "San Francisco 49ers"]
# }


# {
#   "top": ["Los Angeles Rams", "New England Patriots", "New Orleans Saints", "Dallas Cowboys", "Kansas City Chiefs", "Houston Texans", "Los Angeles Chargers", "Philadelphia Eagles", "Baltimore Ravens", "Minnesota Vikings", "Seattle Seahawks", "Green Bay Packers", "Chicago Bears", "Tennessee Titans", "Buffalo Bills", "Washington Commanders"],
#   "bottom": ["Detroit Lions", "Cleveland Browns", "Indianapolis Colts", "Arizona Cardinals", "Carolina Panthers", "New York Giants", "Tampa Bay Buccaneers", "Jacksonville Jaguars", "Las Vegas Raiders", "Miami Dolphins", "Denver Broncos", "New York Jets", "San Francisco 49ers", "Cincinnati Bengals", "Pittsburgh Steelers", "Atlanta Falcons"]
# }

In [76]:
# Size of each half. The recorded output is (14, 16), i.e. the halves are
# uneven, whereas the commented-out alternative splits list 16 names per half.
len(teams["top"]), len(teams["bottom"])

(14, 16)

In [77]:
# Show the names placed in the top half.
print(teams['top'])

['Los Angeles Rams', 'New Orleans Saints', 'Dallas Cowboys', 'New England Patriots', 'Los Angeles Chargers', 'Houston Texans', 'Kansas City Chiefs', 'Chicago Bears', 'Minnesota Vikings', 'Baltimore Ravens', 'Seattle Seahawks', 'Washington Commanders', 'Tennessee Titans', 'New York Giants']


In [78]:
# Report teams placed in the predicted top half that are not among the first 16
# entries of `gold_rankings`, together with their 1-based gold position.
gold_top = gold_rankings[:16]  # sliced once rather than on every iteration
for team in teams['top']:
    if team in gold_top:
        continue
    if team in gold_rankings:
        print(team, gold_rankings.index(team) + 1)
    else:
        # list.index would raise ValueError for a name missing from the gold
        # list (e.g. a misspelled team); report it instead of aborting.
        print(team, "not in gold_rankings")

Washington Commanders 20
New York Giants 27


In [79]:
# Report teams placed in the predicted bottom half that are not found after the
# first 16 entries of `gold_rankings`, together with their 1-based gold position.
gold_bottom = gold_rankings[16:]  # sliced once rather than on every iteration
for team in teams['bottom']:
    if team in gold_bottom:
        continue
    if team in gold_rankings:
        print(team, gold_rankings.index(team) + 1)
    else:
        # list.index would raise ValueError for a name missing from the gold
        # list (e.g. a misspelled team); report it instead of aborting.
        print(team, "not in gold_rankings")

Philadelphia Eagles 13
Indianapolis Colts 10
Cleveland Browns 16
