In [1]:
import pickle 

In [2]:
# Load the pickled prediction results. Later cells iterate over this object as a
# sequence of mappings (each element is consumed via `.items()`).
# NOTE(review): pickle.load can execute arbitrary code from the file; only load
# pickles that come from a trusted source.
with open("/home/turning/Jainit/TANQ/EVALUATION_OF_MODELS/llama/rotowire/results_rotowire.pickle", "rb") as f:
    pred_tables = pickle.load(f)

In [11]:
# Flatten the list of per-example mappings into a single id -> prediction dict.
# If the same key appears in more than one mapping, the last one wins.
pred_tables_dict = {}
for pred_table in pred_tables:
    pred_tables_dict.update(pred_table.items())

In [75]:
# Load a second pickled results file and merge its entries into the dict built
# above; keys already present are overwritten by this file's values.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open("/home/turning/Jainit/TANQ/EVALUATION_OF_MODELS/llama/rotowire/rotowire_2.pickle", "rb") as f:
    pred_tables = pickle.load(f)

for pred_table in pred_tables:
    pred_tables_dict.update(pred_table.items())

In [76]:
# Number of distinct prediction keys collected from both result files.
len(pred_tables_dict)

728

In [77]:
import re

def extract_tables(text: str):
    """
    Extract the "Team:" and "Player:" markdown tables from ``text``.

    Each table is located by its label (matched case-insensitively), followed by
    a newline and one or more consecutive lines that start with ``|``. Every
    table that is found is normalised as follows:

      1. The first row is treated as the header; if its first cell is empty it
         is set to "Team" / "Player".
      2. Exactly one ``---`` separator row is emitted after the header. If the
         input already carries a markdown delimiter row directly under the
         header (e.g. ``| --- | :-: |``), it is dropped instead of being kept
         as a bogus data row next to the generated separator.
      3. Body rows shorter than the header are padded with empty cells. Rows
         longer than the header are left unchanged.

    Args:
        text: Raw text expected to contain the labelled tables.

    Returns:
        A tuple ``(team_table, player_table)`` of markdown strings. A table
        that is not found is returned as an empty string.
    """

    # Regex patterns to capture the tables following "Team:" and "Player:" labels.
    team_pattern = re.compile(r'Team:\s*\n((?:\|.*\n?)+)', re.IGNORECASE)
    player_pattern = re.compile(r'Player:\s*\n((?:\|.*\n?)+)', re.IGNORECASE)
    # One cell of a markdown delimiter row: hyphens with optional alignment colons.
    separator_cell = re.compile(r':?-+:?')

    team_match = team_pattern.search(text)
    player_match = player_pattern.search(text)

    raw_team = team_match.group(1) if team_match else ""
    raw_player = player_match.group(1) if player_match else ""

    def split_row(row: str):
        """Split a table line on '|' and return the stripped cells."""
        parts = row.split("|")
        # A leading/trailing pipe yields an empty first/last piece; drop those
        # but keep genuinely empty interior cells.
        if parts and parts[0].strip() == "":
            parts = parts[1:]
        if parts and parts[-1].strip() == "":
            parts = parts[:-1]
        return [cell.strip() for cell in parts]

    def format_row(cells):
        """Render a list of cells as a single markdown table row."""
        return "| " + " | ".join(cells) + " |"

    def process_table(raw_table: str, default_first_col: str) -> str:
        """
        Turn the raw captured lines into a normalised markdown table.

        Returns an empty string when ``raw_table`` has no non-blank lines.
        """
        # Split lines and remove any empty lines.
        lines = [line.strip() for line in raw_table.splitlines() if line.strip()]
        if not lines:
            return ""

        # Process header row; name the first column if it was left blank.
        header = split_row(lines[0])
        if not header:
            header = [default_first_col]
        elif header[0] == "":
            header[0] = default_first_col

        body_lines = lines[1:]
        # The model output may already include a delimiter row under the
        # header. Only the row directly below the header is considered, so
        # real data further down is never discarded.
        if body_lines:
            first_cells = split_row(body_lines[0])
            if first_cells and all(separator_cell.fullmatch(cell) for cell in first_cells):
                body_lines = body_lines[1:]

        # Process body rows, padding short rows up to the header width.
        body = [split_row(row) for row in body_lines]
        for row in body:
            if len(row) < len(header):
                row.extend([""] * (len(header) - len(row)))

        # Build the final markdown table with a separator matching the header width.
        separator = ["---"] * len(header)
        out_lines = [format_row(header), format_row(separator)]
        out_lines.extend(format_row(row) for row in body)

        return "\n".join(out_lines)

    # process_table returns "" for an empty capture, so a missing table stays "".
    team_table_md = process_table(raw_team, "Team")
    player_table_md = process_table(raw_player, "Player")

    return team_table_md, player_table_md



In [78]:
def create_markdown_tables(data):
    """
    Build markdown tables from a record that uses "Team:" / "Player:" labels
    and the literal token "<NEWLINE>" as a line break.

    The text between "Team:" and the first "Player:" becomes the team table;
    the text after the first "Player:" (up to any further "Player:") becomes
    the player table. In each table the first non-blank line is the header,
    whose first cell is overwritten with "Team" / "Player", and a ``---``
    separator row is inserted beneath it.

    Returns:
        A tuple ``(team_table, player_table)``; a missing section yields "".
    """
    text = data.replace("<NEWLINE>", "\n")

    # Everything before the first "Player:" may hold the team section.
    player_split = text.split("Player:")
    team_split = player_split[0].split("Team:")

    team_text = team_split[1].strip() if len(team_split) > 1 else ""
    player_text = player_split[1].strip() if len(player_split) > 1 else ""

    def cells_of(line):
        """Split a line on '|', strip each cell, drop the outer empty cells."""
        cells = [piece.strip() for piece in line.split("|")]
        if cells[:1] == [""]:
            del cells[0]
        if cells[-1:] == [""]:
            del cells[-1]
        return cells

    def render(section, first_column):
        """Render one section as a markdown table, or "" when it has no rows."""
        rows = [cells_of(line) for line in section.split("\n") if line.strip()]
        if not rows:
            return ""
        header, body = rows[0], rows[1:]
        if header:
            # The first header cell is always replaced, even when non-empty.
            header[0] = first_column
        divider = ["---"] * len(header)
        return "\n".join(
            "| " + " | ".join(cells) + " |" for cells in [header, divider, *body]
        )

    return render(team_text, "Team"), render(player_text, "Player")


In [79]:
# Read the gold records, one per line; the line index is later used as the id.
with open('/home/turning/Jainit/TANQ/EVALUATION_OF_MODELS/gemini/rotowire/test.data') as f:
    roto_tables = f.read().split("\n")
# Drop only the empty string produced by a trailing newline. An unconditional
# [:-1] would silently discard the last record when the file does not end with
# a newline.
if roto_tables and roto_tables[-1] == "":
    roto_tables.pop()

In [80]:
# Key each gold record by its line index, stored as a string.
roto_id_to_tables = {str(idx): record for idx, record in enumerate(roto_tables)}

In [81]:
# Re-check the number of predictions before pairing them with the gold records.
len(pred_tables_dict) 

728

In [82]:
# For every prediction, build the gold tables from the record with the same id
# and parse the predicted tables out of the raw model output.
input_tables = {}
for k, v in pred_tables_dict.items():
    gold_team, gold_player = create_markdown_tables(roto_id_to_tables[str(k)])

    # Parse the prediction once and reuse the result for both the debug print
    # and the stored entry.
    pred_team, pred_player = extract_tables(v)
    print(v)
    print((pred_team, pred_player))

    entry = {
        'gold_team': gold_team,
        'gold_player': gold_player,
        'pred_team': pred_team,
        'pred_player': pred_player
    }
    input_tables[k] = entry

Final Answer: Team: 
|  | Losses | Total points | Wins |
| Suns |  | 88 | 19 |
| Hawks | 12 | 95 | 46 |
Player: 
|  | Assists | Points | Rebounds | Steals |
| Al Horford | 4 | 17 | 13 | 2 |
| Jeff Teague | 7 | 17 |  | 2 |
| Nikola Vucevic |  | 21 | 15 |  |
| Kyle Korver |  |  |  |  |
| Aaron Gordon |  |  |  |  |
| Evan Fournier |  |  |  |  |
('| Team | Losses | Total points | Wins |\n| --- | --- | --- | --- |\n| Suns |  | 88 | 19 |\n| Hawks | 12 | 95 | 46 |', '| Player | Assists | Points | Rebounds | Steals |\n| --- | --- | --- | --- | --- |\n| Al Horford | 4 | 17 | 13 | 2 |\n| Jeff Teague | 7 | 17 |  | 2 |\n| Nikola Vucevic |  | 21 | 15 |  |\n| Kyle Korver |  |  |  |  |\n| Aaron Gordon |  |  |  |  |\n| Evan Fournier |  |  |  |  |')
Final Answer: 
Team: 
|  | Percentage of field goals | Losses | Total points | Points in 1st quarter | Rebounds | Wins | 
| Bucks | 46 | 17 | 95 |  | 48 | 18 | 
| Knicks | 41 | 31 | 82 |  | 36 | 5 | 
Player: 
|  | Assists | Blocks | Defensive rebounds | 3-p

In [83]:
# Build the scorer input: tabscore maps a running index to [gold, predicted],
# and idx_to_table_id maps the same index back to "<id>$$TEAM" / "<id>$$PLAYER".
# A pair is kept only when both its gold and predicted tables are non-empty;
# otherwise the whole entry is printed for inspection.
idx_to_table_id = {}
tabscore = {}
ii = 0
for text, tables in input_tables.items():
    for gold_key, pred_key, suffix in (
        ('gold_team', 'pred_team', "$$TEAM"),
        ('gold_player', 'pred_player', "$$PLAYER"),
    ):
        if not (tables[gold_key] and tables[pred_key]):
            print(tables)
            continue
        tabscore[ii] = [tables[gold_key], tables[pred_key]]
        idx_to_table_id[ii] = str(text) + suffix
        ii += 1

    

{'gold_team': '', 'gold_player': '| Player | Assists | Defensive rebounds | 3-pointers attempted | Field goals attempted | Field goals made | Free throws made | Minutes played | Personal fouls | Points | Total rebounds |\n| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |\n| Carmelo Anthony |  | 6 |  | 36 | 18 | 6 | 46 | 6 | 45 | 6 |\n| Kristaps Porzingis |  |  |  |  |  |  |  |  | 15 |  |\n| Courtney Lee |  |  |  |  |  |  |  |  | 17 | 9 |\n| Brandon Jennings | 11 |  |  |  |  |  |  |  | 18 |  |\n| Mindaugas Kuzminskas |  |  |  | 13 |  |  |  |  | 13 |  |\n| Justin Holiday |  |  | 13 |  |  |  |  |  | 15 |  |\n| Tim Hardaway Jr. |  |  |  |  |  |  |  |  | 19 |  |\n| Paul Millsap |  |  |  |  |  |  |  |  | 37 | 19 |\n| Dwight Howard |  |  |  |  |  |  |  |  | 19 |  |\n| Kent Bazemore |  |  |  |  |  |  |  |  | 24 |  |\n| Dennis Schroder | 15 |  |  |  |  |  |  |  | 23 |  |\n| Mike Dunleavy |  |  |  |  |  |  |  |  | 12 |  |', 'pred_team': '| Team | Losses | Total points | Overtim

In [89]:
# Number of [gold, predicted] table pairs prepared for scoring.
len(tabscore)

1409

In [90]:
# Spot-check: element 0 of a pair is the gold table.
print(tabscore[1][0])

| Player | Assists | Points | Total rebounds | Steals |
| --- | --- | --- | --- | --- |
| Nikola Vucevic |  | 21 | 15 |  |
| Al Horford | 4 | 17 | 13 | 2 |
| Jeff Teague | 7 | 17 |  | 2 |


In [91]:
# Spot-check the gold table of the next pair.
print(tabscore[2][0])

| Team | Percentage of field goals | Losses | Total points | Rebounds | Wins |
| --- | --- | --- | --- | --- | --- |
| Knicks | 41 | 31 | 82 | 36 | 5 |
| Bucks | 46 | 17 | 95 | 48 | 18 |


In [92]:
import json 

In [93]:
# Persist the scorer input and the index -> table-id mapping. Context managers
# guarantee the files are flushed and closed; the previous bare open() calls
# left the handles open until garbage collection.
# Note: the integer keys are written as strings, as JSON object keys must be.
with open("tabscore_input.json", "w") as f:
    json.dump(tabscore, f)
with open("idx_to_table_id.json", "w") as f:
    json.dump(idx_to_table_id, f)

In [94]:
# Confirm the number of pairs that were written to tabscore_input.json.
len(tabscore)

1409