In [1]:
import pandas as pd
import json

In [3]:
# --- File locations -------------------------------------------------------
# Interim CSV holding all formats stacked together; it is written once and
# then re-read row by row by the squad-building cells further down.
csv_file_path = '../data/interim/combined_data.csv'
# JSON outputs produced by the two squad-building cells.
combined_squad_path = '../data/processed/combined_squad.json'
datewise_squad_path = '../data/processed/datewise_squad.json'

# --- Raw inputs, one CSV per group of match formats ------------------------
ODI_ODM_data = pd.read_csv("../data/raw/ODI_ODM_data.csv")
Test_MDM_data = pd.read_csv("../data/raw/Test_MDM_data.csv")
T20_data = pd.read_csv("../data/raw/T20_data.csv")

In [4]:
# Stack the three per-format frames row-wise (concat's default axis) and
# renumber the rows 0..n-1 so the source frames' indexes do not repeat.
combined_data = pd.concat(
    [ODI_ODM_data, Test_MDM_data, T20_data],
    ignore_index=True,
)

In [5]:
# Persist the stacked frame to the interim CSV that the squad-building cells
# below read back with csv.DictReader.
# NOTE(review): to_csv writes the DataFrame index by default, so the file gets
# an extra unnamed first column. The readers in this notebook look columns up
# by name and are unaffected; consider index=False once it is confirmed that no
# other consumer relies on that column.
combined_data.to_csv(csv_file_path)

In [None]:
## For the model UI: per-match squad lists (the next cell writes them to combined_squad_path)

In [1]:
import csv
import json

# Build {match_id: {team_name: [player, ...]}} from the combined CSV.
# Player lists keep first-seen order and contain no duplicates.
data = {}

# newline='' is what the csv module asks for when it is handed a file object,
# and the explicit UTF-8 encoding keeps decoding independent of the platform
# default (the frontend cell reads this same file as UTF-8).
with open(csv_file_path, 'r', newline='', encoding='utf-8') as csvfile:
    reader = csv.DictReader(csvfile)

    for row in reader:
        player = row['Player']
        fielder = row['Fielder']

        # One squad list per team named on the row, created on first sight
        # (the row's team first, then its opposition).
        match_squads = data.setdefault(row['MatchID'], {})
        team_squad = match_squads.setdefault(row['Team'], [])
        opposition_squad = match_squads.setdefault(row['Opposition'], [])

        # The row's player is listed under the row's own team only.
        if player not in team_squad:
            team_squad.append(player)

        # A non-empty fielder is listed under the opposition team.
        if fielder and fielder not in opposition_squad:
            opposition_squad.append(fielder)

# Serialise once and write the result; json_output stays available for
# inspection in later cells.
json_output = json.dumps(data, indent=4)

with open(combined_squad_path, 'w', encoding='utf-8') as outfile:
    outfile.write(json_output)


In [2]:
## For the frontend: date-wise squads plus "second squads" (the next cell writes them to datewise_squad_path)

In [5]:
import csv
import json
from collections import defaultdict
from datetime import datetime
from tqdm import tqdm


# Step 1: bucket every CSV row under its MatchID
def _new_match_bucket():
    """Return an empty per-match record: collected rows plus match-level fields."""
    return {'rows': [], 'date': '', 'event': '', 'match_type': ''}


matches_dict = defaultdict(_new_match_bucket)

with open(csv_file_path, mode='r', newline='', encoding='utf-8') as csv_handle:
    for record in csv.DictReader(csv_handle):
        bucket = matches_dict[record['MatchID']]
        bucket['rows'].append(record)
        # Match-level fields are re-assigned on every row, so the values of
        # the last row read for a match are the ones that remain.
        bucket.update(
            date=record['Date'],
            event=record['Event'],
            match_type=record['Match Type'],
        )

# Step 2: Convert Grouped Data into a Sorted List of Matches
matches = []
skipped_match_ids = []  # matches whose rows do not name exactly two teams
for match_id, match_info in tqdm(matches_dict.items()):
    rows = match_info['rows']
    venue = rows[0]['Venue']  # Assuming venue is the same for all rows in a match

    # Every name appearing as Team or Opposition. Sorting (instead of taking
    # the set's iteration order) makes team1/team2, and every key later built
    # from them, identical from one kernel run to the next.
    teams = sorted({side for row in rows for side in (row['Team'], row['Opposition'])})

    if len(teams) != 2:
        # Teams are not properly identified for this match: leave it out, but
        # remember it so the omission is visible below.
        skipped_match_ids.append(match_id)
        continue

    team1, team2 = teams
    squads = {team1: set(), team2: set()}

    for row in rows:
        # row['Team'] is always one of the two names collected above.
        team = row['Team']
        other_team = team2 if team == team1 else team1
        squads[team].add(row['Player'])
        # A non-empty fielder is credited to the other team.
        if row['Fielder']:
            squads[other_team].add(row['Fielder'])

    matches.append({
        'date': datetime.strptime(match_info['date'], '%Y-%m-%d'),
        'event': match_info['event'],
        'match_id': match_id,
        'match_type': match_info['match_type'],
        'venue': venue,
        'team1': team1,
        'team2': team2,
        # Sorted so the squad lists are reproducible as well.
        'team1_players': sorted(squads[team1]),
        'team2_players': sorted(squads[team2]),
    })

if skipped_match_ids:
    print(f"Skipped {len(skipped_match_ids)} match(es) without exactly two teams")

# Step 3: Sort Matches Chronologically
# list.sort is stable, so matches sharing a date keep their CSV order.
matches.sort(key=lambda entry: entry['date'])

# Step 4: Initialize Data Structures for Tracking Past Squads
data = defaultdict(lambda: defaultdict(dict))
team_past_matches = defaultdict(lambda: defaultdict(list))  # team_past_matches[team][match_type] = list of squads

# How many of a team's previous matches (of the same match type) are scanned
# when building its second squad.
SECOND_SQUAD_LOOKBACK = 10

# Raw match type -> format label stored under 'Format'.
FORMAT_BY_MATCH_TYPE = {
    "MDM": "Test",
    "Test": "Test",
    "ODM": "ODI",
    "ODI": "ODI",
    "T20": "T20",
}


def generate_second_squad(team, current_squad, squad_match_type=None):
    """Return players from the team's recent squads who are not in the current one.

    Args:
        team: Team name used as the key into ``team_past_matches``.
        current_squad: Iterable of players in the match being processed.
        squad_match_type: Match type whose history is consulted. When omitted,
            the module-level ``match_type`` is used, which is how this helper
            behaved when it was defined inside the loop.

    Returns:
        Sorted list of players seen in the last ``SECOND_SQUAD_LOOKBACK``
        recorded squads of ``team`` for that match type, minus ``current_squad``.
    """
    if squad_match_type is None:
        squad_match_type = match_type
    recent_squads = team_past_matches[team][squad_match_type][-SECOND_SQUAD_LOOKBACK:]
    past_players = set().union(*recent_squads)
    # Sorted so the output does not depend on set iteration order.
    return sorted(past_players - set(current_squad))


# Step 5: Process Each Match to Build JSON with Second Squads
for match in tqdm(matches):
    date_str = match['date'].strftime('%Y-%m-%d')
    event = match['event']
    match_type = match['match_type']
    venue = match['venue']
    team1 = match['team1']
    team2 = match['team2']
    squad1 = match['team1_players']
    squad2 = match['team2_players']

    fixtures = data[date_str][event]

    # Create Match Keys
    key1 = f"{team1}_VS_{team2}_{venue}"
    key2 = f"{team2}_VS_{team1}_{venue}"

    # Use the team1-first key unless it is already taken for this date and
    # event; in that case fall back to the reversed key.
    key = key1 if key1 not in fixtures else key2
    entry = fixtures.setdefault(key, {})

    # Match types outside the table get no 'Format' key at all.
    match_format = FORMAT_BY_MATCH_TYPE.get(match_type)
    if match_format is not None:
        entry['Format'] = match_format

    # Assign Current Squads
    entry[team1] = squad1
    entry[team2] = squad2

    # Second squads are computed before this match is added to the history,
    # so they only reflect earlier matches.
    entry[f"{team1}_Second_Squad"] = generate_second_squad(team1, squad1, match_type)
    entry[f"{team2}_Second_Squad"] = generate_second_squad(team2, squad2, match_type)

    # Update Past Matches with Current Squads
    team_past_matches[team1][match_type].append(set(squad1))
    team_past_matches[team2][match_type].append(set(squad2))

# Step 6: Convert to Regular Dictionary and Save JSON
# Convert defaultdicts to regular dicts for JSON serialization
def convert_defaultdict_to_dict(d):
    """Recursively rebuild ``d`` so that every mapping in it is a plain dict.

    Dicts (including defaultdicts) become new plain dicts and lists become new
    lists, with their values/items converted the same way. Any other object is
    returned unchanged and is not descended into.
    """
    # defaultdict is a dict subclass, so a single mapping check covers both.
    if isinstance(d, dict):
        return {key: convert_defaultdict_to_dict(value) for key, value in d.items()}
    if isinstance(d, list):
        return [convert_defaultdict_to_dict(item) for item in d]
    return d

# Replace the nested defaultdicts with plain dicts before serialising.
final_data = convert_defaultdict_to_dict(data)

# Serialise with a 4-space indent and write the text to the date-wise squad file.
datewise_json = json.dumps(final_data, indent=4)
with open(datewise_squad_path, mode='w', encoding='utf-8') as json_file:
    json_file.write(datewise_json)

100%|████████████████████████████████████████████████| 16528/16528 [00:01<00:00, 8385.63it/s]
100%|███████████████████████████████████████████████| 16528/16528 [00:00<00:00, 16537.56it/s]
