In [12]:
import json
import pandas as pd
import os

def main():
    # Load the JSON data from file
    try:
        with open(r'data\team_players.json', 'r', encoding='utf-8') as file:
            teams_data = json.load(file)
    except FileNotFoundError:
        print("Error: teams.json file not found. Please make sure it exists in the current directory.")
        return
    
    # Load the CSV data from file
    try:
        player_stats = pd.read_csv('data\player_ELO.csv')
    except FileNotFoundError:
        print("Error: player_stats.csv file not found. Please make sure it exists in the current directory.")
        return
    
    # Default values for players not found in the CSV
    DEFAULT_ELO = 1500
    DEFAULT_FORM = 0.15  # Moderate form value
    DEFAULT_ROLE = "unknown"
    
    # Create a dictionary for quick lookup of player stats
    player_lookup = {}
    for _, row in player_stats.iterrows():
        player_name = row['Player']
        player_lookup[player_name] = {
            'ELO': row['ELO'],
            'Role': row['Role'],
            'Form': row['Form']
        }
    
    # Process each team and player
    result = {"teams": {}}
    
    for team_name, players in teams_data['teams'].items():
        result["teams"][team_name] = []
        
        for player in players:
            # Check if player exists in the stats
            if player in player_lookup:
                player_info = {
                    "name": player,
                    "ELO": player_lookup[player]['ELO'],
                    "Role": player_lookup[player]['Role'],
                    "Form": player_lookup[player]['Form'],
                    "present": True
                }
            else:
                player_info = {
                    "name": player,
                    "ELO": DEFAULT_ELO,
                    "Role": DEFAULT_ROLE,
                    "Form": DEFAULT_FORM,
                    "present": False
                }
            
            result["teams"][team_name].append(player_info)
    
    # Save the processed data to a new JSON file
    with open('processed_players.json', 'w') as outfile:
        json.dump(result, outfile, indent=2)
    
    print(f"Processing complete. Results saved to 'processed_players.json'")
    
    # Print summary of matches found
    total_players = sum(len(players) for players in teams_data['teams'].values())
    matches_found = sum(1 for team in result["teams"].values() for player in team if player["present"])
    
    print(f"\nSummary:")
    print(len(result['teams']))
    print(f"Total players: {total_players}")
    print(f"Matches found in CSV: {matches_found}")
    print(f"Players not found: {total_players - matches_found}")

if __name__ == "__main__":
    main()

Processing complete. Results saved to 'processed_players.json'

Summary:
10
Total players: 227
Matches found in CSV: 148
Players not found: 79


In [10]:
import json

def extract_absent_players():
    try:
        # Load the JSON data
        with open('processed_players.json', 'r') as file:
            data = json.load(file)
        
        # Extract players where present is False
        absent_players = []
        
        for team_name, players in data['teams'].items():
            for player in players:
                if player.get('present') == False:
                    # Add team information to each player
                    player_info = {
                        'team': team_name,
                        'name': player['name'],
                        'ELO': player['ELO'],
                        'Role': player['Role'],
                        'Form': player['Form']
                    }
                    absent_players.append(player_info)
        
        # Save the absent players to a new file
        with open('absent_players.json', 'w') as outfile:
            json.dump(absent_players, outfile, indent=2)
        
        print(f"Extracted {len(absent_players)} players with present=False")
        print("Data saved to 'absent_players.json'")
        
    except FileNotFoundError:
        print("Error: player_data.json file not found.")
    except json.JSONDecodeError:
        print("Error: Invalid JSON format in the input file.")
    except Exception as e:
        print(f"An error occurred: {str(e)}")

if __name__ == "__main__":
    extract_absent_players()

Extracted 79 players with present=False
Data saved to 'absent_players.json'


In [3]:
import pandas as pd
df = pd.read_csv(r"data\ipl_fully_standardized_features.csv")

In [4]:
df.head()

Unnamed: 0,team1,team2,venue,team1_avg_elo,team2_avg_elo,team1_avg_form,team2_avg_form,team2_batsmen_avg_elo,team2_batsmen_avg_form,team1_last_5_wins,team2_last_5_wins,team1_vs_team2_matches,team1_vs_team2_wins,team2_vs_team1_wins,head_to_head_wins,head_to_head_losses,team1_won
0,Royal Challengers Bangalore,Kolkata Knight Riders,M Chinnaswamy Stadium,1543.111111,1566.0,0.135119,0.156698,1562.8,0.155016,0.0,0.0,32,14,18,14,20,0
1,Kings XI Punjab,Chennai Super Kings,Punjab Cricket Association Stadium,1620.272727,1537.5,0.187674,0.155462,1534.0,0.128003,0.0,0.0,23,9,14,14,16,0
2,Delhi Capitals,Rajasthan Royals,Arun Jaitley Stadium,1587.909091,1576.375,0.132035,0.224602,1580.666667,0.184874,0.0,0.0,18,7,11,14,15,1
3,Mumbai Indians,Royal Challengers Bangalore,Wankhede Stadium,1556.5,1589.181818,0.162399,0.154401,1565.4,0.119714,0.0,0.0,32,18,14,19,14,0
4,Kolkata Knight Riders,Sunrisers Hyderabad,Eden Gardens,1534.454545,1561.0,0.166267,0.139321,1552.2,0.123268,1.0,0.0,9,7,2,26,11,1


In [5]:
venue = df.venue.unique()

In [6]:
venue

array(['M Chinnaswamy Stadium', 'Punjab Cricket Association Stadium',
       'Arun Jaitley Stadium', 'Wankhede Stadium', 'Eden Gardens',
       'Sawai Mansingh Stadium', 'Rajiv Gandhi International Stadium',
       'MA Chidambaram Stadium', 'Dr DY Patil Sports Academy', 'Newlands',
       "St George's Park", 'Kingsmead', 'SuperSport Park', 'Buffalo Park',
       'New Wanderers Stadium', 'De Beers Diamond Oval',
       'OUTsurance Oval', 'Brabourne Stadium', 'Narendra Modi Stadium',
       'Barabati Stadium', 'Vidarbha Cricket Association Stadium',
       'Himachal Pradesh Cricket Association Stadium', 'Nehru Stadium',
       'Holkar Cricket Stadium',
       'Dr. Y.S. Rajasekhara Reddy ACA-VDCA Cricket Stadium',
       'Maharashtra Cricket Association Stadium',
       'Shaheed Veer Narayan Singh International Stadium',
       'JSCA International Stadium Complex', 'Sheikh Zayed Stadium',
       'Sharjah Cricket Stadium', 'Dubai International Cricket Stadium',
       'Saurashtra Cricket A

In [None]:
l= [
    "M Chinnaswamy Stadium",
    "Punjab Cricket Association Stadium",
    "Arun Jaitley Stadium",
    "Wankhede Stadium",
    "Saurashtra Cricket Association Stadium"
    "Eden Gardens",
    "Sawai Mansingh Stadium",
    "Rajiv Gandhi International Stadium",
    "MA Chidambaram Stadium",
    "Dr DY Patil Sports Academy",
    "Brabourne Stadium",
    "Narendra Modi Stadium",
    "Barabati Stadium",
    "Vidarbha Cricket Association Stadium",
    "Himachal Pradesh Cricket Association Stadium",
    "Nehru Stadium",
    "Holkar Cricket Stadium",
    "Dr. Y.S. Rajasekhara Reddy ACA-VDCA Cricket Stadium",
    "Maharashtra Cricket Association Stadium",
    "Shaheed Veer Narayan Singh International Stadium",
    "JSCA International Stadium Complex",
    "Green Park",
    "Ekana Cricket Stadium",
    "Barsapara Cricket Stadium"
] 
for i in venue:
    if i not in l:
        print(i)
    else:
        print(False)

False
False
False
False
Eden Gardens
False
False
False
False
Newlands
St George's Park
Kingsmead
SuperSport Park
Buffalo Park
New Wanderers Stadium
De Beers Diamond Oval
OUTsurance Oval
False
False
False
False
False
False
False
False
False
False
False
Sheikh Zayed Stadium
Sharjah Cricket Stadium
Dubai International Cricket Stadium
Saurashtra Cricket Association Stadium
False
False
False
