In [6]:
import json

In [7]:
# Define the input file name
input_filename = 'sports_events.json'

# Sample events used only to seed a demo file when the input file is missing.
# NOTE(review): this sample is illustrative; its keys are not guaranteed to
# match the schema of the real input file -- confirm before relying on it.
sports_data_example = [
    {"eventName": "Archery Finals", "sportName": "Archery", "date": "2024-08-10", "location": "Olympic Stadium", "athletes": ["Athlete A", "Athlete B"], "volunteers": ["Volunteer X"]},
    {"eventName": "Swimming Relay", "sportName": "Swimming", "date": "2024-08-11", "location": "Aquatics Center", "athletes": ["Athlete C"], "volunteers": ["Volunteer Y", "Volunteer Z"]},
    {"eventName": "100m Sprint", "sportName": "Athletics", "date": "2024-08-12", "location": "Olympic Stadium", "athletes": ["Athlete D", "Athlete E"], "volunteers": []},
    {"eventName": "Basketball Finals", "sportName": "Basketball", "date": "2024-08-13", "location": "Indoor Arena", "athletes": ["Team Alpha", "Team Beta"], "volunteers": ["Volunteer W"]},
    {"eventName": "Cycling Road Race", "sportName": "Cycling", "date": "2024-08-09", "location": "City Circuit", "athletes": ["Cyclist 1", "Cyclist 2"], "volunteers": ["Volunteer V"], "details": {"laps": 10}},
    {"eventName": "Table Tennis Singles", "sportName": "Table Tennis", "date": "2024-08-10", "athletes": ["Player 1"], "volunteers": ["Scorekeeper 1"]},
    {"eventName": "Weightlifting 100kg", "sportName": "Weightlifting", "date": "2024-08-11", "athletes": ["Lifter X"], "volunteers": ["Spotter A", "Spotter B"], "category": "Heavyweight"}
]


def load_sports_events(path, fallback_events=None):
    """Load a list of event records from the JSON file at ``path``.

    Parameters:
        path: Location of the JSON file to read.
        fallback_events: Optional list of sample events. When the file does
            not exist and this is given, the sample is written to ``path``
            (as a demo file) and then loaded back. When it is ``None`` a
            missing file simply yields an empty list and nothing is written.

    Returns:
        The decoded list of events, or ``[]`` when the file is missing (and
        no fallback could be created), is not valid JSON/UTF-8, or its
        top-level value is not a JSON array.

    Files are always read and written as UTF-8 so the result does not depend
    on the platform's locale encoding.
    """
    try:
        with open(path, 'r', encoding='utf-8') as f:
            loaded = json.load(f)
        print(f"Successfully loaded data from '{path}'.")
    except FileNotFoundError:
        print(f"Warning: '{path}' not found.")
        if fallback_events is None:
            return []
        # Create a dummy file for the sake of the notebook running
        try:
            with open(path, 'w', encoding='utf-8') as f_dummy:
                json.dump(fallback_events, f_dummy, indent=4)
            print(f"A dummy '{path}' has been created with sample data for demonstration.")
            # Now try to load it again
            with open(path, 'r', encoding='utf-8') as f_retry:
                loaded = json.load(f_retry)
            print(f"Successfully loaded data from newly created dummy '{path}'.")
        except Exception as e_create:
            # Deliberately broad: seeding the demo file is best-effort and
            # must never stop the notebook; fall back to "no data".
            print(f"Could not create or load dummy '{path}': {e_create}")
            return []
    except (json.JSONDecodeError, UnicodeDecodeError):
        # UnicodeDecodeError covers files that are not valid UTF-8 at all.
        print(f"Error: Could not decode JSON from '{path}'. Please ensure it's a valid JSON file.")
        return []

    # Downstream cells index and iterate the result as a list of event dicts;
    # reject any other top-level JSON value (object, string, number, ...).
    if not isinstance(loaded, list):
        print(f"Error: Expected a JSON array of events in '{path}', but found {type(loaded).__name__}.")
        return []
    return loaded


sports_data = load_sports_events(input_filename, fallback_events=sports_data_example)

# Display a sample of the loaded data
if sports_data:
    print("\nSample of loaded data (first event):")
    print(json.dumps(sports_data[0], indent=2))
    print(f"\nTotal events loaded: {len(sports_data)}")
else:
    print("\nNo data loaded or an error occurred during loading.")

Successfully loaded data from 'sports_events.json'.

Sample of loaded data (first event):
{
  "sportId": "FIG2003",
  "sportName": "FigureSkating - 2003",
  "season": "Winter",
  "startDate": "01/01/2003",
  "endDate": "01/02/2003",
  "athletes": [
    {
      "athleteId": "8"
    },
    {
      "athleteId": "392"
    },
    {
      "athleteId": "363"
    },
    {
      "athleteId": "214"
    },
    {
      "athleteId": "396"
    }
  ],
  "volunteers": [
    {
      "volunteerId": "V877",
      "role": "Volunteer"
    },
    {
      "volunteerId": "V905",
      "role": "Volunteer"
    },
    {
      "volunteerId": "V950",
      "role": "Volunteer"
    },
    {
      "volunteerId": "V952",
      "role": "Volunteer"
    }
  ]
}

Total events loaded: 623


In [8]:
def strip_participants(event):
    """Return a shallow copy of ``event`` without its 'athletes' and 'volunteers' keys.

    The input dictionary is left untouched; keys that are absent are simply ignored.
    """
    return {
        key: value
        for key, value in event.items()
        if key not in ('athletes', 'volunteers')
    }


if not sports_data:
    # Nothing was loaded upstream, so there is nothing to clean.
    print("Skipping removal of athlete/volunteer info as no data was loaded.")
    processed_data = []  # keep the name defined for the cells that follow
else:
    # Build a new list so the originally loaded records stay available as-is.
    processed_data = [strip_participants(event) for event in sports_data]

    # Show the first cleaned record and the total as a quick sanity check.
    if processed_data:
        print("Sample of processed data (first event after removing keys):")
        print(json.dumps(processed_data[0], indent=2))
        print(f"\nTotal events processed: {len(processed_data)}")
    else:
        print("No data was processed (original data might have been empty or processing failed).")

Sample of processed data (first event after removing keys):
{
  "sportId": "FIG2003",
  "sportName": "FigureSkating - 2003",
  "season": "Winter",
  "startDate": "01/01/2003",
  "endDate": "01/02/2003"
}

Total events processed: 623


In [9]:
def sport_name_key(event):
    """Sort key: the lowercase string form of an event's 'sportName'.

    A missing key falls back to the empty string, and non-string values are
    passed through ``str()`` before lowercasing.
    """
    return str(event.get('sportName', '')).lower()


if not processed_data:
    print("Skipping sorting as there is no processed data.")
    sorted_data = []  # keep the name defined for the cells that follow
else:
    # Sort a fresh copy so processed_data keeps its original order.
    ordered_events = list(processed_data)
    ordered_events.sort(key=sport_name_key)
    sorted_data = ordered_events

    # Show both ends of the ordering as a quick sanity check.
    event_count = len(sorted_data)
    print("Sample of sorted data:")
    if event_count >= 1:
        print("First event (after sorting):")
        print(json.dumps(sorted_data[0], indent=2))
    if event_count >= 2:  # the last event is only distinct when there are at least two
        print("\nLast event (after sorting):")
        print(json.dumps(sorted_data[-1], indent=2))
    print(f"\nTotal events sorted: {event_count}")

Sample of sorted data:
First event (after sorting):
{
  "sportId": "442025",
  "sportName": "44 - 2025",
  "season": "Fall",
  "startDate": "08/01/2025",
  "endDate": "08/02/2025"
}

Last event (after sorting):
{
  "sportId": "YAP2024",
  "sportName": "YAP - 2024",
  "season": "Fall",
  "startDate": "08/01/2024",
  "endDate": "08/02/2024"
}

Total events sorted: 623


In [10]:
if not sorted_data:
    # Nothing to persist; leave any existing output file untouched.
    print("No data to save as 'sorted_data' is empty.")
else:
    output_filename = 'cleaned_sports_events.json'
    try:
        # Write the cleaned, sorted events as indented JSON.
        with open(output_filename, 'w') as out_file:
            json.dump(sorted_data, out_file, indent=4)
        print(f"\nProcessed and sorted data successfully saved to '{output_filename}'")
    except OSError as io_error:  # file-system level failures (IOError is an alias of OSError)
        print(f"An I/O error occurred while saving the data to '{output_filename}': {io_error}")
    except Exception as other_error:  # anything else, e.g. a value json cannot serialize
        print(f"An unexpected error occurred while saving the data: {other_error}")


Processed and sorted data successfully saved to 'cleaned_sports_events.json'
