In [1]:
# Imports and Setup
import os
import sys

# Add the grid_collector directory to Python path
grid_collector_path = os.path.dirname(os.getcwd())  # Go up one directory to grid_collector
if grid_collector_path not in sys.path:
    sys.path.insert(0, grid_collector_path)  # Insert at beginning of path for priority
    print(f"Added to path: {grid_collector_path}")

from dotenv import load_dotenv
from src.collector.grid_collector import GridCollector
import logging
import pandas as pd
from datetime import datetime, timedelta

# Set up logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('grid_collection.log'),
        logging.StreamHandler()
    ]
)

Added to path: c:\Users\Chaos\Desktop\Repo's\homework\VictorVis2.0\grid_collector


In [2]:
# Configuration and Initialization
# Load configuration
load_dotenv()
api_key = os.getenv('GRID_API_KEY')
if not api_key:
    raise ValueError("GRID_API_KEY environment variable is not set")

try:
    # Initialize collector
    collector = GridCollector(api_key)
    logging.info("Successfully initialized GridCollector")
except Exception as e:
    logging.error(f"Error initializing collector: {str(e)}")
    raise

2024-11-05 21:56:47,756 - INFO - Successfully initialized GridCollector


In [3]:
def verify_queries():
    # Get path to queries directory
    notebook_dir = os.getcwd()
    grid_collector_dir = os.path.dirname(notebook_dir)
    queries_dir = os.path.join(grid_collector_dir, 'queries')
    
    print(f"Checking queries directory: {queries_dir}")
    
    # List of required query files
    required_queries = [
        'tournaments.graphql',
        'matches.graphql',
        'players.graphql',
        'statistics.graphql',
        'teams.graphql'
    ]
    
    # Check each file
    print("\nChecking query files:")
    for query in required_queries:
        path = os.path.join(queries_dir, query)
        exists = os.path.exists(path)
        print(f"{'✓' if exists else '✗'} {query}")
        if not exists:
            print(f"  Missing file should be at: {path}")
            
    # If any files are missing, show how to create them
    if any(not os.path.exists(os.path.join(queries_dir, q)) for q in required_queries):
        print("\nMissing query files! Here's how to create them:")
        print(f"1. Create directory: {queries_dir}")
        print("2. Create the following files with their queries:")
        for query in required_queries:
            print(f"   - {query}")

# Run verification
verify_queries()

Checking queries directory: c:\Users\Chaos\Desktop\Repo's\homework\VictorVis2.0\grid_collector\queries

Checking query files:
✓ tournaments.graphql
✓ matches.graphql
✓ players.graphql
✓ statistics.graphql
✓ teams.graphql


In [4]:
# Collect Tournament Data
try:
    logging.info("Collecting tournament data...")
    tournaments_df = collector.get_tournaments()
    logging.info(f"Collected {len(tournaments_df)} tournaments")
    
    # Display sample of tournaments
    print("\nSample of tournaments:")
    print(tournaments_df.head())
except Exception as e:
    logging.error(f"Error collecting tournament data: {str(e)}")
    raise

2024-11-05 21:56:56,545 - INFO - Collecting tournament data...
2024-11-05 21:56:56,554 - INFO - >>> {"query": "query IntrospectionQuery {\n  __schema {\n    queryType {\n      name\n    }\n    mutationType {\n      name\n    }\n    subscriptionType {\n      name\n    }\n    types {\n      ...FullType\n    }\n    directives {\n      name\n      description\n      locations\n      args {\n        ...InputValue\n      }\n    }\n  }\n}\n\nfragment FullType on __Type {\n  kind\n  name\n  description\n  fields(includeDeprecated: true) {\n    name\n    description\n    args {\n      ...InputValue\n    }\n    type {\n      ...TypeRef\n    }\n    isDeprecated\n    deprecationReason\n  }\n  inputFields {\n    ...InputValue\n  }\n  interfaces {\n    ...TypeRef\n  }\n  enumValues(includeDeprecated: true) {\n    name\n    description\n    isDeprecated\n    deprecationReason\n  }\n  possibleTypes {\n    ...TypeRef\n  }\n}\n\nfragment InputValue on __InputValue {\n  name\n  description\n  type {\n   

Looking for query file at: c:\Users\Chaos\Desktop\Repo's\homework\VictorVis2.0\grid_collector\queries\tournaments.graphql
File exists: True


2024-11-05 21:56:57,507 - INFO - <<< {"data":{"__schema":{"queryType":{"name":"Query"},"types":[{"kind":"SCALAR","name":"Boolean","description":"Built-in Boolean"},{"kind":"INPUT_OBJECT","name":"BooleanFilter","description":"Boolean filter for true/false fields.","inputFields":[{"name":"equals","description":"Value to look for, can be nullable.","type":{"kind":"SCALAR","name":"Boolean"}}]},{"kind":"SCALAR","name":"Cursor","description":"Cursor value for pagination."},{"kind":"OBJECT","name":"DataProvider","fields":[{"name":"name","description":"The name of the external data provider.","args":[],"type":{"kind":"NON_NULL","ofType":{"kind":"SCALAR","name":"String"}},"isDeprecated":false},{"name":"description","description":"Description of the data provider.","args":[],"type":{"kind":"SCALAR","name":"String"},"isDeprecated":false}],"interfaces":[]},{"kind":"SCALAR","name":"Date","description":"Date formatted as ISO 8601"},{"kind":"SCALAR","name":"DateTime","description":"DateTime formatted

GridAPIError: Variable '$after' of type 'String' used in position expecting type 'Cursor'.

GraphQL request:1:36
1 | query GetTournaments($first: Int!, $after: String) {
  |                                    ^
2 |     tournaments(

GraphQL request:4:16
3 |         first: $first
4 |         after: $after
  |                ^
5 |     ) {

In [5]:
# Collect Match Data
try:
    logging.info("Collecting match data...")
    matches_df = collector.get_matches(days=7)
    logging.info(f"Collected {len(matches_df)} matches")
    
    # Basic match analysis
    print("\nMatches per tournament:")
    print(matches_df['tournament_name'].value_counts())
    
    print("\nSample of matches:")
    print(matches_df.head())
except Exception as e:
    logging.error(f"Error collecting match data: {str(e)}")
    raise

2024-11-05 21:57:14,090 - INFO - Collecting match data...
2024-11-05 21:57:14,100 - ERROR - Error collecting match data: Variable '$after' of type 'String' used in position expecting type 'Cursor'.

GraphQL request:1:38
1 | query GetRecentMatches($first: Int!, $after: String, $startDate: DateTime!, $endDate: DateTime!) {
  |                                      ^
2 |     allSeries(

GraphQL request:4:16
3 |         first: $first
4 |         after: $after
  |                ^
5 |         filter: {


Looking for query file at: c:\Users\Chaos\Desktop\Repo's\homework\VictorVis2.0\grid_collector\queries\matches.graphql
File exists: True


GridAPIError: Variable '$after' of type 'String' used in position expecting type 'Cursor'.

GraphQL request:1:38
1 | query GetRecentMatches($first: Int!, $after: String, $startDate: DateTime!, $endDate: DateTime!) {
  |                                      ^
2 |     allSeries(

GraphQL request:4:16
3 |         first: $first
4 |         after: $after
  |                ^
5 |         filter: {

In [6]:
# Collect Team Data
try:
    logging.info("Collecting team data...")
    teams_df = collector.get_teams()
    logging.info(f"Collected {len(teams_df)} teams")
    
    # Get unique teams from matches
    match_team_ids = pd.concat([
        matches_df['team1_id'],
        matches_df['team2_id']
    ]).unique()
    
    print(f"\nTotal teams: {len(teams_df)}")
    print(f"Teams in recent matches: {len(match_team_ids)}")
except Exception as e:
    logging.error(f"Error collecting team data: {str(e)}")
    raise

2024-11-05 21:57:24,177 - INFO - Collecting team data...
2024-11-05 21:57:24,186 - ERROR - Error collecting team data: Variable '$after' of type 'String' used in position expecting type 'Cursor'.

GraphQL request:1:30
1 | query GetTeams($first: Int!, $after: String) {
  |                              ^
2 |     teams(

GraphQL request:4:16
3 |         first: $first
4 |         after: $after
  |                ^
5 |     ) {


Looking for query file at: c:\Users\Chaos\Desktop\Repo's\homework\VictorVis2.0\grid_collector\queries\teams.graphql
File exists: True


GridAPIError: Variable '$after' of type 'String' used in position expecting type 'Cursor'.

GraphQL request:1:30
1 | query GetTeams($first: Int!, $after: String) {
  |                              ^
2 |     teams(

GraphQL request:4:16
3 |         first: $first
4 |         after: $after
  |                ^
5 |     ) {

In [7]:
# Collect Player Data
try:
    logging.info("Collecting player data for teams in recent matches...")
    all_players = []
    all_player_stats = []
    
    # Only get rosters for teams that have played recently
    for team_id in match_team_ids:
        try:
            # Get team roster
            roster_df = collector.get_team_roster(team_id)
            all_players.extend(roster_df.to_dict('records'))
            
            # Get stats for each player
            for player_id in roster_df['player_id']:
                try:
                    stats_df = collector.get_player_statistics(player_id)
                    if stats_df is not None:
                        stats_df['player_id'] = player_id
                        all_player_stats.append(stats_df)
                except Exception as e:
                    logging.warning(f"Error collecting stats for player {player_id}: {str(e)}")
                    continue
                    
        except Exception as e:
            logging.warning(f"Error collecting roster for team {team_id}: {str(e)}")
            continue
    
    # Convert to DataFrames
    players_df = pd.DataFrame(all_players)
    player_stats_df = pd.concat(all_player_stats, ignore_index=True) if all_player_stats else pd.DataFrame()
    
    logging.info(f"Collected data for {len(players_df)} players")
    logging.info(f"Collected statistics for {len(player_stats_df)} players")
    
    # Display sample data
    print("\nSample of players:")
    print(players_df.head())
    
    if not player_stats_df.empty:
        print("\nSample of player statistics:")
        print(player_stats_df.head())
except Exception as e:
    logging.error(f"Error in player data collection: {str(e)}")
    raise

2024-11-05 21:57:28,899 - INFO - Collecting player data for teams in recent matches...
2024-11-05 21:57:28,900 - ERROR - Error in player data collection: name 'match_team_ids' is not defined


NameError: name 'match_team_ids' is not defined

In [None]:
# Save All Data
try:
    # Create output directory
    output_dir = 'data'
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    output_dir = os.path.join(output_dir, timestamp)
    os.makedirs(output_dir, exist_ok=True)
    
    # Save all DataFrames
    tournaments_df.to_csv(f'{output_dir}/tournaments.csv', index=False)
    matches_df.to_csv(f'{output_dir}/matches.csv', index=False)
    teams_df.to_csv(f'{output_dir}/teams.csv', index=False)
    players_df.to_csv(f'{output_dir}/players.csv', index=False)
    if not player_stats_df.empty:
        player_stats_df.to_csv(f'{output_dir}/player_stats.csv', index=False)
    
    logging.info(f"All data saved successfully to {output_dir}")
    
    # Print summary
    print("\nData Collection Summary:")
    print(f"Tournaments: {len(tournaments_df)}")
    print(f"Matches: {len(matches_df)}")
    print(f"Teams: {len(teams_df)}")
    print(f"Players: {len(players_df)}")
    print(f"Player Statistics: {len(player_stats_df)}")
except Exception as e:
    logging.error(f"Error saving data: {str(e)}")
    raise