In [1]:
# Imports and Setup
import logging
import os
import sys
import time

# Make the grid_collector package importable from this notebook.
# The notebook is expected to run one directory below grid_collector, so the
# parent of the current working directory is the package root.
grid_collector_path = os.path.split(os.getcwd())[0]
if grid_collector_path not in sys.path:
    # Prepend (rather than append) so this checkout wins over any installed copy
    sys.path.insert(0, grid_collector_path)
    print(f"Added to path: {grid_collector_path}")

from dotenv import load_dotenv
from src.collector.grid_collector import GridCollector
import logging
import pandas as pd
from datetime import datetime, timedelta

# Configure the root logger: timestamped messages at INFO level and above.
# basicConfig is a no-op when the root logger already has handlers, so
# re-running this cell does not stack duplicate handlers.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

Added to path: c:\Users\Chaos\Desktop\Repo's\homework\VictorVis2.0\grid_collector


In [2]:
# Configuration and Initialization
# Load configuration into the process environment, then read the API key from it.
load_dotenv()
api_key = os.getenv('GRID_API_KEY')
if not api_key:
    # Fail fast: nothing below can work without credentials
    raise ValueError("GRID_API_KEY environment variable is not set")

# Initialize collector; log the failure for context, then let it propagate
try:
    collector = GridCollector(api_key)
except Exception as exc:
    logging.error(f"Error initializing collector: {str(exc)}")
    raise
else:
    logging.info("Successfully initialized GridCollector")

2024-11-06 14:55:40,490 - INFO - Successfully initialized GridCollector


In [3]:
def verify_queries(queries_dir=None):
    """Report which of the required GraphQL query files exist.

    Prints a check mark or cross for each required file and, when anything is
    missing, a short list of remediation steps naming only the missing files.

    Args:
        queries_dir: Directory to inspect. When omitted, defaults to the
            ``queries`` folder inside the parent of the current working
            directory (the notebook runs one level below grid_collector).

    Returns:
        None. All results are printed.
    """
    if queries_dir is None:
        # Get path to queries directory relative to where the notebook runs
        notebook_dir = os.getcwd()
        grid_collector_dir = os.path.dirname(notebook_dir)
        queries_dir = os.path.join(grid_collector_dir, 'queries')

    print(f"Checking queries directory: {queries_dir}")

    # List of required query files
    required_queries = [
        'tournaments.graphql',
        'matches.graphql',
        'players.graphql',
        'statistics.graphql',
        'teams.graphql'
    ]

    # Check each file once, remembering the ones that are absent
    print("\nChecking query files:")
    missing_queries = []
    for query in required_queries:
        path = os.path.join(queries_dir, query)
        exists = os.path.exists(path)
        print(f"{'✓' if exists else '✗'} {query}")
        if not exists:
            print(f"  Missing file should be at: {path}")
            missing_queries.append(query)

    # If any files are missing, show how to create them (only the missing ones)
    if missing_queries:
        print("\nMissing query files! Here's how to create them:")
        print(f"1. Create directory: {queries_dir}")
        print("2. Create the following files with their queries:")
        for query in missing_queries:
            print(f"   - {query}")

# Run verification
verify_queries()

Checking queries directory: c:\Users\Chaos\Desktop\Repo's\homework\VictorVis2.0\grid_collector\queries

Checking query files:
✓ tournaments.graphql
✓ matches.graphql
✓ players.graphql
✓ statistics.graphql
✓ teams.graphql


In [4]:
# Collect Player Data
# Fetch all players through the collector and preview the first rows.
# Any failure is logged for context and then re-raised so the cell stops.
try:
    logging.info("Collecting player data...")
    players_df = collector.get_players()
    logging.info(f"Collected {len(players_df)} players")

    # Display sample of players
    print("\nPlayers Sample:", players_df.head(), sep="\n")

except Exception as exc:
    logging.error(f"Error collecting player data: {str(exc)}")
    raise

2024-11-06 14:55:45,848 - INFO - Collecting player data...
2024-11-06 14:55:45,849 - INFO - Looking for query file at: c:\Users\Chaos\Desktop\Repo's\homework\VictorVis2.0\grid_collector\queries\players.graphql
2024-11-06 14:55:45,858 - INFO - >>> {"query": "query IntrospectionQuery {\n  __schema {\n    queryType {\n      name\n    }\n    mutationType {\n      name\n    }\n    subscriptionType {\n      name\n    }\n    types {\n      ...FullType\n    }\n    directives {\n      name\n      description\n      locations\n      args {\n        ...InputValue\n      }\n    }\n  }\n}\n\nfragment FullType on __Type {\n  kind\n  name\n  description\n  fields(includeDeprecated: true) {\n    name\n    description\n    args {\n      ...InputValue\n    }\n    type {\n      ...TypeRef\n    }\n    isDeprecated\n    deprecationReason\n  }\n  inputFields {\n    ...InputValue\n  }\n  interfaces {\n    ...TypeRef\n  }\n  enumValues(includeDeprecated: true) {\n    name\n    description\n    isDeprecated\n 


Players Sample:
     id   nickname                             title team_id   team_name  \
0  3435      Sapec  Counter Strike: Global Offensive    3923  Eyeballers   
1  3436  Svedjehed  Counter Strike: Global Offensive       7         AGO   
2  3437      shiNe  Counter Strike: Global Offensive   49449         IBV   
3  3438     flusha  Counter Strike: Global Offensive    3923  Eyeballers   
4  3439      PANIX  Counter Strike: Global Offensive    1146      Sangal   

   private  
0    False  
1    False  
2    False  
3    False  
4    False  


In [None]:
# Collect Player Statistics
# Uses `time` (imported in the setup cell) to pause between API requests.

# Pause between API calls; adjust based on the API's rate limits
REQUEST_DELAY_SECONDS = 0.2


def _count_true(entries):
    """Sum the 'count' of every entry whose 'value' is truthy (None-safe)."""
    return sum(item.get('count', 0) for item in entries or [] if item.get('value'))


def _summarize_player_stats(player_stats):
    """Flatten one player's raw statistics payload into scalar metrics.

    Args:
        player_stats: Mapping returned by ``collector.get_player_statistics``.
            The keys read here ('game', 'segment', 'kills', 'won', ...) are
            inferred from the lookups below; confirm against the statistics
            GraphQL query if the schema changes.

    Returns:
        dict with keys kills, deaths, assists, damage_dealt, kd_ratio,
        win_rate, first_kills, first_kill_percentage, rounds_played, adr.
    """
    # `or` guards against keys that are present but explicitly null
    game_stats = player_stats.get('game') or {}
    segment_stats = player_stats.get('segment') or []

    def stat_sum(key):
        return (game_stats.get(key) or {}).get('sum', 0)

    kills = stat_sum('kills')
    deaths = stat_sum('deaths')
    assists = stat_sum('killAssistsGiven')
    damage_dealt = stat_sum('damageDealt')
    games_played = game_stats.get('count', 0)

    first_kill_entries = game_stats.get('firstKill') or []
    wins = _count_true(game_stats.get('won'))
    first_kills = _count_true(first_kill_entries)
    first_kill_percentage = next(
        (item.get('percentage', 0) for item in first_kill_entries if item.get('value')),
        0,
    )

    # Rounds played comes from the first segment of type 'Round', if any
    rounds_played = next(
        (segment.get('count', 0) for segment in segment_stats if segment.get('type') == 'Round'),
        0,
    )

    return {
        'kills': kills,
        'deaths': deaths,
        'assists': assists,
        'damage_dealt': damage_dealt,
        # K/D ratio falls back to raw kills when the player has no deaths
        'kd_ratio': kills / deaths if deaths != 0 else kills,
        # Win rate as a percentage of games played
        'win_rate': (wins / games_played) * 100 if games_played != 0 else 0,
        'first_kills': first_kills,
        'first_kill_percentage': first_kill_percentage,
        'rounds_played': rounds_played,
        # ADR: damage dealt per round played
        'adr': damage_dealt / rounds_played if rounds_played != 0 else 0,
    }


try:
    logging.info("Collecting player statistics...")
    player_stats_list = []
    total_players = len(players_df)

    # Walk the rows once so nickname/team_name arrive with the id, instead of
    # re-scanning the whole frame with a boolean mask for every player.
    player_rows = zip(players_df['id'], players_df['nickname'], players_df['team_name'])
    for idx, (player_id, nickname, team_name) in enumerate(player_rows):
        try:
            logging.info(f"Collecting statistics for player {player_id} ({idx+1}/{total_players})")
            player_stats = collector.get_player_statistics(player_id)

            if player_stats is not None:
                # Combine player info with the derived metrics
                player_stats_list.append({
                    'id': player_id,
                    'nickname': nickname,
                    'team_name': team_name,
                    **_summarize_player_stats(player_stats),
                })
            else:
                logging.warning(f"No statistics found for player {player_id}")

        except Exception as e:
            # Best effort: one bad player must not abort the whole collection
            logging.error(f"Error collecting statistics for player {player_id}: {str(e)}")
        finally:
            # Be mindful of rate limits, on failed requests as well
            time.sleep(REQUEST_DELAY_SECONDS)

    logging.info(f"Collected statistics for {len(player_stats_list)}/{total_players} players")
    logging.info("Successfully collected player statistics")

except Exception as e:
    logging.error(f"Error during player statistics collection: {str(e)}")
    raise

In [4]:
# Collect Tournament Data
# Fetch all tournaments and preview the first rows. Failures are logged and
# re-raised, matching the other collection cells, so a failed fetch stops the
# run instead of leaving `tournaments_df` undefined.
try:
    tournaments_df = collector.get_tournaments()
    print(f"\nCollected {len(tournaments_df)} tournaments")
    print("\nSample data:")
    print(tournaments_df.head())
except Exception as e:
    logging.error(f"Error collecting tournament data: {str(e)}")
    raise

2024-11-06 14:44:55,031 - INFO - Looking for query file at: c:\Users\Chaos\Desktop\Repo's\homework\VictorVis2.0\grid_collector\queries\tournaments.graphql
2024-11-06 14:44:55,036 - INFO - >>> {"query": "query IntrospectionQuery {\n  __schema {\n    queryType {\n      name\n    }\n    mutationType {\n      name\n    }\n    subscriptionType {\n      name\n    }\n    types {\n      ...FullType\n    }\n    directives {\n      name\n      description\n      locations\n      args {\n        ...InputValue\n      }\n    }\n  }\n}\n\nfragment FullType on __Type {\n  kind\n  name\n  description\n  fields(includeDeprecated: true) {\n    name\n    description\n    args {\n      ...InputValue\n    }\n    type {\n      ...TypeRef\n    }\n    isDeprecated\n    deprecationReason\n  }\n  inputFields {\n    ...InputValue\n  }\n  interfaces {\n    ...TypeRef\n  }\n  enumValues(includeDeprecated: true) {\n    name\n    description\n    isDeprecated\n    deprecationReason\n  }\n  possibleTypes {\n    ...Ty


Collected 4054 tournaments

Sample data:
  id                             name            name_short start_date  \
0  1  United Masters League 2018-2019              UML 1819        NaT   
1  2              Dota PIT 2019 Minor   Dota PIT 2019 Minor        NaT   
2  3          HotShot Series Season 2     HotShot Series S2        NaT   
3  4      Blast Pro Series Miami 2019  Blast Pro Miami 2019        NaT   
4  5     Blast Pro Series Madrid 2019        BP Madrid 2019        NaT   

  end_date  private title_ids                         title_names  
0      NaT    False       [1]  [Counter Strike: Global Offensive]  
1      NaT    False       [2]         [Defense of the Ancients 2]  
2      NaT    False       [1]  [Counter Strike: Global Offensive]  
3      NaT    False       [1]  [Counter Strike: Global Offensive]  
4      NaT    False       [1]  [Counter Strike: Global Offensive]  


In [5]:
# Collect Match Data
# Fetch recent matches, print per-tournament and per-format counts, then
# preview a few rows. Any failure is logged and re-raised.
try:
    logging.info("Collecting match data...")
    # The `days` value on the next line is the only thing meant to be edited here.
    matches_df = collector.get_matches(days=7)  # Increase days to collect more matches
    logging.info(f"Collected {len(matches_df)} matches")

    # Basic match analysis
    print("\nMatch Statistics:")
    print(f"Total matches collected: {len(matches_df)}")

    # One value_counts breakdown per (heading, column) pair, in display order
    breakdowns = (
        ("\nMatches by tournament:", 'tournament_name'),
        ("\nMatches by format:", 'format_name'),
    )
    for heading, column in breakdowns:
        print(heading)
        print(matches_df[column].value_counts())

    # Display sample of recent matches with key information
    display_columns = [
        'id', 'tournament_name', 'team1_name', 'team2_name',
        'format_short', 'start_time'
    ]
    print("\nRecent Matches Sample:")
    print(matches_df[display_columns].head())

except Exception as exc:
    logging.error(f"Error collecting match data: {str(exc)}")
    raise

2024-11-06 14:48:59,482 - INFO - Collecting match data...
2024-11-06 14:48:59,482 - INFO - Looking for query file at: c:\Users\Chaos\Desktop\Repo's\homework\VictorVis2.0\grid_collector\queries\matches.graphql
2024-11-06 14:49:01,766 - INFO - >>> {"query": "query GetRecentMatches($first: Int!, $after: Cursor) {\n  allSeries(\n    first: $first\n    after: $after\n    filter: {startTimeScheduled: {gte: \"2024-10-30T14:00:00+00:00\", lte: \"2024-11-06T14:00:00+00:00\"}}\n    orderBy: StartTimeScheduled\n  ) {\n    totalCount\n    edges {\n      cursor\n      node {\n        id\n        title {\n          nameShortened\n        }\n        tournament {\n          nameShortened\n        }\n        startTimeScheduled\n        format {\n          name\n          nameShortened\n        }\n        teams {\n          baseInfo {\n            id\n            name\n          }\n          scoreAdvantage\n        }\n      }\n    }\n    pageInfo {\n      hasPreviousPage\n      hasNextPage\n      startC


Match Statistics:
Total matches collected: 111

Matches by tournament:
tournament_name
CCT S2 - South America S4       25
Esports Battles Nov 2024 CS2    23
DBE 2024                        16
EPL S20 - CS2                   14
United21 Season 22              14
Esports Battles Oct 2024 CS2     5
A1 Gaming League S9              4
Thunderpick WC 2024 - Finals     4
Ljósleiðara deildin 2024         2
GRID-TEST                        2
T-esports Championship S3        2
Name: count, dtype: int64

Matches by format:
format_name
best-of-3    92
best-of-1    16
best-of-5     3
Name: count, dtype: int64

Recent Matches Sample:
        id               tournament_name         team1_name      team2_name  \
0  2732475     CCT S2 - South America S4  LA CHAMPIONS LIGA  Yawara Esports   
1  2738557  Esports Battles Oct 2024 CS2        Purple Haze      Gameinside   
2  2738475                 EPL S20 - CS2          Aurora YB           ECLOT   
3  2732476     CCT S2 - South America S4       MIBR Aca