In [1]:
# Imports and Setup
# Standard-library modules needed by the path bootstrap and logging setup below.
import os
import sys
import logging

# Add the grid_collector directory to Python path
# The parent of the current working directory is taken as the project root, so this
# only works when the kernel's working directory is a direct child of grid_collector
# (NOTE(review): presumably the notebooks folder - confirm).
grid_collector_path = os.path.dirname(os.getcwd())  # Go up one directory to grid_collector
if grid_collector_path not in sys.path:
    sys.path.insert(0, grid_collector_path)  # Insert at beginning of path for priority
    print(f"Added to path: {grid_collector_path}")

# These imports deliberately come after the sys.path change: `src.collector` is
# presumably resolved relative to grid_collector_path, so keep this ordering.
from dotenv import load_dotenv
from src.collector.grid_collector import GridCollector
import logging  # NOTE(review): duplicate of the import at the top of this cell; harmless but redundant
import pandas as pd
from datetime import datetime, timedelta

# Set up logging
# Configures the root logger at INFO with a "timestamp - level - message" layout.
# NOTE(review): at INFO the cell outputs also contain full GraphQL request/response
# payload lines; raise the level if that is too noisy.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)

Added to path: c:\Users\Chaos\Desktop\Repo's\homework\VictorVis2.0\grid_collector


In [2]:
# Configuration and Initialization
# Load settings from a .env file (if present) into the environment, then read the
# GRID API key; an unset or empty key stops the notebook right here.
load_dotenv()
api_key = os.environ.get('GRID_API_KEY')
if api_key is None or api_key == '':
    raise ValueError("GRID_API_KEY environment variable is not set")

# Create the collector used by every later cell. Any failure is logged and then
# re-raised so the notebook does not continue without a collector.
try:
    collector = GridCollector(api_key)
    logging.info("Successfully initialized GridCollector")
except Exception as init_error:
    logging.error(f"Error initializing collector: {str(init_error)}")
    raise

2024-11-06 22:06:26,450 - INFO - Successfully initialized GridCollector


In [3]:
def verify_queries(queries_dir=None):
    """Report which of the required GraphQL query files exist on disk.

    Prints one check/cross line per required file and, when anything is missing,
    a short list of instructions naming only the files that still need creating.

    Args:
        queries_dir: Directory to inspect. When omitted, the `queries` folder
            inside the parent of the current working directory is used.

    Returns:
        None. The report is written to stdout only.
    """
    if queries_dir is None:
        # Default layout: <parent of cwd>/queries
        notebook_dir = os.getcwd()
        grid_collector_dir = os.path.dirname(notebook_dir)
        queries_dir = os.path.join(grid_collector_dir, 'queries')

    print(f"Checking queries directory: {queries_dir}")

    # List of required query files
    required_queries = [
        'tournaments.graphql',
        'matches.graphql',
        'players.graphql',
        'player_statistics.graphql',
        'titles.graphql',
    ]

    # Check each file once, remembering the missing ones for the summary below
    print("\nChecking query files:")
    missing_queries = []
    for query in required_queries:
        path = os.path.join(queries_dir, query)
        exists = os.path.exists(path)
        print(f"{'✓' if exists else '✗'} {query}")
        if not exists:
            print(f"  Missing file should be at: {path}")
            missing_queries.append(query)

    # If any files are missing, show how to create them (only the missing ones)
    if missing_queries:
        print("\nMissing query files! Here's how to create them:")
        print(f"1. Create directory: {queries_dir}")
        print("2. Create the following files with their queries:")
        for query in missing_queries:
            print(f"   - {query}")

# Run verification
verify_queries()

Checking queries directory: c:\Users\Chaos\Desktop\Repo's\homework\VictorVis2.0\grid_collector\queries

Checking query files:
✓ tournaments.graphql
✓ matches.graphql
✓ players.graphql
✓ player_statistics.graphql
✓ titles.graphql


In [4]:
# Fetch all titles
# Ask the collector for every available title, then print the resulting table
# under a heading.
titles_df = collector.get_all_titles()
print("Available Titles:", titles_df, sep="\n")

2024-11-06 22:06:26,480 - INFO - Looking for query file at: c:\Users\Chaos\Desktop\Repo's\homework\VictorVis2.0\grid_collector\queries\titles.graphql
2024-11-06 22:06:26,485 - INFO - >>> {"query": "query IntrospectionQuery {\n  __schema {\n    queryType {\n      name\n    }\n    mutationType {\n      name\n    }\n    subscriptionType {\n      name\n    }\n    types {\n      ...FullType\n    }\n    directives {\n      name\n      description\n      locations\n      args {\n        ...InputValue\n      }\n    }\n  }\n}\n\nfragment FullType on __Type {\n  kind\n  name\n  description\n  fields(includeDeprecated: true) {\n    name\n    description\n    args {\n      ...InputValue\n    }\n    type {\n      ...TypeRef\n    }\n    isDeprecated\n    deprecationReason\n  }\n  inputFields {\n    ...InputValue\n  }\n  interfaces {\n    ...TypeRef\n  }\n  enumValues(includeDeprecated: true) {\n    name\n    description\n    isDeprecated\n    deprecationReason\n  }\n  possibleTypes {\n    ...TypeRef

Available Titles:
    id                                    name  name_shortened  private
0    1        Counter Strike: Global Offensive            csgo    False
1    2               Defense of the Ancients 2            dota    False
2    3                       League of Legends             lol    False
3    4            PlayerUnknowns Battlegrounds            pubg    False
4    5                          Kings of Glory             kog    False
5    6                                Valorant             val    False
6    7     PlayerUnknowns Battlegrounds Mobile          PUBG-M    False
7    8                  Drone Champions League             dcl    False
8    9                                  FIFA20          FIFA20    False
9   10                                FallGuys        FallGuys    False
10  11                          Mobile Legends  Mobile-Legends    False
11  12                                 F1 2020         F1 2020    False
12  13             League of Legends Wild Rift

In [5]:
# Collect Player Data
try:
    logging.info("Collecting player data...")
    # Sample run: only 5 players are requested; drop `limit` to fetch all of them.
    # 'title_id' is left at its default of '28' (CS2).
    players_df = collector.get_players(limit=5)
    logging.info(f"Collected {len(players_df)} players")

    # Show the first rows of the result under a heading
    print("\nPlayers Sample:", players_df.head(), sep="\n")
except Exception as fetch_error:
    # Log for the record, then let the failure stop the notebook
    logging.error(f"Error collecting player data: {str(fetch_error)}")
    raise



2024-11-06 22:06:27,853 - INFO - Collecting player data...
2024-11-06 22:06:27,853 - INFO - Looking for query file at: c:\Users\Chaos\Desktop\Repo's\homework\VictorVis2.0\grid_collector\queries\players.graphql
2024-11-06 22:06:29,495 - INFO - >>> {"query": "query GetAllPlayers($first: Int!, $after: Cursor, $filter: PlayerFilter) {\n  players(first: $first, after: $after, filter: $filter) {\n    edges {\n      node {\n        id\n        nickname\n        title {\n          id\n          name\n        }\n        team {\n          id\n          name\n        }\n        private\n      }\n    }\n    pageInfo {\n      hasNextPage\n      endCursor\n    }\n  }\n}", "variables": {"filter": {"titleId": "28"}, "first": 50, "after": null}}
2024-11-06 22:06:30,396 - INFO - <<< {"data":{"players":{"edges":[{"node":{"id":"18932","nickname":"AG Nuke","title":{"id":"28","name":"Counter Strike 2"},"team":{"id":"52314","name":"CS2-1"},"private":false}},{"node":{"id":"18933","nickname":"BadjoSP","title":


Players Sample:
      id      nickname             title team_id team_name  private
0  18932       AG Nuke  Counter Strike 2   52314     CS2-1    False
1  18933       BadjoSP  Counter Strike 2   52314     CS2-1    False
2  18934        wunder  Counter Strike 2   52314     CS2-1    False
3  18935     Bulletito  Counter Strike 2   52315     CS2-2    False
4  18936  AGmurdercore  Counter Strike 2   52315     CS2-2    False


In [6]:
# Collect Tournament Data
# Fetches every tournament from the collector and prints the total plus the first rows.
try:
    logging.info("Collecting tournament data...")
    tournaments_df = collector.get_tournaments()
    print(f"\nCollected {len(tournaments_df)} tournaments")
    print("\nSample data:")
    print(tournaments_df.head())
except Exception as e:
    # Same policy as the other collection cells: log the failure and re-raise, so the
    # notebook stops here instead of carrying on with `tournaments_df` undefined.
    logging.error(f"Error collecting tournament data: {str(e)}")
    raise

2024-11-06 22:06:30,420 - INFO - Looking for query file at: c:\Users\Chaos\Desktop\Repo's\homework\VictorVis2.0\grid_collector\queries\tournaments.graphql
2024-11-06 22:06:32,505 - INFO - >>> {"query": "query GetTournaments($first: Int!, $after: Cursor) {\n  tournaments(first: $first, after: $after) {\n    pageInfo {\n      hasNextPage\n      endCursor\n    }\n    edges {\n      cursor\n      node {\n        id\n        name\n        nameShortened\n        startDate\n        endDate\n        private\n        titles {\n          id\n          name\n        }\n      }\n    }\n    totalCount\n  }\n}", "variables": {"first": 50, "after": null}}
2024-11-06 22:06:33,196 - INFO - <<< {"data":{"tournaments":{"pageInfo":{"hasNextPage":true,"endCursor":"PjMLC1oLC0FBCwtaCws="},"edges":[{"cursor":"PjMLC1oLC0YLC1oLCw==","node":{"id":"1","name":"United Masters League 2018-2019","nameShortened":"UML 1819","private":false,"titles":[{"id":"1","name":"Counter Strike: Global Offensive"}]}},{"cursor":"PjM


Collected 4054 tournaments

Sample data:
  id                             name            name_short start_date  \
0  1  United Masters League 2018-2019              UML 1819        NaT   
1  2              Dota PIT 2019 Minor   Dota PIT 2019 Minor        NaT   
2  3          HotShot Series Season 2     HotShot Series S2        NaT   
3  4      Blast Pro Series Miami 2019  Blast Pro Miami 2019        NaT   
4  5     Blast Pro Series Madrid 2019        BP Madrid 2019        NaT   

  end_date  private title_ids                         title_names  
0      NaT    False       [1]  [Counter Strike: Global Offensive]  
1      NaT    False       [2]         [Defense of the Ancients 2]  
2      NaT    False       [1]  [Counter Strike: Global Offensive]  
3      NaT    False       [1]  [Counter Strike: Global Offensive]  
4      NaT    False       [1]  [Counter Strike: Global Offensive]  


In [7]:
# Collect Match Data
try:
    logging.info("Collecting match data...")
    # ONLY CHANGE THE NUMBER OF DAYS BELOW PLEASE!!!
    # Increase `days` to collect more matches.
    matches_df = collector.get_matches(days=7)
    logging.info(f"Collected {len(matches_df)} matches")

    # Basic match analysis: overall total first, then one frequency table per column
    print("\nMatch Statistics:")
    print(f"Total matches collected: {len(matches_df)}")

    for heading, column in (
        ("\nMatches by tournament:", 'tournament_name'),
        ("\nMatches by format:", 'format_name'),
    ):
        print(heading)
        print(matches_df[column].value_counts())

    # Preview of the first matches, restricted to the key columns
    display_columns = ['id', 'tournament_name', 'team1_name', 'team2_name']
    display_columns += ['format_short', 'start_time']
    print("\nRecent Matches Sample:")
    print(matches_df[display_columns].head())
except Exception as match_error:
    # Log for the record, then let the failure stop the notebook
    logging.error(f"Error collecting match data: {str(match_error)}")
    raise

2024-11-06 22:10:36,943 - INFO - Collecting match data...
2024-11-06 22:10:36,944 - INFO - Looking for query file at: c:\Users\Chaos\Desktop\Repo's\homework\VictorVis2.0\grid_collector\queries\matches.graphql
2024-11-06 22:10:39,184 - INFO - >>> {"query": "query GetRecentMatches($first: Int!, $after: Cursor) {\n  allSeries(\n    first: $first\n    after: $after\n    filter: {startTimeScheduled: {gte: \"2024-10-30T22:00:00+00:00\", lte: \"2024-11-06T22:00:00+00:00\"}}\n    orderBy: StartTimeScheduled\n  ) {\n    totalCount\n    edges {\n      cursor\n      node {\n        id\n        title {\n          nameShortened\n        }\n        tournament {\n          nameShortened\n        }\n        startTimeScheduled\n        format {\n          name\n          nameShortened\n        }\n        teams {\n          baseInfo {\n            id\n            name\n          }\n          scoreAdvantage\n        }\n      }\n    }\n    pageInfo {\n      hasPreviousPage\n      hasNextPage\n      startC


Match Statistics:
Total matches collected: 109

Matches by tournament:
tournament_name
CCT S2 - South America S4       25
Esports Battles Nov 2024 CS2    23
DBE 2024                        16
United21 Season 22              14
EPL S20 - CS2                   13
Esports Battles Oct 2024 CS2     4
A1 Gaming League S9              4
Thunderpick WC 2024 - Finals     4
Ljósleiðara deildin 2024         2
GRID-TEST                        2
T-esports Championship S3        2
Name: count, dtype: int64

Matches by format:
format_name
best-of-3    90
best-of-1    16
best-of-5     3
Name: count, dtype: int64

Recent Matches Sample:
        id               tournament_name      team1_name        team2_name  \
0  2732478     CCT S2 - South America S4           Nitro        9z Academy   
1  2738558  Esports Battles Oct 2024 CS2     Purple Haze            Seight   
2  2738559  Esports Battles Oct 2024 CS2          Seight       Purple Haze   
3  2738477                 EPL S20 - CS2         INSILIO   

In [8]:
# Collect Player Statistics
# Progress bar for the per-player loop below.
from tqdm.notebook import tqdm


def _dig(node, *keys, default=0):
    """Follow `keys` through nested dicts, treating a missing or null level as `default`.

    Only `None` counts as "no data". Any other non-dict value still raises
    AttributeError, so an unexpected payload shape is surfaced to the caller
    instead of being silently turned into zeros.
    """
    for key in keys:
        if node is None:
            return default
        node = node.get(key)
    return default if node is None else node


def _flatten_player_stats(player_id, stats):
    """Flatten one `playerStatistics` payload into a single record with derived ratios."""
    flat_stats = {
        'player_id': player_id,
        'series_count': _dig(stats, 'series', 'count'),
        'game_count': _dig(stats, 'game', 'count'),
        'total_kills': _dig(stats, 'series', 'kills', 'sum'),
        'avg_kills': _dig(stats, 'series', 'kills', 'avg'),
        'max_kills': _dig(stats, 'series', 'kills', 'max'),
        'total_deaths': _dig(stats, 'series', 'deaths', 'sum'),
        'avg_deaths': _dig(stats, 'series', 'deaths', 'avg'),
        'win_count': _dig(stats, 'game', 'wins', 'count'),
        'win_percentage': _dig(stats, 'game', 'wins', 'percentage'),
        'current_streak': _dig(stats, 'game', 'wins', 'streak', 'current'),
        'max_streak': _dig(stats, 'game', 'wins', 'streak', 'max'),
    }

    # K/D falls back to the raw kill total when there are no deaths (avoids dividing by zero)
    if flat_stats['total_deaths'] > 0:
        flat_stats['kd_ratio'] = flat_stats['total_kills'] / flat_stats['total_deaths']
    else:
        flat_stats['kd_ratio'] = flat_stats['total_kills']

    # Per-game averages are reported as 0 when no games were recorded
    if flat_stats['game_count'] > 0:
        flat_stats['kills_per_game'] = flat_stats['total_kills'] / flat_stats['game_count']
        flat_stats['deaths_per_game'] = flat_stats['total_deaths'] / flat_stats['game_count']
    else:
        flat_stats['kills_per_game'] = 0
        flat_stats['deaths_per_game'] = 0

    return flat_stats


try:
    logging.info("Collecting player statistics...")

    # Every team id that appears on either side of a recent match
    all_team_ids = pd.concat([
        matches_df['team1_id'],
        matches_df['team2_id']
    ]).unique()

    # Fetch players without a limit and keep only those on one of the teams above.
    # NOTE: this rebinds `players_df`, replacing the 5-row sample from the earlier cell.
    players_df = collector.get_players()
    active_players = players_df[players_df['team_id'].isin(all_team_ids)]
    print(f"\nFound {len(active_players)} active players")

    # Collect statistics for active players one by one
    print("\nCollecting player statistics...")
    all_player_stats = []

    # The query is identical for every player, so load it once instead of once per
    # iteration. A failure to load it now aborts the cell rather than producing one
    # warning per player.
    stats_query = collector._load_query('player_statistics')

    for player_id in tqdm(active_players['id'].tolist()):
        try:
            # Include the required filter with time window
            variables = {
                'playerId': player_id,
                'filter': {
                    'timeWindow': 'LAST_3_MONTHS'  # This is a safe default
                }
            }

            result = collector._execute_query(
                stats_query,
                variables,
                collector.stats_client
            )

            if not result or 'playerStatistics' not in result:
                continue
            stats = result['playerStatistics']
            # Skip players with no series block at all (absent or null)
            if not stats or stats.get('series') is None:
                continue

            all_player_stats.append(_flatten_player_stats(player_id, stats))
        except Exception as e:
            # Best effort: one bad player must not abort the whole collection
            logging.warning(f"Error processing player {player_id}: {str(e)}")
            continue

    # Convert to DataFrame
    player_stats_df = pd.DataFrame(all_player_stats)

    if len(player_stats_df) > 0:
        # Merge with player info
        player_analysis = pd.merge(
            player_stats_df,
            active_players[['id', 'nickname', 'team_name']],
            left_on='player_id',
            right_on='id',
            how='left'
        )

        # Display summary statistics
        print("\nPlayer Statistics Summary:")
        summary_cols = ['kd_ratio', 'win_percentage', 'kills_per_game']
        print(player_analysis[summary_cols].describe())

        # Show top performers
        print("\nTop 10 Players Overall (min 5 games):")
        min_games = 5
        top_players = player_analysis[
            player_analysis['game_count'] >= min_games
        ].sort_values(['kd_ratio', 'win_percentage'], ascending=[False, False])[
            ['nickname', 'team_name', 'kd_ratio', 'kills_per_game', 'win_percentage', 'game_count']
        ].head(10)
        print(top_players)

        # Save to CSV in the current working directory, stamped to the minute
        timestamp = datetime.now().strftime('%Y%m%d_%H%M')
        output_path = f'player_stats_{timestamp}.csv'
        player_analysis.to_csv(output_path, index=False)
        print(f"\nStatistics saved to {output_path}")
    else:
        print("No valid player statistics were collected.")

except Exception as e:
    logging.error(f"Error in player statistics collection: {str(e)}")
    raise

2024-11-06 22:10:45,984 - INFO - Collecting player statistics...
2024-11-06 22:10:45,986 - INFO - Looking for query file at: c:\Users\Chaos\Desktop\Repo's\homework\VictorVis2.0\grid_collector\queries\players.graphql
2024-11-06 22:10:48,205 - INFO - >>> {"query": "query GetAllPlayers($first: Int!, $after: Cursor, $filter: PlayerFilter) {\n  players(first: $first, after: $after, filter: $filter) {\n    edges {\n      node {\n        id\n        nickname\n        title {\n          id\n          name\n        }\n        team {\n          id\n          name\n        }\n        private\n      }\n    }\n    pageInfo {\n      hasNextPage\n      endCursor\n    }\n  }\n}", "variables": {"filter": {"titleId": "28"}, "first": 50, "after": null}}
2024-11-06 22:10:48,898 - INFO - <<< {"data":{"players":{"edges":[{"node":{"id":"18932","nickname":"AG Nuke","title":{"id":"28","name":"Counter Strike 2"},"team":{"id":"52314","name":"CS2-1"},"private":false}},{"node":{"id":"18933","nickname":"BadjoSP","t


Found 483 active players

Collecting player statistics...


  0%|          | 0/483 [00:00<?, ?it/s]

2024-11-06 22:12:40,290 - INFO - Looking for query file at: c:\Users\Chaos\Desktop\Repo's\homework\VictorVis2.0\grid_collector\queries\player_statistics.graphql
2024-11-06 22:12:42,534 - INFO - >>> {"query": "query IntrospectionQuery {\n  __schema {\n    queryType {\n      name\n    }\n    mutationType {\n      name\n    }\n    subscriptionType {\n      name\n    }\n    types {\n      ...FullType\n    }\n    directives {\n      name\n      description\n      locations\n      args {\n        ...InputValue\n      }\n    }\n  }\n}\n\nfragment FullType on __Type {\n  kind\n  name\n  description\n  fields(includeDeprecated: true) {\n    name\n    description\n    args {\n      ...InputValue\n    }\n    type {\n      ...TypeRef\n    }\n    isDeprecated\n    deprecationReason\n  }\n  inputFields {\n    ...InputValue\n  }\n  interfaces {\n    ...TypeRef\n  }\n  enumValues(includeDeprecated: true) {\n    name\n    description\n    isDeprecated\n    deprecationReason\n  }\n  possibleTypes {\n   

KeyboardInterrupt: 