In [1]:
# Imports and Setup
import os
import sys
import logging
from time import sleep

# Make the project's `src` package importable: the notebook's working
# directory sits one level below grid_collector, so its parent goes on sys.path.
grid_collector_path = os.path.split(os.getcwd())[0]
if grid_collector_path not in sys.path:
    # Prepend rather than append so this directory is searched first.
    sys.path[:0] = [grid_collector_path]
    print(f"Added to path: {grid_collector_path}")

from dotenv import load_dotenv
from src.collector.grid_collector import GridCollector
import logging
import pandas as pd
from datetime import datetime, timedelta

# Set up logging
# Timestamped INFO-level records for the notebook's own progress messages.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)

# With the root logger at INFO, the GraphQL client echoes every raw request and
# response body (the '>>> {...}' / '<<< {...}' records) into the cell output,
# burying the results. Only warnings and errors from the transport are kept.
# NOTE(review): those records look like the `gql` transport loggers - confirm
# the collector is built on gql. Raising the level of an unused logger is harmless.
logging.getLogger("gql.transport").setLevel(logging.WARNING)

Added to path: c:\Users\Chaos\Desktop\Repo's\homework\VictorVis2.0\grid_collector


In [2]:
# Configuration and Initialization
# Read settings from a .env file into the process environment, then pull the
# API key out of it. A missing or empty key stops the notebook immediately.
load_dotenv()
api_key = os.environ.get('GRID_API_KEY')
if not api_key:
    raise ValueError("GRID_API_KEY environment variable is not set")

# Build the collector used by every later cell. Any failure is logged and then
# re-raised so execution does not continue without a collector.
try:
    collector = GridCollector(api_key)
    logging.info("Successfully initialized GridCollector")
except Exception as init_error:
    logging.error(f"Error initializing collector: {init_error}")
    raise



2024-11-13 21:04:06,911 - INFO - Successfully initialized GridCollector


In [3]:
# Query files that verify_queries() checks for by default.
REQUIRED_QUERIES = (
    'match_player_stats.graphql',
    'team_statistics.graphql',
    'tournaments.graphql',
    'matches.graphql',
    'players.graphql',
    'player_statistics.graphql',
    'titles.graphql',
    'comprehensive_player_stats.graphql',
)


def verify_queries(queries_dir=None, required_queries=None):
    """Print a checklist of the GraphQL query files and report the missing ones.

    Args:
        queries_dir: Directory expected to contain the query files. Defaults to
            the ``queries`` folder next to the notebook's working directory
            (i.e. ``<parent of cwd>/queries``).
        required_queries: Iterable of file names to look for. Defaults to
            ``REQUIRED_QUERIES``.

    Returns:
        list: Names of the required files that were not found, in the order
        they were checked. Empty when everything is present.
    """
    if queries_dir is None:
        queries_dir = os.path.join(os.path.dirname(os.getcwd()), 'queries')
    if required_queries is None:
        required_queries = REQUIRED_QUERIES

    print(f"Checking queries directory: {queries_dir}")

    # Check each file once and remember the ones that are absent, so the
    # summary below does not have to hit the filesystem a second time.
    print("\nChecking query files:")
    missing = []
    for query in required_queries:
        path = os.path.join(queries_dir, query)
        # isfile (not exists): a directory with this name is not a usable query.
        exists = os.path.isfile(path)
        print(f"{'✓' if exists else '✗'} {query}")
        if not exists:
            print(f"  Missing file should be at: {path}")
            missing.append(query)

    # Only list the files that actually need to be created.
    if missing:
        print("\nMissing query files! Here's how to create them:")
        print(f"1. Create directory: {queries_dir}")
        print("2. Create the following files with their queries:")
        for query in missing:
            print(f"   - {query}")

    return missing


# Run verification; keep the result so later cells can check it, and so the
# returned list is not echoed as the cell's output.
missing_queries = verify_queries()

Checking queries directory: c:\Users\Chaos\Desktop\Repo's\homework\VictorVis2.0\grid_collector\queries

Checking query files:
✓ match_player_stats.graphql
✓ team_statistics.graphql
✓ tournaments.graphql
✓ matches.graphql
✓ players.graphql
✓ player_statistics.graphql
✓ titles.graphql
✓ comprehensive_player_stats.graphql


In [None]:
# Ask the collector for every available title
titles_df = collector.get_all_titles()

# Show a heading followed by the resulting frame
print("Available Titles:", titles_df, sep="\n")

In [None]:
# Save the titles to a CSV file
# Destination: the data folder one level above the notebook's working directory
save_path = os.path.join('..', 'data', 'titles.csv')

# to_csv does not create missing parent folders, so make sure the data
# directory exists first (no-op when it is already there).
os.makedirs(os.path.dirname(save_path), exist_ok=True)

# Save the dataframe to CSV (without the index column)
titles_df.to_csv(save_path, index=False)

print(f"Titles saved to: {save_path}")

In [None]:
# Collect Player Data
# Sample run: only 5 players are requested; remove `limit` to fetch all of them.
# Per the original note, 'title_id' defaults to '28' (CS2) - not visible in this file.
try:
    logging.info("Collecting player data...")
    players_df = collector.get_players(limit=5)
    player_total = len(players_df)
    logging.info(f"Collected {player_total} players")

    # Preview the first rows of what came back
    print("\nPlayers Sample:")
    print(players_df.head())

except Exception as fetch_error:
    # Record the failure, then let it propagate so the notebook stops here.
    logging.error(f"Error collecting player data: {fetch_error}")
    raise



In [None]:
# Save players data to CSV in the data folder
save_path = os.path.join('..', 'data', 'players.csv')

# to_csv does not create missing parent folders, so make sure the data
# directory exists first (no-op when it is already there).
os.makedirs(os.path.dirname(save_path), exist_ok=True)

# Save the dataframe to CSV (without the index column)
players_df.to_csv(save_path, index=False)

print(f"Players data saved to: {save_path}")

In [21]:
# Collect Tournament Data
try:
    logging.info("Collecting tournament data...")
    tournaments_df = collector.get_tournaments()
    print(f"\nCollected {len(tournaments_df)} tournaments")
    print("\nSample data:")
    print(tournaments_df.head())
except Exception as e:
    # Log and re-raise, like the other collection cells. Swallowing the error
    # here would let the following save cell run against a missing or stale
    # `tournaments_df`.
    logging.error(f"Error collecting tournament data: {str(e)}")
    raise

2024-11-07 17:34:32,558 - INFO - Looking for query file at: c:\Users\Chaos\Desktop\Repo's\homework\VictorVis2.0\grid_collector\queries\tournaments.graphql
2024-11-07 17:34:32,564 - INFO - >>> {"query": "query IntrospectionQuery {\n  __schema {\n    queryType {\n      name\n    }\n    mutationType {\n      name\n    }\n    subscriptionType {\n      name\n    }\n    types {\n      ...FullType\n    }\n    directives {\n      name\n      description\n      locations\n      args {\n        ...InputValue\n      }\n    }\n  }\n}\n\nfragment FullType on __Type {\n  kind\n  name\n  description\n  fields(includeDeprecated: true) {\n    name\n    description\n    args {\n      ...InputValue\n    }\n    type {\n      ...TypeRef\n    }\n    isDeprecated\n    deprecationReason\n  }\n  inputFields {\n    ...InputValue\n  }\n  interfaces {\n    ...TypeRef\n  }\n  enumValues(includeDeprecated: true) {\n    name\n    description\n    isDeprecated\n    deprecationReason\n  }\n  possibleTypes {\n    ...Ty

KeyboardInterrupt: 

In [None]:
# Save tournaments data to CSV in the data folder
save_path = os.path.join('..', 'data', 'tournaments.csv')

# to_csv does not create missing parent folders, so make sure the data
# directory exists first (no-op when it is already there).
os.makedirs(os.path.dirname(save_path), exist_ok=True)

# Save the dataframe to CSV (without the index column)
tournaments_df.to_csv(save_path, index=False)

print(f"Tournaments data saved to: {save_path}")

Tournaments data saved to: ..\data\tournaments.csv


In [None]:
# Collect Match Data
try:
    logging.info("Collecting match data...")
    # ONLY CHANGE THE NUMBER OF DAYS BELOW PLEASE!!!
    matches_df = collector.get_matches(days=7)  # raise `days` to collect more matches
    match_total = len(matches_df)
    logging.info(f"Collected {match_total} matches")

    # Headline count
    print("\nMatch Statistics:")
    print(f"Total matches collected: {match_total}")

    # Frequency tables: one per grouping column, each under its own heading
    for heading, column in (
        ("\nMatches by tournament:", 'tournament_name'),
        ("\nMatches by format:", 'format_name'),
    ):
        print(heading)
        print(matches_df[column].value_counts())

    # First rows, restricted to the key columns
    print("\nRecent Matches Sample:")
    display_columns = [
        'id',
        'tournament_name',
        'team1_name',
        'team2_name',
        'format_short',
        'start_time',
    ]
    print(matches_df[display_columns].head())

except Exception as match_error:
    # Record the failure, then let it propagate so the notebook stops here.
    logging.error(f"Error collecting match data: {match_error}")
    raise

2024-11-07 17:15:48,297 - INFO - Collecting match data...
2024-11-07 17:15:48,298 - INFO - Looking for query file at: c:\Users\Chaos\Desktop\Repo's\homework\VictorVis2.0\grid_collector\queries\matches.graphql
2024-11-07 17:15:50,525 - INFO - >>> {"query": "query GetRecentMatches($first: Int!, $after: Cursor) {\n  allSeries(\n    first: $first\n    after: $after\n    filter: {startTimeScheduled: {gte: \"2024-10-31T17:00:00+00:00\", lte: \"2024-11-07T17:00:00+00:00\"}}\n    orderBy: StartTimeScheduled\n  ) {\n    totalCount\n    edges {\n      cursor\n      node {\n        id\n        title {\n          nameShortened\n        }\n        tournament {\n          nameShortened\n        }\n        startTimeScheduled\n        format {\n          name\n          nameShortened\n        }\n        teams {\n          baseInfo {\n            id\n            name\n          }\n          scoreAdvantage\n        }\n      }\n    }\n    pageInfo {\n      hasPreviousPage\n      hasNextPage\n      startC


Match Statistics:
Total matches collected: 109

Matches by tournament:
tournament_name
Esports Battles Nov 2024 CS2    27
CCT S2 - South America S4       24
DBE 2024                        16
EPL S20 - CS2                   14
United21 Season 22              14
A1 Gaming League S9              4
Thunderpick WC 2024 - Finals     4
Ljósleiðara deildin 2024         2
GRID-TEST                        2
T-esports Championship S3        2
Name: count, dtype: int64

Matches by format:
format_name
best-of-3    90
best-of-1    16
best-of-5     3
Name: count, dtype: int64

Recent Matches Sample:
        id               tournament_name     team1_name     team2_name  \
0  2732480     CCT S2 - South America S4     Vikings KR          VeloX   
1  2735320      Ljósleiðara deildin 2024            Þór          DUSTY   
2  2732481     CCT S2 - South America S4        ShindeN  Tropa do Taco   
3  2732482     CCT S2 - South America S4  JOGA DE TERNO   paiN Academy   
4  2738562  Esports Battles Nov 2024

In [None]:
# Save matches data to CSV in the data folder
save_path = os.path.join('..', 'data', 'matches.csv')

# to_csv does not create missing parent folders, so make sure the data
# directory exists first (no-op when it is already there).
os.makedirs(os.path.dirname(save_path), exist_ok=True)

# Save the dataframe to CSV (without the index column)
matches_df.to_csv(save_path, index=False)

print(f"Matches data saved to: {save_path}")

Matches data saved to: ..\data\matches.csv


In [25]:
# Collect Player Statistics
from tqdm.notebook import tqdm  # progress bar for the per-player loop

# Retry policy for a single player's statistics request.
max_retries = 3
base_retry_delay = 5  # seconds; multiplied by the attempt number before each retry
# A partial CSV is written each time this many more players have been collected.
checkpoint_every = 50


def _flatten_player_stats(player_id, stats):
    """Flatten one `playerStatistics` payload into a single-level dict.

    Args:
        player_id: Identifier stored under the 'player_id' key of the result.
        stats: The `playerStatistics` object from the response; expected to be
            a dict with optional 'series' and 'game' entries.

    Returns:
        dict: 'player_id', 'series_count' and 'game_count', plus kill/death/win
        columns for every sub-object that is a dict, plus the derived
        'kd_ratio', 'kills_per_game' and 'deaths_per_game'.
    """
    series_stats = stats.get('series', {})
    game_stats = stats.get('game', {})
    series_is_dict = isinstance(series_stats, dict)
    game_is_dict = isinstance(game_stats, dict)

    # Sub-objects default to {} when absent; a non-dict value (e.g. null or a
    # list) is kept as-is and simply skipped by the isinstance checks below.
    kills = series_stats.get('kills', {}) if series_is_dict else {}
    deaths = series_stats.get('deaths', {}) if series_is_dict else {}
    wins = game_stats.get('wins', {}) if game_is_dict else {}

    flat_stats = {
        'player_id': player_id,
        'series_count': series_stats.get('count', 0) if series_is_dict else 0,
        'game_count': game_stats.get('count', 0) if game_is_dict else 0
    }

    if isinstance(kills, dict):
        flat_stats.update({
            'total_kills': kills.get('sum', 0),
            'avg_kills': kills.get('avg', 0),
            'max_kills': kills.get('max', 0),
            'min_kills': kills.get('min', 0)
        })

    if isinstance(deaths, dict):
        flat_stats.update({
            'total_deaths': deaths.get('sum', 0),
            'avg_deaths': deaths.get('avg', 0),
            'max_deaths': deaths.get('max', 0),
            'min_deaths': deaths.get('min', 0)
        })

    # NOTE(review): the logged API response shows `game.wins` arriving as a
    # list, in which case none of the win columns below are produced. The query
    # does not request a field that says which list entry is the one wanted -
    # confirm against the API schema before picking an element.
    if isinstance(wins, dict):
        flat_stats.update({
            'win_count': wins.get('count', 0),
            'win_percentage': wins.get('percentage', 0)
        })

        streak = wins.get('streak', {})
        if isinstance(streak, dict):
            flat_stats.update({
                'current_streak': streak.get('current', 0),
                'max_streak': streak.get('max', 0)
            })

    # Derived ratios. Guarded because the API values may not be numeric
    # (e.g. null), which would make the comparisons or divisions raise.
    try:
        if flat_stats.get('total_deaths', 0) > 0:
            flat_stats['kd_ratio'] = flat_stats.get('total_kills', 0) / flat_stats['total_deaths']
        else:
            # No deaths recorded: fall back to the raw kill count.
            flat_stats['kd_ratio'] = flat_stats.get('total_kills', 0)

        if flat_stats.get('game_count', 0) > 0:
            flat_stats['kills_per_game'] = flat_stats.get('total_kills', 0) / flat_stats['game_count']
            flat_stats['deaths_per_game'] = flat_stats.get('total_deaths', 0) / flat_stats['game_count']
        else:
            flat_stats['kills_per_game'] = 0
            flat_stats['deaths_per_game'] = 0
    except Exception as calc_error:
        logging.warning(f"Error calculating derived stats for player {player_id}: {str(calc_error)}")
        flat_stats.update({
            'kd_ratio': 0,
            'kills_per_game': 0,
            'deaths_per_game': 0
        })

    return flat_stats


try:
    logging.info("Collecting player statistics...")

    # Teams that appear on either side of a recently collected match
    all_team_ids = pd.concat([
        matches_df['team1_id'],
        matches_df['team2_id']
    ]).unique()

    # Players belonging to those teams.
    # Note: this replaces any `players_df` left over from the sample cell above.
    players_df = collector.get_players()
    active_players = players_df[players_df['team_id'].isin(all_team_ids)]
    print(f"\nFound {len(active_players)} active players")

    print("\nCollecting player statistics...")
    all_player_stats = []
    last_checkpoint = 0  # len(all_player_stats) at the most recent partial save

    # The query is the same for every player, so it is loaded once up front.
    player_stats_query = collector._load_query('player_statistics')

    for player_id in tqdm(active_players['id'].tolist()):
        retry_count = 0
        while retry_count < max_retries:
            try:
                # Make sure to respect rate limits
                collector.rate_limiter.wait()

                variables = {
                    'playerId': player_id,
                    'filter': {
                        'timeWindow': 'LAST_3_MONTHS'
                    }
                }

                result = collector._execute_query(
                    player_stats_query,
                    variables,
                    collector.stats_client
                )

                stats = None
                if result and 'playerStatistics' in result:
                    stats = result['playerStatistics']

                if stats:
                    all_player_stats.append(_flatten_player_stats(player_id, stats))
                else:
                    # A successful but empty response will not improve on retry.
                    # Leaving the loop here also prevents spinning forever,
                    # since no exception is raised and retry_count never grows.
                    logging.warning(f"No statistics returned for player {player_id}; skipping")
                break  # Request completed, exit retry loop

            except Exception as e:
                retry_count += 1
                logging.warning(f"Error processing player {player_id} (attempt {retry_count}): {str(e)}")
                if retry_count < max_retries:
                    # Back off longer after each failed attempt
                    wait_time = base_retry_delay * retry_count
                    logging.info(f"Waiting {wait_time} seconds before retry...")
                    sleep(wait_time)
                else:
                    logging.error(f"Failed to process player {player_id} after {max_retries} attempts")

        # Save progress every `checkpoint_every` collected players. Tracking the
        # last saved count avoids rewriting the file on every skipped or failed
        # player while the total sits on a multiple of the interval.
        if len(all_player_stats) - last_checkpoint >= checkpoint_every:
            temp_df = pd.DataFrame(all_player_stats)
            temp_df.to_csv(f'player_stats_temp_{datetime.now().strftime("%Y%m%d_%H%M")}.csv', index=False)
            last_checkpoint = len(all_player_stats)
            logging.info(f"Saved temporary progress - {len(all_player_stats)} players processed")

    # Final processing
    if len(all_player_stats) > 0:
        player_stats_df = pd.DataFrame(all_player_stats)

        # Attach nickname and team name to each statistics row
        player_analysis = pd.merge(
            player_stats_df,
            active_players[['id', 'nickname', 'team_name']],
            left_on='player_id',
            right_on='id',
            how='left'
        )

        # Save final results under a minute-resolution timestamp
        timestamp = datetime.now().strftime('%Y%m%d_%H%M')
        player_analysis.to_csv(f'player_stats_{timestamp}.csv', index=False)
        print(f"\nStatistics saved to player_stats_{timestamp}.csv")
    else:
        print("No valid player statistics were collected.")

except Exception as e:
    logging.error(f"Error in player statistics collection: {str(e)}")
    raise

2024-11-07 17:42:12,367 - INFO - Collecting player statistics...
2024-11-07 17:42:12,368 - INFO - Looking for query file at: c:\Users\Chaos\Desktop\Repo's\homework\VictorVis2.0\grid_collector\queries\players.graphql
2024-11-07 17:42:12,372 - INFO - >>> {"query": "query GetAllPlayers($first: Int!, $after: Cursor, $filter: PlayerFilter) {\n  players(first: $first, after: $after, filter: $filter) {\n    edges {\n      node {\n        id\n        nickname\n        title {\n          id\n          name\n        }\n        team {\n          id\n          name\n        }\n        private\n      }\n    }\n    pageInfo {\n      hasNextPage\n      endCursor\n    }\n  }\n}", "variables": {"filter": {"titleId": "28"}, "first": 50, "after": null}}
2024-11-07 17:42:13,032 - INFO - <<< {"data":{"players":{"edges":[{"node":{"id":"18932","nickname":"AG Nuke","title":{"id":"28","name":"Counter Strike 2"},"team":{"id":"52314","name":"CS2-1"},"private":false}},{"node":{"id":"18933","nickname":"BadjoSP","t


Found 483 active players

Collecting player statistics...


  0%|          | 0/483 [00:00<?, ?it/s]

2024-11-07 17:44:06,726 - INFO - Looking for query file at: c:\Users\Chaos\Desktop\Repo's\homework\VictorVis2.0\grid_collector\queries\player_statistics.graphql
2024-11-07 17:44:09,745 - INFO - >>> {"query": "query PlayerStatistics($playerId: ID!, $filter: PlayerStatisticsFilter!) {\n  playerStatistics(playerId: $playerId, filter: $filter) {\n    id\n    series {\n      count\n      kills {\n        sum\n        min\n        max\n        avg\n      }\n      deaths {\n        sum\n        min\n        max\n        avg\n      }\n    }\n    game {\n      count\n      wins {\n        count\n        percentage\n        streak {\n          current\n          max\n        }\n      }\n    }\n  }\n}", "variables": {"playerId": "18932", "filter": {"timeWindow": "LAST_3_MONTHS"}}}
2024-11-07 17:44:10,620 - INFO - <<< {"data":{"playerStatistics":{"id":"18932","series":{"count":0,"kills":{"sum":0,"min":0,"max":0,"avg":0.0},"deaths":{"sum":0,"min":0,"max":0,"avg":0.0}},"game":{"count":0,"wins":[{"co


Statistics saved to player_stats_20241107_1832.csv


In [5]:
# Comprehensive Player Stats
# Smoke-test the comprehensive stats collection on one known player
test_player_id = "19549"  # 2high from ENCE Academy

# Fetch the nested statistics structure for that player
player_stats = collector.get_comprehensive_player_stats(test_player_id)

# Headline figures first, each looked up right before it is printed
print("\nKey Statistics:")
games_played = player_stats['general']['games_played']
print(f"Games Played: {games_played}")
total_kills = player_stats['combat']['kills']['total']
print(f"Kills: {total_kills}")
win_percentage = player_stats['performance']['wins']['percentage']
print(f"Win Rate: {win_percentage}%")

# Then the complete structure, pretty-printed for inspection
from pprint import pprint
print("\nFull Statistics:")
pprint(player_stats)

2024-11-13 21:05:51,857 - INFO - Looking for query file at: c:\Users\Chaos\Desktop\Repo's\homework\VictorVis2.0\grid_collector\queries\comprehensive_player_stats.graphql
2024-11-13 21:05:51,870 - INFO - >>> {"query": "query IntrospectionQuery {\n  __schema {\n    queryType {\n      name\n    }\n    mutationType {\n      name\n    }\n    subscriptionType {\n      name\n    }\n    types {\n      ...FullType\n    }\n    directives {\n      name\n      description\n      locations\n      args {\n        ...InputValue\n      }\n    }\n  }\n}\n\nfragment FullType on __Type {\n  kind\n  name\n  description\n  fields(includeDeprecated: true) {\n    name\n    description\n    args {\n      ...InputValue\n    }\n    type {\n      ...TypeRef\n    }\n    isDeprecated\n    deprecationReason\n  }\n  inputFields {\n    ...InputValue\n  }\n  interfaces {\n    ...TypeRef\n  }\n  enumValues(includeDeprecated: true) {\n    name\n    description\n    isDeprecated\n    deprecationReason\n  }\n  possibleTyp


Key Statistics:
Games Played: 53
Kills: 750
Win Rate: 0%

Full Statistics:
{'combat': {'damage': {'average': 1624.0943396226414,
                       'max': 2951,
                       'total': 86077},
            'deaths': {'average': 35.63636363636363,
                       'best': 18,
                       'total': 784,
                       'worst': 47},
            'first_kills': {'percentage': 95.45454545454545, 'total': 21},
            'kills': {'average': 34.09090909090909,
                      'best': 53,
                      'total': 750,
                      'worst': 18}},
 'economy': {'inventory_value': {'average': 0, 'max': 0},
             'net_worth': {'average': 0, 'max': 0}},
 'general': {'games_played': 53, 'series_played': 22},
 'performance': {'wins': {'count': 0,
                          'current_streak': 0,
                          'max_streak': 0,
                          'percentage': 0}},
 'player_id': '19549',
 'segments': [{'combat': {'deaths': 