## Imports

In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import csv 
import requests
import json
import datetime
import time

## Fetching data for analysis

In [2]:
response = requests.get('https://api.pokemontcg.io/v2/cards')

In [3]:
# Initialize an empty list to store all cards
all_cards = []

# Loop through all pages
for page in range(1, 71):
    # Specify the page number and page size
    params = {
        'page': page,  # Page number
        'pageSize': 250  # Number of results per page
    }

    # Make the request
    response = requests.get('https://api.pokemontcg.io/v2/cards', params=params)
    
    # Check if the request was successful
    if response.status_code == 200:
        # Add the cards from this page to the list
        all_cards.extend(response.json()['data'])
    else:
        print(f"Failed to retrieve page {page}")

    # Wait for a short period to avoid hitting rate limits
    time.sleep(1)

# Now all_cards contains the cards from all pages 

In [4]:
len(all_cards)

17500

In [5]:
#response_data = all_cards
# Only 250 cards are returned by the API. Just first page.
#cards = response_data['data']
cards = all_cards
len(cards)

17500

In [6]:
type_mapping = {
    "Colorless": 1,
    "Darkness": 2,
    "Dragon": 3,
    "Fairy": 4,
    "Fighting": 5,
    "Fire": 6,
    "Grass": 7,
    "Lightning": 8,
    "Metal": 9,
    "Psychic": 10,
    "Water": 11
}

In [7]:

clean_cards = []

for card in cards:
    try: 
        clean_card_row = {}
        clean_card_row["name"] = card["name"]
        clean_card_row["hp"] = card["hp"]
        clean_card_row["average_price"] = card["cardmarket"]["prices"]["averageSellPrice"] # in dollars.
        clean_card_row["age"] = card.get("set", {}).get("releaseDate", "No release date available")
        clean_card_row["converted_retreat_cost"] = card["convertedRetreatCost"]
        clean_card_row['pokedex_nr'] = card['nationalPokedexNumbers'][0]
        clean_card_row["type"] = type_mapping.get(card["types"][0],0)
        # Calculate age of card in days
        release_date = datetime.datetime.strptime(clean_card_row["age"], "%Y/%m/%d").date()  
        # Get today's date
        today = datetime.date.today()
        clean_card_row["age"] = (today - release_date).days
        # get damage, so we take the mean of the attacks. 
        damage = 0
        converted_energy_cost = 0
        for attack in card["attacks"]:
            damage_str = ''.join(filter(str.isdigit, attack["damage"]))
            if not damage_str:
                continue
            damage += int(damage_str)
            converted_energy_cost += int(attack["convertedEnergyCost"])

        clean_card_row["damage"] = round(damage / len(card["attacks"])) # round up to whole numbers
        clean_card_row["converted_energy_cost"] = converted_energy_cost / len(card["attacks"])
        
        resistance = 0
        if "resistances" in card and card["resistances"]:
            if ("×" in card["resistances"][0]["value"]):
                filtered_string = ''.join([char for char in card["resistances"][0]["value"] if char.isdigit()])
                resulting_int = int(filtered_string)
                resistance = int(card["hp"]) * resulting_int
            if ("-" in card["resistances"][0]["value"]):
                filtered_string = ''.join([char for char in card["resistances"][0]["value"] if char.isdigit()])
                resulting_int = int(filtered_string)
                resistance = int(card["hp"]) + resulting_int
        else:
            resistance = int(card["hp"])
        clean_card_row["resistance"] = resistance

        weakness = 0
        if "weaknesses" in card and card["weaknesses"]:
            if ("×" in card["weaknesses"][0]["value"]):
                filtered_string = ''.join([char for char in card["weaknesses"][0]["value"] if char.isdigit()])
                resulting_int = int(filtered_string)
                weakness = int(card["hp"]) * resulting_int
            if ("+" in card["weaknesses"][0]["value"]):
                filtered_string = ''.join([char for char in card["weaknesses"][0]["value"] if char.isdigit()])
                resulting_int = int(filtered_string)
                weakness = int(card["hp"]) + resulting_int
            if ("-" in card["weaknesses"][0]["value"]):
                filtered_string = ''.join([char for char in card["weaknesses"][0]["value"] if char.isdigit()])
                resulting_int = int(filtered_string)
                weakness = int(card["hp"]) - resulting_int
        else:
            weakness = int(card["hp"])
        clean_card_row["weakness"] = weakness

        
    except (KeyError, IndexError, ValueError):
        #print(KeyError, IndexError, ValueError)
        continue
    
    print(clean_card_row)
    clean_cards.append(clean_card_row)


print(len(clean_cards))
df = pd.DataFrame(clean_cards)
df = df[df["average_price"] != 0]
df.to_csv('cards.csv', mode='w', index=False)
    

{'name': 'Ampharos', 'hp': '130', 'average_price': 1.87, 'age': 6048, 'converted_retreat_cost': 3, 'pokedex_nr': 181, 'type': 8, 'damage': 70, 'converted_energy_cost': 3.0, 'resistance': 150, 'weakness': 160}
{'name': 'Caterpie', 'hp': '50', 'average_price': 1.85, 'age': 1682, 'converted_retreat_cost': 1, 'pokedex_nr': 10, 'type': 7, 'damage': 20, 'converted_energy_cost': 1.0, 'resistance': 50, 'weakness': 100}
{'name': 'Azumarill', 'hp': '80', 'average_price': 6.04, 'age': 7143, 'converted_retreat_cost': 1, 'pokedex_nr': 184, 'type': 11, 'damage': 20, 'converted_energy_cost': 2.0, 'resistance': 80, 'weakness': 160}
{'name': 'Celebi & Venusaur-GX', 'hp': '270', 'average_price': 12.19, 'age': 1938, 'converted_retreat_cost': 4, 'pokedex_nr': 3, 'type': 7, 'damage': 127, 'converted_energy_cost': 3.6666666666666665, 'resistance': 270, 'weakness': 540}
{'name': 'Blastoise', 'hp': '130', 'average_price': 10.0, 'age': 5580, 'converted_retreat_cost': 2, 'pokedex_nr': 9, 'type': 11, 'damage': 1