## Necessary libraries and imports

In [4]:
# HTTP requests
import requests

# Data manipulation and analysis
import pandas as pd

# Numerical processing
import numpy as np

# Regular expressions for text processing
import re

# Natural Language Processing Toolkit
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer

# Machine Learning
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.decomposition import NMF, LatentDirichletAllocation, TruncatedSVD


In [16]:
import collections

In [11]:
import json
from urllib.parse import urlparse

In [5]:
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\micha\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\micha\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\micha\AppData\Roaming\nltk_data...


True

In [6]:
pip install requests pandas numpy nltk scikit-learn pyLDAvis

Collecting pyLDAvis
  Using cached pyLDAvis-3.4.1-py3-none-any.whl (2.6 MB)
Collecting joblib
  Using cached joblib-1.2.0-py3-none-any.whl (297 kB)
Collecting gensim
  Using cached gensim-4.3.1-cp39-cp39-win_amd64.whl (24.0 MB)
Collecting pandas
  Using cached pandas-2.0.2-cp39-cp39-win_amd64.whl (10.7 MB)
Collecting funcy
  Using cached funcy-2.0-py2.py3-none-any.whl (30 kB)
Installing collected packages: joblib, pandas, gensim, funcy, pyLDAvis
  Attempting uninstall: joblib
    Found existing installation: joblib 1.1.0
    Uninstalling joblib-1.1.0:
      Successfully uninstalled joblib-1.1.0
  Attempting uninstall: pandas
    Found existing installation: pandas 1.3.4
    Uninstalling pandas-1.3.4:
      Successfully uninstalled pandas-1.3.4
Successfully installed funcy-2.0 gensim-4.3.1 joblib-1.2.0 pandas-2.0.2 pyLDAvis-3.4.1
Note: you may need to restart the kernel to use updated packages.


## Data Retrieval from PokeAPI(Pokemon Themselves)

### Retrieval of pokemon names

In [7]:
# Putting in URL
url = 'https://pokeapi.co/api/v2/pokemon/1'

# Send GET request to PokeAPI
response = requests.get(url)

# Check that the request was successful
if response.status_code == 200:
    data = response.json()
    print(data)
else:
    print(f"Request failed with status code {response.status_code}")

{'abilities': [{'ability': {'name': 'overgrow', 'url': 'https://pokeapi.co/api/v2/ability/65/'}, 'is_hidden': False, 'slot': 1}, {'ability': {'name': 'chlorophyll', 'url': 'https://pokeapi.co/api/v2/ability/34/'}, 'is_hidden': True, 'slot': 3}], 'base_experience': 64, 'forms': [{'name': 'bulbasaur', 'url': 'https://pokeapi.co/api/v2/pokemon-form/1/'}], 'game_indices': [{'game_index': 153, 'version': {'name': 'red', 'url': 'https://pokeapi.co/api/v2/version/1/'}}, {'game_index': 153, 'version': {'name': 'blue', 'url': 'https://pokeapi.co/api/v2/version/2/'}}, {'game_index': 153, 'version': {'name': 'yellow', 'url': 'https://pokeapi.co/api/v2/version/3/'}}, {'game_index': 1, 'version': {'name': 'gold', 'url': 'https://pokeapi.co/api/v2/version/4/'}}, {'game_index': 1, 'version': {'name': 'silver', 'url': 'https://pokeapi.co/api/v2/version/5/'}}, {'game_index': 1, 'version': {'name': 'crystal', 'url': 'https://pokeapi.co/api/v2/version/6/'}}, {'game_index': 1, 'version': {'name': 'ruby', 

## Data Cleaning, Tokenization, and Normalization 

In [25]:
if response.status_code == 200:
    data = response.json()
    #Cleaning: Remove URL fields
    def clean_data(data):
        for key in list(data.keys()):
            if type(data[key]) is dict:
                clean_data(data[key])
            elif type(data[key]) is list:
                for item in data[key]:
                    if type(item) is dict:
                        clean_data(item)
            elif 'url' in key:
                del data[key]

    clean_data(data)

    # Tokenization: Flatten data
    def tokenize_data(data, prefix=''):
        tokens = []
        for key in data:
            if isinstance(data[key], dict):
                tokens.extend(tokenize_data(data[key], f'{prefix}{key}_'))
            elif isinstance(data[key], list):
                for i, item in enumerate(data[key]):
                    if isinstance(item, dict):
                        tokens.extend(tokenize_data(item, f'{prefix}{key}_{i}_'))
                    else:
                        tokens.append((f'{prefix}{key}_{i}', item))
            else:
                tokens.append((f'{prefix}{key}', data[key]))
        return tokens

    tokens = tokenize_data(data)

    # Normalization: Tokens to lowercase and remove special characters
    def normalize_tokens(tokens):
        normalized_tokens = []
        for key, value in tokens:
            key = key.lower().replace('-', '_')
            value = str(value).lower().replace('-', '_')
            normalized_tokens.append((key, value))
        return normalized_tokens

    normalized_tokens = normalize_tokens(tokens)

    print(json.dumps(normalized_tokens, indent=2))

else:
    print(f"Request failed with status code {response.status_code}")

[
  [
    "abilities_0_ability_name",
    "pressure"
  ],
  [
    "abilities_0_is_hidden",
    "false"
  ],
  [
    "abilities_0_slot",
    "1"
  ],
  [
    "abilities_1_ability_name",
    "unnerve"
  ],
  [
    "abilities_1_is_hidden",
    "true"
  ],
  [
    "abilities_1_slot",
    "3"
  ],
  [
    "base_experience",
    "340"
  ],
  [
    "forms_0_name",
    "mewtwo"
  ],
  [
    "game_indices_0_game_index",
    "131"
  ],
  [
    "game_indices_0_version_name",
    "red"
  ],
  [
    "game_indices_1_game_index",
    "131"
  ],
  [
    "game_indices_1_version_name",
    "blue"
  ],
  [
    "game_indices_2_game_index",
    "131"
  ],
  [
    "game_indices_2_version_name",
    "yellow"
  ],
  [
    "game_indices_3_game_index",
    "150"
  ],
  [
    "game_indices_3_version_name",
    "gold"
  ],
  [
    "game_indices_4_game_index",
    "150"
  ],
  [
    "game_indices_4_version_name",
    "silver"
  ],
  [
    "game_indices_5_game_index",
    "150"
  ],
  [
    "game_indices_5_version_

In [9]:
#Testing it out
print(data['name'])  # Prints the name of the Pokémon
print(data['height'])  # Prints the height of the Pokémon

bulbasaur
7


### retrieval for items, moves and abilities

In [10]:
# URLs of items, moves, and abilities
item_url = 'https://pokeapi.co/api/v2/item'
move_url = 'https://pokeapi.co/api/v2/move'
ability_url = 'https://pokeapi.co/api/v2/ability'

item_response = requests.get(item_url)
move_response = requests.get(move_url)
ability_response = requests.get(ability_url)

if item_response.status_code == 200:
    item_data = item_response.json()
    print(item_data)
else:
    print(f"Item request failed with status code {item_response.status_code}")

if move_response.status_code == 200:
    move_data = move_response.json()
    print(move_data)
else:
    print(f"Move request failed with status code {move_response.status_code}")

if ability_response.status_code == 200:
    ability_data = ability_response.json()
    print(ability_data)
else:
    print(f"Ability request failed with status code {ability_response.status_code}")

{'count': 2050, 'next': 'https://pokeapi.co/api/v2/item?offset=20&limit=20', 'previous': None, 'results': [{'name': 'master-ball', 'url': 'https://pokeapi.co/api/v2/item/1/'}, {'name': 'ultra-ball', 'url': 'https://pokeapi.co/api/v2/item/2/'}, {'name': 'great-ball', 'url': 'https://pokeapi.co/api/v2/item/3/'}, {'name': 'poke-ball', 'url': 'https://pokeapi.co/api/v2/item/4/'}, {'name': 'safari-ball', 'url': 'https://pokeapi.co/api/v2/item/5/'}, {'name': 'net-ball', 'url': 'https://pokeapi.co/api/v2/item/6/'}, {'name': 'dive-ball', 'url': 'https://pokeapi.co/api/v2/item/7/'}, {'name': 'nest-ball', 'url': 'https://pokeapi.co/api/v2/item/8/'}, {'name': 'repeat-ball', 'url': 'https://pokeapi.co/api/v2/item/9/'}, {'name': 'timer-ball', 'url': 'https://pokeapi.co/api/v2/item/10/'}, {'name': 'luxury-ball', 'url': 'https://pokeapi.co/api/v2/item/11/'}, {'name': 'premier-ball', 'url': 'https://pokeapi.co/api/v2/item/12/'}, {'name': 'dusk-ball', 'url': 'https://pokeapi.co/api/v2/item/13/'}, {'nam

In [14]:
item_names = [result['name'] for result in item_data['results']]
move_names = [result['name'] for result in move_data['results']]
ability_names = [result['name'] for result in ability_data['results']]

## Descriptive stats

### Descriptive Stats for Pokemon

In [15]:
# Lists to hold base experience and weight data
base_exp_data = []
weight_data = []

# Base experience and weight for first 150 Pokémon
for i in range(1, 151):
    url = f'https://pokeapi.co/api/v2/pokemon/{i}'
    response = requests.get(url)
    if response.status_code == 200:
        data = response.json()
        clean_data(data)
        tokens = tokenize_data(data)
        normalized_tokens = normalize_tokens(tokens)
        # Convert normalized tokens to dictionary
        token_dict = dict(normalized_tokens)
        base_exp_data.append(int(token_dict['base_experience']))
        weight_data.append(int(token_dict['weight']))
    else:
        print(f'Request failed for Pokémon ID {i} with status code {response.status_code}')

# Convert lists to pandas DataFrame
df = pd.DataFrame({'Base Experience': base_exp_data, 'Weight': weight_data})

# Descriptive statistics
print(df.describe())

       Base Experience       Weight
count       150.000000   150.000000
mean        137.486667   462.313333
std          72.590488   595.473881
min          39.000000     1.000000
25%          65.000000    99.250000
50%         142.000000   300.000000
75%         175.000000   563.750000
max         395.000000  4600.000000


### Descriptive Stats for Items, Movves, and Ablities

In [17]:
item_counts = collections.Counter(item_names)
move_counts = collections.Counter(move_names)
ability_counts = collections.Counter(ability_names)

# Print counts
print("Item counts:", item_counts)
print("Move counts:", move_counts)
print("Ability counts:", ability_counts)

Item counts: Counter({'master-ball': 1, 'ultra-ball': 1, 'great-ball': 1, 'poke-ball': 1, 'safari-ball': 1, 'net-ball': 1, 'dive-ball': 1, 'nest-ball': 1, 'repeat-ball': 1, 'timer-ball': 1, 'luxury-ball': 1, 'premier-ball': 1, 'dusk-ball': 1, 'heal-ball': 1, 'quick-ball': 1, 'cherish-ball': 1, 'potion': 1, 'antidote': 1, 'burn-heal': 1, 'ice-heal': 1})
Move counts: Counter({'pound': 1, 'karate-chop': 1, 'double-slap': 1, 'comet-punch': 1, 'mega-punch': 1, 'pay-day': 1, 'fire-punch': 1, 'ice-punch': 1, 'thunder-punch': 1, 'scratch': 1, 'vice-grip': 1, 'guillotine': 1, 'razor-wind': 1, 'swords-dance': 1, 'cut': 1, 'gust': 1, 'wing-attack': 1, 'whirlwind': 1, 'fly': 1, 'bind': 1})
Ability counts: Counter({'stench': 1, 'drizzle': 1, 'speed-boost': 1, 'battle-armor': 1, 'sturdy': 1, 'damp': 1, 'limber': 1, 'sand-veil': 1, 'static': 1, 'volt-absorb': 1, 'water-absorb': 1, 'oblivious': 1, 'cloud-nine': 1, 'compound-eyes': 1, 'insomnia': 1, 'color-change': 1, 'immunity': 1, 'flash-fire': 1, 's

In [18]:
# Number of uniques for each
print("Number of unique items:", len(set(item_names)))
print("Number of unique moves:", len(set(move_names)))
print("Number of unique abilities:", len(set(ability_names)))

Number of unique items: 20
Number of unique moves: 20
Number of unique abilities: 20


In [19]:
# Common name
print("Most common item:", item_counts.most_common(1))
print("Most common move:", move_counts.most_common(1))
print("Most common ability:", ability_counts.most_common(1))

Most common item: [('master-ball', 1)]
Most common move: [('pound', 1)]
Most common ability: [('stench', 1)]


In [20]:
# Proportion of each category
item_total = sum(item_counts.values())
item_proportions = {k: v / item_total for k, v in item_counts.items()}

move_total = sum(move_counts.values())
move_proportions = {k: v / move_total for k, v in move_counts.items()}

ability_total = sum(ability_counts.values())
ability_proportions = {k: v / ability_total for k, v in ability_counts.items()}

print("Item proportions:", item_proportions)
print("Move proportions:", move_proportions)
print("Ability proportions:", ability_proportions)

Item proportions: {'master-ball': 0.05, 'ultra-ball': 0.05, 'great-ball': 0.05, 'poke-ball': 0.05, 'safari-ball': 0.05, 'net-ball': 0.05, 'dive-ball': 0.05, 'nest-ball': 0.05, 'repeat-ball': 0.05, 'timer-ball': 0.05, 'luxury-ball': 0.05, 'premier-ball': 0.05, 'dusk-ball': 0.05, 'heal-ball': 0.05, 'quick-ball': 0.05, 'cherish-ball': 0.05, 'potion': 0.05, 'antidote': 0.05, 'burn-heal': 0.05, 'ice-heal': 0.05}
Move proportions: {'pound': 0.05, 'karate-chop': 0.05, 'double-slap': 0.05, 'comet-punch': 0.05, 'mega-punch': 0.05, 'pay-day': 0.05, 'fire-punch': 0.05, 'ice-punch': 0.05, 'thunder-punch': 0.05, 'scratch': 0.05, 'vice-grip': 0.05, 'guillotine': 0.05, 'razor-wind': 0.05, 'swords-dance': 0.05, 'cut': 0.05, 'gust': 0.05, 'wing-attack': 0.05, 'whirlwind': 0.05, 'fly': 0.05, 'bind': 0.05}
Ability proportions: {'stench': 0.05, 'drizzle': 0.05, 'speed-boost': 0.05, 'battle-armor': 0.05, 'sturdy': 0.05, 'damp': 0.05, 'limber': 0.05, 'sand-veil': 0.05, 'static': 0.05, 'volt-absorb': 0.05, '

In [21]:
# Sorting categories by frequencies
sorted_item_counts = sorted(item_counts.items(), key=lambda x: x[1], reverse=True)
sorted_move_counts = sorted(move_counts.items(), key=lambda x: x[1], reverse=True)
sorted_ability_counts = sorted(ability_counts.items(), key=lambda x: x[1], reverse=True)

print("Items sorted by count:", sorted_item_counts)
print("Moves sorted by count:", sorted_move_counts)
print("Abilities sorted by count:", sorted_ability_counts)

Items sorted by count: [('master-ball', 1), ('ultra-ball', 1), ('great-ball', 1), ('poke-ball', 1), ('safari-ball', 1), ('net-ball', 1), ('dive-ball', 1), ('nest-ball', 1), ('repeat-ball', 1), ('timer-ball', 1), ('luxury-ball', 1), ('premier-ball', 1), ('dusk-ball', 1), ('heal-ball', 1), ('quick-ball', 1), ('cherish-ball', 1), ('potion', 1), ('antidote', 1), ('burn-heal', 1), ('ice-heal', 1)]
Moves sorted by count: [('pound', 1), ('karate-chop', 1), ('double-slap', 1), ('comet-punch', 1), ('mega-punch', 1), ('pay-day', 1), ('fire-punch', 1), ('ice-punch', 1), ('thunder-punch', 1), ('scratch', 1), ('vice-grip', 1), ('guillotine', 1), ('razor-wind', 1), ('swords-dance', 1), ('cut', 1), ('gust', 1), ('wing-attack', 1), ('whirlwind', 1), ('fly', 1), ('bind', 1)]
Abilities sorted by count: [('stench', 1), ('drizzle', 1), ('speed-boost', 1), ('battle-armor', 1), ('sturdy', 1), ('damp', 1), ('limber', 1), ('sand-veil', 1), ('static', 1), ('volt-absorb', 1), ('water-absorb', 1), ('oblivious', 

In [22]:
# Checking for specific categories
specific_item = 'poke-ball'
specific_move = 'tackle'
specific_ability = 'overgrow'

print(f"Count of {specific_item}:", item_counts[specific_item])
print(f"Count of {specific_move}:", move_counts[specific_move])
print(f"Count of {specific_ability}:", ability_counts[specific_ability])

Count of poke-ball: 1
Count of tackle: 0
Count of overgrow: 0
