# ADS 509 Pokemon Text Analysis: Final Project
### Imports

In [1]:
import re
import nltk
from nltk.tokenize import word_tokenize
from flask import Flask, render_template, url_for, request
import joblib
import json
from collections import Counter
import pandas as pd
from string import punctuation
from pandas import isnull
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.naive_bayes import MultinomialNB

### Read in the text file

In [23]:
f = open('C:/Users/mendi/Desktop/USD Class Files/ADS 509/Pokemon_Data.txt')
data = json.load(f)
print(len(data))

1281


### Functions & Tokenize

In [3]:
# Remove punctuation
punctuation = set(punctuation) # speeds up comparison
tw_punct = punctuation - {'''#{}[],'''}
def remove_punctuation(text, punct_set=tw_punct) : 
    return("".join([ch for ch in text if ch not in punct_set]))

# Removing URL's
def remove_URL(text):
    return re.sub(r"'url ' : 'http\S+", "", text)

# Tokenize the data
def tokenize(text) :
    return(word_tokenize(text))

# Applying the pipeline
def prepare(text, pipeline) :
    tokens = str(text)
    for transform in pipeline :
        tokens = transform(tokens)
    return(tokens)

# Descriptive statistics of the data
def descriptive_stats(tokens, num_words = 5, verbose=True) :
    num_tokens = len(tokens)
    num_unique_tokens = len(Counter(tokens).keys())
    lexical_diversity = round((num_unique_tokens/num_tokens),2) # Rounded to 2 Digits to match format of existing decimal rounding below
    num_characters = sum(len(i) for i in tokens)
    countered = Counter(tokens)
    if verbose :        
        print(f"There are {num_tokens} tokens in the data.")
        print(f"There are {num_unique_tokens} unique tokens in the data.")
        print(f"There are {num_characters} characters in the data.")
        print(f"The lexical diversity is {lexical_diversity:.3f} in the data.")        
    return

In [24]:
pokemon_list = []
p_list = []
for pokemon in data:
    for features in data[pokemon]:
        #print(a) # a is the whole section of combined features
        for feature, description in features.items():
            # b is the dictionaries such as ability, weight, stats that are pulled
            #print(c) # Farthest I can break down for now.
            #p_list.append()
            poke_dict = {'pokemon': pokemon, 'feature': feature, 'description': description}
            pokemon_list.append(poke_dict)

In [25]:
df = pd.DataFrame.from_dict(pokemon_list)
df

Unnamed: 0,pokemon,feature,description
0,bulbasaur,abilities,"[{'ability': {'name': 'overgrow', 'url': 'http..."
1,bulbasaur,base_experience,64
2,bulbasaur,forms,"[{'name': 'bulbasaur', 'url': 'https://pokeapi..."
3,bulbasaur,game_indices,"[{'game_index': 153, 'version': {'name': 'red'..."
4,bulbasaur,height,7
...,...,...,...
23053,miraidon-glide-mode,species,"{'name': 'miraidon', 'url': 'https://pokeapi.c..."
23054,miraidon-glide-mode,sprites,"{'back_default': None, 'back_female': None, 'b..."
23055,miraidon-glide-mode,stats,"[{'base_stat': 100, 'effort': 0, 'stat': {'nam..."
23056,miraidon-glide-mode,types,"[{'slot': 1, 'type': {'name': 'electric', 'url..."


In [20]:
my_pipeline = [str.lower,remove_URL,tokenize]
cleaned_data = []
for row in pokemon_list :
    text = " ".join(prepare(row, pipeline = my_pipeline))
    if text :
        cleaned_data.append(text)

### Descriptive Statistics

In [7]:
descriptive_stats(cleaned_data)

There are 23058 tokens in the data.
There are 23058 unique tokens in the data.
There are 169063470 characters in the data.
The lexical diversity is 1.000 in the data.


In [26]:
descriptive_stats(df)

There are 23058 tokens in the data.
There are 3 unique tokens in the data.
There are 25 characters in the data.
The lexical diversity is 0.000 in the data.


### Feature Engineering

In [26]:
transposed_data = df.pivot(columns='feature',values='description', index='pokemon')
transposed_data.head()

feature,abilities,base_experience,forms,game_indices,height,held_items,id,is_default,location_area_encounters,moves,name,order,past_types,species,sprites,stats,types,weight
pokemon,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
abomasnow,"[{'ability': {'name': 'snow-warning', 'url': '...",173,"[{'name': 'abomasnow', 'url': 'https://pokeapi...","[{'game_index': 460, 'version': {'name': 'diam...",22,"[{'item': {'name': 'never-melt-ice', 'url': 'h...",460,True,https://pokeapi.co/api/v2/pokemon/460/encounters,"[{'move': {'name': 'mega-punch', 'url': 'https...",abomasnow,585,[],"{'name': 'abomasnow', 'url': 'https://pokeapi....",{'back_default': 'https://raw.githubuserconten...,"[{'base_stat': 90, 'effort': 0, 'stat': {'name...","[{'slot': 1, 'type': {'name': 'grass', 'url': ...",1355
abomasnow-mega,"[{'ability': {'name': 'snow-warning', 'url': '...",208,"[{'name': 'abomasnow-mega', 'url': 'https://po...",[],27,"[{'item': {'name': 'never-melt-ice', 'url': 'h...",10060,False,https://pokeapi.co/api/v2/pokemon/10060/encoun...,"[{'move': {'name': 'ice-punch', 'url': 'https:...",abomasnow-mega,586,[],"{'name': 'abomasnow', 'url': 'https://pokeapi....","{'back_default': None, 'back_female': None, 'b...","[{'base_stat': 90, 'effort': 0, 'stat': {'name...","[{'slot': 1, 'type': {'name': 'grass', 'url': ...",1850
abra,"[{'ability': {'name': 'synchronize', 'url': 'h...",62,"[{'name': 'abra', 'url': 'https://pokeapi.co/a...","[{'game_index': 148, 'version': {'name': 'red'...",9,"[{'item': {'name': 'twisted-spoon', 'url': 'ht...",63,True,https://pokeapi.co/api/v2/pokemon/63/encounters,"[{'move': {'name': 'mega-punch', 'url': 'https...",abra,103,[],"{'name': 'abra', 'url': 'https://pokeapi.co/ap...",{'back_default': 'https://raw.githubuserconten...,"[{'base_stat': 25, 'effort': 0, 'stat': {'name...","[{'slot': 1, 'type': {'name': 'psychic', 'url'...",195
absol,"[{'ability': {'name': 'pressure', 'url': 'http...",163,"[{'name': 'absol', 'url': 'https://pokeapi.co/...","[{'game_index': 376, 'version': {'name': 'ruby...",12,"[{'item': {'name': 'life-orb', 'url': 'https:/...",359,True,https://pokeapi.co/api/v2/pokemon/359/encounters,"[{'move': {'name': 'scratch', 'url': 'https://...",absol,478,[],"{'name': 'absol', 'url': 'https://pokeapi.co/a...",{'back_default': 'https://raw.githubuserconten...,"[{'base_stat': 65, 'effort': 0, 'stat': {'name...","[{'slot': 1, 'type': {'name': 'dark', 'url': '...",470
absol-mega,"[{'ability': {'name': 'magic-bounce', 'url': '...",198,"[{'name': 'absol-mega', 'url': 'https://pokeap...",[],12,"[{'item': {'name': 'life-orb', 'url': 'https:/...",10057,False,https://pokeapi.co/api/v2/pokemon/10057/encoun...,"[{'move': {'name': 'scratch', 'url': 'https://...",absol-mega,479,[],"{'name': 'absol', 'url': 'https://pokeapi.co/a...","{'back_default': None, 'back_female': None, 'b...","[{'base_stat': 65, 'effort': 0, 'stat': {'name...","[{'slot': 1, 'type': {'name': 'dark', 'url': '...",490


In [27]:
transposed_datad = transposed_data.drop(columns = ['held_items', 'id', 'is_default', 'order', 
                                        'past_types', 'location_area_encounters', 'sprites',
                                       'forms', 'species', 'game_indices', 'stats', 'name'])
transposed_datad.head(50)

feature,abilities,base_experience,height,moves,types,weight
pokemon,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
abomasnow,"[{'ability': {'name': 'snow-warning', 'url': '...",173.0,22,"[{'move': {'name': 'mega-punch', 'url': 'https...","[{'slot': 1, 'type': {'name': 'grass', 'url': ...",1355
abomasnow-mega,"[{'ability': {'name': 'snow-warning', 'url': '...",208.0,27,"[{'move': {'name': 'ice-punch', 'url': 'https:...","[{'slot': 1, 'type': {'name': 'grass', 'url': ...",1850
abra,"[{'ability': {'name': 'synchronize', 'url': 'h...",62.0,9,"[{'move': {'name': 'mega-punch', 'url': 'https...","[{'slot': 1, 'type': {'name': 'psychic', 'url'...",195
absol,"[{'ability': {'name': 'pressure', 'url': 'http...",163.0,12,"[{'move': {'name': 'scratch', 'url': 'https://...","[{'slot': 1, 'type': {'name': 'dark', 'url': '...",470
absol-mega,"[{'ability': {'name': 'magic-bounce', 'url': '...",198.0,12,"[{'move': {'name': 'scratch', 'url': 'https://...","[{'slot': 1, 'type': {'name': 'dark', 'url': '...",490
accelgor,"[{'ability': {'name': 'hydration', 'url': 'htt...",173.0,8,"[{'move': {'name': 'body-slam', 'url': 'https:...","[{'slot': 1, 'type': {'name': 'bug', 'url': 'h...",253
aegislash-blade,"[{'ability': {'name': 'stance-change', 'url': ...",250.0,17,"[{'move': {'name': 'swords-dance', 'url': 'htt...","[{'slot': 1, 'type': {'name': 'steel', 'url': ...",530
aegislash-shield,"[{'ability': {'name': 'stance-change', 'url': ...",250.0,17,"[{'move': {'name': 'swords-dance', 'url': 'htt...","[{'slot': 1, 'type': {'name': 'steel', 'url': ...",530
aerodactyl,"[{'ability': {'name': 'rock-head', 'url': 'htt...",180.0,18,"[{'move': {'name': 'razor-wind', 'url': 'https...","[{'slot': 1, 'type': {'name': 'rock', 'url': '...",590
aerodactyl-mega,"[{'ability': {'name': 'tough-claws', 'url': 'h...",215.0,21,"[{'move': {'name': 'wing-attack', 'url': 'http...","[{'slot': 1, 'type': {'name': 'rock', 'url': '...",790


In [8]:
transposed_datad['moves'][0]

[{'move': {'name': 'mega-punch', 'url': 'https://pokeapi.co/api/v2/move/5/'},
  'version_group_details': [{'level_learned_at': 0,
    'move_learn_method': {'name': 'machine',
     'url': 'https://pokeapi.co/api/v2/move-learn-method/4/'},
    'version_group': {'name': 'sword-shield',
     'url': 'https://pokeapi.co/api/v2/version-group/20/'}}]},
 {'move': {'name': 'ice-punch', 'url': 'https://pokeapi.co/api/v2/move/8/'},
  'version_group_details': [{'level_learned_at': 1,
    'move_learn_method': {'name': 'level-up',
     'url': 'https://pokeapi.co/api/v2/move-learn-method/1/'},
    'version_group': {'name': 'diamond-pearl',
     'url': 'https://pokeapi.co/api/v2/version-group/8/'}},
   {'level_learned_at': 1,
    'move_learn_method': {'name': 'level-up',
     'url': 'https://pokeapi.co/api/v2/move-learn-method/1/'},
    'version_group': {'name': 'platinum',
     'url': 'https://pokeapi.co/api/v2/version-group/9/'}},
   {'level_learned_at': 0,
    'move_learn_method': {'name': 'tutor',


In [None]:
# trying to use for-loops to extract the info from the list of dictionaries
column_df = []
transposed_data_moves = transposed_datad['moves']
for index in range(len(transposed_data_moves)) :
    for key in transposed_data_moves[index] :
        key = pd.concat({transposed_datad['moves'].str.get(key)})
        column_df.append(i)
        
    for s in 
        
    

In [27]:
# Adjusting the 'abilities' column
a1 = transposed_datad['abilities'].str.get(0)
b1 = transposed_datad['abilities'].str.get(1)
c1 = transposed_datad['abilities'].str.get(2)
abilities_df = pd.concat({'a1':a1, 'b1':b1, 'c1':c1}, axis = 1)
abilities_df = abilities_df.applymap(lambda x: {} if isnull(x) else x)

abilities_df['a1_ability'] = [d.get('ability') for d in abilities_df.a1]
abilities_df['b1_ability'] = [d.get('ability') for d in abilities_df.b1]
abilities_df['c1_ability'] = [d.get('ability') for d in abilities_df.c1]

abilities_df = abilities_df.applymap(lambda x: {} if isnull(x) else x)

abilities_df['a1_name'] = [d.get('name') for d in abilities_df.a1_ability]
abilities_df['b1_name'] = [d.get('name') for d in abilities_df.b1_ability]
abilities_df['c1_name'] = [d.get('name') for d in abilities_df.c1_ability]

abilities_df = abilities_df.drop(columns=['a1', 'b1', 'c1', 'a1_ability', 'b1_ability', 'c1_ability'])
abilities_df = abilities_df.fillna({column: '' for column in abilities_df.columns})

abilities_df['abilities'] = abilities_df['a1_name'] + " " + abilities_df['b1_name'] + " " + abilities_df['c1_name']
abilities_df['abilities'] = abilities_df['abilities'].str.replace('  ', ' ')
abilities_df = abilities_df.drop(columns=['a1_name', 'b1_name', 'c1_name'])

transposed_datad_abil = transposed_datad.drop(columns=['abilities'])
clean_df = pd.merge(transposed_datad_abil,abilities_df,left_index=True,right_index=True)
clean_df.head(10)

Unnamed: 0_level_0,base_experience,height,moves,types,weight,abilities
pokemon,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
abomasnow,173,22,"[{'move': {'name': 'mega-punch', 'url': 'https...","[{'slot': 1, 'type': {'name': 'grass', 'url': ...",1355,snow-warning soundproof
abomasnow-mega,208,27,"[{'move': {'name': 'ice-punch', 'url': 'https:...","[{'slot': 1, 'type': {'name': 'grass', 'url': ...",1850,snow-warning
abra,62,9,"[{'move': {'name': 'mega-punch', 'url': 'https...","[{'slot': 1, 'type': {'name': 'psychic', 'url'...",195,synchronize inner-focus magic-guard
absol,163,12,"[{'move': {'name': 'scratch', 'url': 'https://...","[{'slot': 1, 'type': {'name': 'dark', 'url': '...",470,pressure super-luck justified
absol-mega,198,12,"[{'move': {'name': 'scratch', 'url': 'https://...","[{'slot': 1, 'type': {'name': 'dark', 'url': '...",490,magic-bounce
accelgor,173,8,"[{'move': {'name': 'body-slam', 'url': 'https:...","[{'slot': 1, 'type': {'name': 'bug', 'url': 'h...",253,hydration sticky-hold unburden
aegislash-blade,250,17,"[{'move': {'name': 'swords-dance', 'url': 'htt...","[{'slot': 1, 'type': {'name': 'steel', 'url': ...",530,stance-change
aegislash-shield,250,17,"[{'move': {'name': 'swords-dance', 'url': 'htt...","[{'slot': 1, 'type': {'name': 'steel', 'url': ...",530,stance-change
aerodactyl,180,18,"[{'move': {'name': 'razor-wind', 'url': 'https...","[{'slot': 1, 'type': {'name': 'rock', 'url': '...",590,rock-head pressure unnerve
aerodactyl-mega,215,21,"[{'move': {'name': 'wing-attack', 'url': 'http...","[{'slot': 1, 'type': {'name': 'rock', 'url': '...",790,tough-claws


In [28]:
# Adjusting the 'moves' column
a2 = transposed_datad['moves'].str.get(0)
b2 = transposed_datad['moves'].str.get(1)
c2 = transposed_datad['moves'].str.get(2)
moves_df = pd.concat({'a2':a2, 'b2':b2, 'c2':c2}, axis = 1)
moves_df = moves_df.applymap(lambda x: {} if isnull(x) else x)

moves_df['a2_moves'] = [d.get('move') for d in moves_df.a2]
moves_df['b2_moves'] = [d.get('move') for d in moves_df.b2]
moves_df['c2_moves'] = [d.get('move') for d in moves_df.c2]

moves_df = moves_df.applymap(lambda x: {} if isnull(x) else x)

moves_df['a2_name'] = [d.get('name') for d in moves_df.a2_moves]
moves_df['b2_name'] = [d.get('name') for d in moves_df.b2_moves]
moves_df['c2_name'] = [d.get('name') for d in moves_df.c2_moves]

moves_df = moves_df.drop(columns=['a2', 'b2', 'c2', 'a2_moves', 'b2_moves', 'c2_moves'])
moves_df = moves_df.fillna({column: '' for column in moves_df.columns})

moves_df['moves'] = moves_df['a2_name'] + " " + moves_df['b2_name'] + " " + moves_df['c2_name']
moves_df['moves'] = moves_df['moves'].str.replace('  ', ' ')
moves_df = moves_df.drop(columns=['a2_name', 'b2_name', 'c2_name'])

transposed_datad_mo = clean_df.drop(columns=['moves'])
clean_df = pd.merge(transposed_datad_mo,moves_df,left_index=True,right_index=True)
clean_df

Unnamed: 0_level_0,base_experience,height,types,weight,abilities,moves
pokemon,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
abomasnow,173,22,"[{'slot': 1, 'type': {'name': 'grass', 'url': ...",1355,snow-warning soundproof,mega-punch ice-punch swords-dance
abomasnow-mega,208,27,"[{'slot': 1, 'type': {'name': 'grass', 'url': ...",1850,snow-warning,ice-punch swords-dance leer
abra,62,9,"[{'slot': 1, 'type': {'name': 'psychic', 'url'...",195,synchronize inner-focus magic-guard,mega-punch fire-punch ice-punch
absol,163,12,"[{'slot': 1, 'type': {'name': 'dark', 'url': '...",470,pressure super-luck justified,scratch razor-wind swords-dance
absol-mega,198,12,"[{'slot': 1, 'type': {'name': 'dark', 'url': '...",490,magic-bounce,scratch razor-wind swords-dance
...,...,...,...,...,...,...
zygarde-10,243,12,"[{'slot': 1, 'type': {'name': 'dragon', 'url':...",335,aura-break,bind body-slam bite
zygarde-10-power-construct,243,12,"[{'slot': 1, 'type': {'name': 'dragon', 'url':...",335,power-construct,bind body-slam bite
zygarde-50,300,50,"[{'slot': 1, 'type': {'name': 'dragon', 'url':...",3050,aura-break,bind body-slam bite
zygarde-50-power-construct,300,50,"[{'slot': 1, 'type': {'name': 'dragon', 'url':...",3050,power-construct,bind body-slam bite


In [29]:
# Adjusting the 'types' column
a3 = transposed_datad['types'].str.get(0)
b3 = transposed_datad['types'].str.get(1)
c3 = transposed_datad['types'].str.get(2)
types_df = pd.concat({'a3':a3, 'b3':b3, 'c3':c3}, axis = 1)
types_df = types_df.applymap(lambda x: {} if isnull(x) else x)

types_df['a3_types'] = [d.get('type') for d in types_df.a3]
types_df['b3_types'] = [d.get('type') for d in types_df.b3]
types_df['c3_types'] = [d.get('type') for d in types_df.c3]

types_df = types_df.applymap(lambda x: {} if isnull(x) else x)

types_df['a3_name'] = [d.get('name') for d in types_df.a3_types]
types_df['b3_name'] = [d.get('name') for d in types_df.b3_types]
types_df['c3_name'] = [d.get('name') for d in types_df.c3_types]

types_df = types_df.drop(columns=['a3', 'b3', 'c3', 'a3_types', 'b3_types', 'c3_types'])
types_df = types_df.fillna({column: '' for column in types_df.columns})

types_df['types'] = types_df['a3_name'] + " " + types_df['b3_name'] + " " + types_df['c3_name']
types_df['types'] = types_df['types'].str.replace('  ', ' ')
types_df = types_df.drop(columns=['a3_name', 'b3_name', 'c3_name'])

transposed_datad_mo = clean_df.drop(columns=['types'])
clean_df = pd.merge(transposed_datad_mo,types_df,left_index=True,right_index=True)
clean_df

Unnamed: 0_level_0,base_experience,height,weight,abilities,moves,types
pokemon,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
abomasnow,173,22,1355,snow-warning soundproof,mega-punch ice-punch swords-dance,grass ice
abomasnow-mega,208,27,1850,snow-warning,ice-punch swords-dance leer,grass ice
abra,62,9,195,synchronize inner-focus magic-guard,mega-punch fire-punch ice-punch,psychic
absol,163,12,470,pressure super-luck justified,scratch razor-wind swords-dance,dark
absol-mega,198,12,490,magic-bounce,scratch razor-wind swords-dance,dark
...,...,...,...,...,...,...
zygarde-10,243,12,335,aura-break,bind body-slam bite,dragon ground
zygarde-10-power-construct,243,12,335,power-construct,bind body-slam bite,dragon ground
zygarde-50,300,50,3050,aura-break,bind body-slam bite,dragon ground
zygarde-50-power-construct,300,50,3050,power-construct,bind body-slam bite,dragon ground


### Building the model

In [2]:
Final_Data = pd.read_csv('C:/Users/mendi/Desktop/USD Class Files/ADS 509/Final_Data.csv')
Final_Data.head()

Unnamed: 0.1,Unnamed: 0,pokemon,base_experience,height,name,order,weight,abilities,moves,types
0,0,abomasnow,173.0,22,abomasnow,585,1355,"'snow-warning', 'soundproof'","'mega-punch', 'ice-punch', 'swords-dance', 'me...","'grass', 'ice'"
1,1,abomasnow-mega,208.0,27,abomasnow-mega,586,1850,'snow-warning',"'ice-punch', 'swords-dance', 'leer', 'mist', '...","'grass', 'ice'"
2,2,abra,62.0,9,abra,103,195,"'synchronize', 'inner-focus', 'magic-guard'","'mega-punch', 'fire-punch', 'ice-punch', 'thun...",'psychic'
3,3,absol,163.0,12,absol,478,470,"'pressure', 'super-luck', 'justified'","'scratch', 'razor-wind', 'swords-dance', 'cut'...",'dark'
4,4,absol-mega,198.0,12,absol-mega,479,490,'magic-bounce',"'scratch', 'razor-wind', 'swords-dance', 'cut'...",'dark'


In [3]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, MultiLabelBinarizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report

pokemon_data_encoded = Final_Data.copy()

# Handle missing 'moves' data
pokemon_data_encoded['moves'] = pokemon_data_encoded['moves'].apply(lambda x: x if isinstance(x, list) else [])

# MultiLabelBinarizer encode 'abilities'
mlb_abilities = MultiLabelBinarizer()
abilities_encoded = mlb_abilities.fit_transform(pokemon_data_encoded['abilities'])
abilities_encoded_df = pd.DataFrame(abilities_encoded, columns=mlb_abilities.classes_)
pokemon_data_encoded = pd.concat([pokemon_data_encoded.drop('abilities', axis=1), abilities_encoded_df], axis=1)

# MultiLabelBinarizer encode 'moves'
mlb_moves = MultiLabelBinarizer()
moves_encoded = mlb_moves.fit_transform(pokemon_data_encoded['moves'])
moves_encoded_df = pd.DataFrame(moves_encoded, columns=mlb_moves.classes_)
pokemon_data_encoded = pd.concat([pokemon_data_encoded.drop('moves', axis=1), moves_encoded_df], axis=1)

# Separate features from the target
X = pokemon_data_encoded.drop('types', axis=1)  
y = Final_Data['types']  

# MultiLabelBinarizer encode 'types'
mlb_types = MultiLabelBinarizer()
y_encoded = mlb_types.fit_transform(y)

# Split 
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# One hot encoding
X_train = pd.get_dummies(X_train)
X_test = pd.get_dummies(X_test)

# Align 
X_train, X_test = X_train.align(X_test, join='left', axis=1)

# Handle NaN 
X_train = X_train.fillna(0)
X_test = X_test.fillna(0)

# RFC
clf = RandomForestClassifier(random_state=42)

# Define hyperparameter grid
param_grid = {
    'n_estimators': [100, 200, 300, 500],
    'max_depth': [None, 5, 10, 15],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'bootstrap': [True, False]
}

# Grid search
grid_search = GridSearchCV(estimator = clf, param_grid = param_grid, 
                           cv = 3, n_jobs = -1, verbose = 2)

grid_search.fit(X_train, y_train)

# Get best estimator
best_clf = grid_search.best_estimator_

# Predictions
y_pred = best_clf.predict(X_test)

# Print
print(classification_report(y_test, y_pred))

Fitting 3 folds for each of 288 candidates, totalling 864 fits
              precision    recall  f1-score   support

           0       0.82      0.71      0.76       127
           1       1.00      1.00      1.00       226
           2       0.82      0.71      0.76       127
           3       0.80      0.92      0.86       117
           4       1.00      0.16      0.27        19
           5       0.86      0.32      0.46        79
           6       0.89      0.21      0.34        38
           7       0.79      0.62      0.69        91
           8       0.82      0.45      0.58        69
           9       0.85      0.79      0.82       121
          10       0.90      0.30      0.44        61
          11       0.84      0.90      0.87       141
          12       1.00      0.21      0.35        28
          13       0.92      0.46      0.62        71
          14       1.00      0.43      0.60        21
          15       0.74      0.79      0.76        96
          16      

In [4]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test,y_pred)

0.24336283185840707

### Create the app

In [5]:
import joblib
joblib.dump(clf, 'Pokemon_modell.pkl')

['Pokemon_modell.pkl']

In [6]:
Pokemon_model = open('Pokemon_modell.pkl','rb')
clf = joblib.load(Pokemon_model)