In [96]:
# Build models that predict a missing attribute of a Pokemon.
# A multi-output model predicts multiple target variables from the same input; here each
# target (Evolution, Type1, Type2) gets its own classifier, chosen by which inputs are given.
# PLAN
    # if the input is Name, Type1, Type2, then the model will predict the Evolution
    # if the input is Name, Evolution, then the model will predict the primary type (Type1)
    # if the input is Name, Type1, Evolution, then the model will predict the secondary type (Type2)

In [97]:
# Dependencies
import numpy as np 
import pandas as pd
import matplotlib.pyplot as pt 
import tensorflow as tf 
from sklearn.model_selection import KFold, GridSearchCV 
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import make_scorer, hamming_loss, precision_score, f1_score, roc_curve, auc
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from itertools import cycle
# Saving the current model as the best model using the joblib
import joblib as jb 

In [98]:
# Read the Pokemon table from disk, then show its (rows, columns) dimensions.
pokemon_df = pd.read_csv(filepath_or_buffer="PokemonDataset/pokemon.csv")
pokemon_df.shape

(809, 4)

In [99]:
# get the first 10 rows as the sample
pokemon_df.head(10)

Unnamed: 0,Name,Type1,Type2,Evolution
0,bulbasaur,Grass,Poison,ivysaur
1,ivysaur,Grass,Poison,venusaur
2,venusaur,Grass,Poison,
3,charmander,Fire,,charmeleon
4,charmeleon,Fire,,charizard
5,charizard,Fire,Flying,
6,squirtle,Water,,wartortle
7,wartortle,Water,,blastoise
8,blastoise,Water,,
9,caterpie,Bug,,metapod


In [141]:
# Replace every missing value with 0 and store the result in a new dataframe
# (pokemon_df itself is left untouched).
# The encoding cell casts each column with astype(str), so the encoders see this
# placeholder as the string "0".
pokemon_df_removedNA = pokemon_df.fillna(0)
# show the last 10 rows to check the replacement
pokemon_df_removedNA.tail(10)

Unnamed: 0,Name,Type1,Type2,Evolution
799,necrozma,Psychic,0,0
800,magearna,Steel,Fairy,0
801,marshadow,Fighting,Ghost,0
802,poipole,Poison,0,0
803,naganadel,Poison,Dragon,0
804,stakataka,Rock,Steel,0
805,blacephalon,Fire,Ghost,0
806,zeraora,Electric,0,0
807,meltan,Steel,0,0
808,melmetal,Steel,0,0


In [101]:
# Encode the values to feed to a model using LabelEncoder (maps every distinct
# string of a column to an integer code).

# one encoder instance per column (the individual names are kept for later use)
name_encoder = LabelEncoder()
type1_encoder = LabelEncoder()
type2_encoder = LabelEncoder()
evolution_encoder = LabelEncoder()

label_encoders = {
    "Name": name_encoder,
    "Type1": type1_encoder,
    "Type2": type2_encoder,
    "Evolution": evolution_encoder,
}

for column, encoder in label_encoders.items():
    # Always fit on the raw column of pokemon_df rather than on pokemon_df_removedNA,
    # which this cell overwrites. Re-running the cell therefore produces the same codes
    # instead of re-encoding already-encoded integers and saving corrupted encoders.
    raw_labels = pokemon_df[column].fillna(0).astype(str)
    pokemon_df_removedNA[column] = encoder.fit_transform(raw_labels)

    # dump the encoder so the prediction code can translate to and from the codes
    # (files: name_encoder.pkl, type1_encoder.pkl, type2_encoder.pkl, evolution_encoder.pkl)
    jb.dump(encoder, f"{column.lower()}_encoder.pkl")

# print the head data
pokemon_df_removedNA.head(10)

Unnamed: 0,Name,Type1,Type2,Evolution
0,73,9,14,13
1,321,9,14,31
2,751,9,14,0
3,95,6,0,8
4,96,6,0,7
5,93,6,8,0
6,661,17,0,32
7,770,17,0,5
8,56,17,0,0
9,88,0,0,16


In [102]:
# Build one (features, label) pair per prediction task from the encoded dataframe.

# Task 1: Name + Type1 + Type2 -> Evolution
feature_predictEvolution = pokemon_df_removedNA.loc[:, ["Name", "Type1", "Type2"]]
label_predictEvolution = pokemon_df_removedNA.loc[:, "Evolution"]

# Task 2: Name + Evolution -> Type1
feature_predictType1 = pokemon_df_removedNA.loc[:, ["Name", "Evolution"]]
label_predictType1 = pokemon_df_removedNA.loc[:, "Type1"]

# Task 3: Name + Type1 + Evolution -> Type2
feature_predictType2 = pokemon_df_removedNA.loc[:, ["Name", "Type1", "Evolution"]]
label_predictType2 = pokemon_df_removedNA.loc[:, "Type2"]

# display the Task 1 features as a sanity check
feature_predictEvolution

Unnamed: 0,Name,Type1,Type2
0,73,9,14
1,321,9,14
2,751,9,14
3,95,6,0
4,96,6,0
...,...,...,...
804,662,15,17
805,55,6,9
806,802,3,0
807,429,16,0


In [103]:
# Model pipeline: standardise the features, then classify with a random forest.
# The step names matter: the grid-search parameter keys are prefixed with "classifier__".
model_pipeline = Pipeline(
    steps=[
        ("scale", StandardScaler()),
        ("classifier", RandomForestClassifier(class_weight="balanced", random_state=42)),
    ]
)

In [104]:
# Hyperparameter grid explored by the grid search.
# Keys use the "<pipeline step>__<parameter>" form, so all of them tune the
# "classifier" step of the pipeline.
hyperparameter = dict(
    classifier__max_depth=[3, 5, 10, 20, 30, 50],
    classifier__min_samples_split=[2, 5, 10, 20],
    classifier__n_estimators=[300, 500, 700, 1000],
)

# Metric helpers that are wrapped with make_scorer for the grid search
def custom_f1(y_true, y_pred):
    """Return the weighted-average F1 score of y_pred against y_true.

    zero_division=1 is passed so ill-defined (zero-division) cases score 1
    instead of triggering a warning.
    """
    weighted_f1 = f1_score(y_true, y_pred, zero_division=1, average="weighted")
    return weighted_f1

def custom_hamming(y_true, y_pred):
    """Return the negated Hamming loss, so that a larger value means a better fit."""
    loss = hamming_loss(y_true, y_pred)
    return -loss

def custom_precision(y_true, y_pred):
    """Return the weighted-average precision of y_pred against y_true.

    zero_division=1 is passed so ill-defined (zero-division) cases score 1
    instead of triggering a warning.
    """
    # The former second `return np.mean(losses)` was unreachable and referenced an
    # undefined name, so it has been removed.
    return precision_score(y_true, y_pred, average="weighted", zero_division=1)

# Cross-validation splitter. `kfold` was referenced below without being defined
# anywhere in the notebook, so a fresh kernel failed with a NameError.
# 5 splits matches the "Fitting 5 folds" training log; shuffling with a fixed seed is
# an assumption made for reproducibility.
# NOTE(review): confirm shuffle/random_state against the originally intended splitter.
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

# construct the gridsearch cross validation
# Three metrics are recorded for every candidate; the final model is refit using the
# "HAMMING_LOSS" scorer. custom_hamming already negates the loss, so all three scorers
# are "greater is better".
gridSearchCV = GridSearchCV(
    model_pipeline,
    hyperparameter,
    cv=kfold,
    scoring={
        'F1': make_scorer(custom_f1, greater_is_better=True),
        'HAMMING_LOSS': make_scorer(custom_hamming, greater_is_better=True),
        "PRECISION": make_scorer(custom_precision, greater_is_better=True),
    },
    refit="HAMMING_LOSS",
    n_jobs=-1,  # use all available cores
    verbose=1,
)

In [105]:
# Helper that tunes the pipeline on one prediction task and saves the winning model
def train_store_model(x, y, file_name):
    """Fit the shared grid search on (x, y), report the outcome and save the best model.

    Parameters:
        x: feature table for the task.
        y: target labels for the task.
        file_name: path the best estimator is written to with joblib.

    Returns nothing; the result is the printed summary and the file on disk.
    """
    search = gridSearchCV  # module-level search object, re-fitted on every call
    search.fit(x, y)

    best_params, best_score = search.best_params_, search.best_score_
    print("Best parameters:", best_params)
    print("Best cross-validation score:", best_score)

    jb.dump(search.best_estimator_, file_name)

In [106]:
# Train and save one model per prediction task, in this order:
#   Model 1: Predict Evolution
#   Model 2: Predict Type1
#   Model 3: Predict Type2
training_tasks = (
    (feature_predictEvolution, label_predictEvolution, "model_evolution.pkl"),
    (feature_predictType1, label_predictType1, "model_type1.pkl"),
    (feature_predictType2, label_predictType2, "model_type2.pkl"),
)

for task_features, task_labels, model_file in training_tasks:
    train_store_model(task_features, task_labels, model_file)

Fitting 5 folds for each of 96 candidates, totalling 480 fits
Best parameters: {'classifier__max_depth': 20, 'classifier__min_samples_split': 2, 'classifier__n_estimators': 300}
Best cross-validation score: -0.056851468445671335
Fitting 5 folds for each of 96 candidates, totalling 480 fits
Best parameters: {'classifier__max_depth': 20, 'classifier__min_samples_split': 2, 'classifier__n_estimators': 300}
Best cross-validation score: -0.7837052373284258
Fitting 5 folds for each of 96 candidates, totalling 480 fits
Best parameters: {'classifier__max_depth': 20, 'classifier__min_samples_split': 2, 'classifier__n_estimators': 1000}
Best cross-validation score: -0.5598803772716816


In [177]:
def _encode_label(encoder, value, field):
    """Return the integer code `encoder` assigns to `value`.

    Raises a ValueError that names the offending field when `value` is not one of
    the labels the encoder was fitted on.
    """
    try:
        return encoder.transform([value])[0]
    except ValueError as err:
        raise ValueError(
            f"Unknown {field} {value!r}: it is not one of the labels the encoder was fitted on."
        ) from err


def predict_pokemon(name: str = None, type1: str = None, type2: str = None, evolution: str = None):
    """Predict the missing attribute of a Pokemon from the attributes that are given.

    Which saved model is used depends on the inputs:
      * name and type1 given, evolution is None -> predicts Evolution (type2 is optional)
      * name and evolution given, type1 missing -> predicts Type1
      * name, type1 and evolution given         -> predicts Type2

    The training data stores "no value" as the label "0", so pass evolution="0" for a
    Pokemon that does not evolve.

    Returns:
        "Predicted Output: <label>", where a predicted "0" is shown as 'Nothing....!!!',
        or "Invalid input combination." when the inputs match none of the cases above.

    Raises:
        ValueError: if a value that is used for the prediction is unknown to its encoder.
    """
    # Label that stands for a missing value in the training data (NaN -> 0 -> "0").
    missing_label = "0"

    # Load Encoders
    # NOTE: joblib files are pickles; only load files written by this notebook.
    name_encoder = jb.load("name_encoder.pkl")
    type1_encoder = jb.load("type1_encoder.pkl")
    type2_encoder = jb.load("type2_encoder.pkl")
    evolution_encoder = jb.load("evolution_encoder.pkl")

    # Check Scenario; only the inputs the chosen model needs are encoded.
    if name and type1 and evolution is None:
        # If evolution is not provided, predict Evolution
        print("Predicting Evolution...")
        model_file = "model_evolution.pkl"
        features = {
            "Name": _encode_label(name_encoder, name, "Name"),
            "Type1": _encode_label(type1_encoder, type1, "Type1"),
            # A missing secondary type must use the same "0" placeholder the model was
            # trained with; the previous -1 was a value the model had never seen.
            "Type2": _encode_label(type2_encoder, type2 if type2 else missing_label, "Type2"),
        }
        target_encoder = evolution_encoder

    elif name and evolution and not type1:
        # If type1 is not provided, predict Type1 (Primary Type)
        print("Predicting Type1 (Primary Type)...")
        model_file = "model_type1.pkl"
        features = {
            "Name": _encode_label(name_encoder, name, "Name"),
            "Evolution": _encode_label(evolution_encoder, evolution, "Evolution"),
        }
        target_encoder = type1_encoder

    elif name and type1 and evolution:
        # If evolution is provided along with name and type1, predict Type2 (Secondary Type)
        print("Predicting Type2 (Secondary Type)...")
        model_file = "model_type2.pkl"
        features = {
            "Name": _encode_label(name_encoder, name, "Name"),
            "Type1": _encode_label(type1_encoder, type1, "Type1"),
            "Evolution": _encode_label(evolution_encoder, evolution, "Evolution"),
        }
        target_encoder = type2_encoder

    else:
        return "Invalid input combination."

    # Load Model and predict on a single-row frame whose columns match the training features.
    model = jb.load(model_file)
    input_data = pd.DataFrame([list(features.values())], columns=list(features))
    prediction = model.predict(input_data)[0]
    output = target_encoder.inverse_transform([prediction])[0]

    return f"Predicted Output: {output if output != '0' else 'Nothing....!!!'}"

In [180]:
# Example: name, type1 and evolution are all given, so the Type2 model is used.
# evolution="0" is the placeholder label that missing values were replaced with.
predict_pokemon(name="marshadow", type1="Fighting", evolution="0")

Predicting Type2 (Secondary Type)...


'Predicted Output: Ghost'