# Pokemon dataset

We are going to predict whether a Pokemon is legendary or not, because of its infinite business value.

## Libraries
This demo shows scikit-learn models trained and validated on a pandas DataFrame, so we will use the scikit-learn and pandas libraries.

In [1]:
import os
import pandas as pd

## Load dataset
This dataset originates from: https://www.kaggle.com/abcsds/pokemon/downloads/Pokemon.csv

In [2]:
data_folder = "data"
data_file = "Pokemon.csv"

# `DataFrame.from_csv` is deprecated and was removed in pandas 1.0.
# `read_csv` with `index_col=0` keeps the first CSV column ("#") as the index,
# which is what `from_csv` did by default.
data_frame = pd.read_csv(os.path.join(data_folder, data_file), index_col=0)
data_frame.head(5)

Unnamed: 0_level_0,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False
3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False
3,VenusaurMega Venusaur,Grass,Poison,625,80,100,123,122,120,80,1,False
4,Charmander,Fire,,309,39,52,43,60,50,65,1,False


## Metadata

In [3]:
# Report how many rows the loaded frame holds.
print("number of rows: " + str(data_frame.shape[0]))

number of rows: 800


In [4]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
data_frame.describe()

Unnamed: 0,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation
count,800.0,800.0,800.0,800.0,800.0,800.0,800.0,800.0
mean,435.1025,69.25875,79.00125,73.8425,72.82,71.9025,68.2775,3.32375
std,119.96304,25.534669,32.457366,31.183501,32.722294,27.828916,29.060474,1.66129
min,180.0,1.0,5.0,5.0,10.0,20.0,5.0,1.0
25%,330.0,50.0,55.0,50.0,49.75,50.0,45.0,2.0
50%,450.0,65.0,75.0,70.0,65.0,70.0,65.0,3.0
75%,515.0,80.0,100.0,90.0,95.0,90.0,90.0,5.0
max,780.0,255.0,190.0,230.0,194.0,230.0,180.0,6.0


In [5]:
# Show the dtype of every column to spot the non-numeric (object) ones.
data_frame.dtypes

Name          object
Type 1        object
Type 2        object
Total          int64
HP             int64
Attack         int64
Defense        int64
Sp. Atk        int64
Sp. Def        int64
Speed          int64
Generation     int64
Legendary       bool
dtype: object

## Splitting labels from the set

In [6]:
# Move the "Name" column into the index, presumably so the free-text name is
# not one-hot encoded as a feature later on.
# NOTE(review): this cell rebinds `data_frame`, so running it a second time
# fails because "Name" is no longer a column.
data_frame = data_frame.set_index(keys=["Name"])

In [7]:
# Target vector: the boolean "Legendary" flag.
y = data_frame["Legendary"]
# Feature matrix: every remaining column. `axis` is passed by keyword because
# pandas 2.0 no longer accepts it as a positional argument of `drop`.
x = data_frame.drop(["Legendary"], axis=1)

## Feature engineering

We will convert the categoricals with one hot encoding

In [8]:
# One-hot encode the object-typed columns ("Type 1", "Type 2"). With
# drop_first=True the first category of each column is left out and is
# represented by all of that column's indicators being zero.
x = pd.get_dummies(data=x, drop_first=True)
x.head(n=1)

Unnamed: 0_level_0,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Type 1_Dark,Type 1_Dragon,...,Type 2_Ghost,Type 2_Grass,Type 2_Ground,Type 2_Ice,Type 2_Normal,Type 2_Poison,Type 2_Psychic,Type 2_Rock,Type 2_Steel,Type 2_Water
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Bulbasaur,318,45,49,49,65,65,45,1,0,0,...,0,0,0,0,0,1,0,0,0,0


## Data science
To keep the example simple, we'll just use an ordinary logistic regression for binary classification.

In [9]:
import tpot

In [10]:
from sklearn.linear_model import LogisticRegression
# An L1 penalty needs a solver that supports it. 'liblinear' was the default
# when this notebook was written; since scikit-learn 0.22 the default is
# 'lbfgs', which rejects penalty='l1'. The solver is therefore pinned here.
model = LogisticRegression(C=10, penalty='l1', tol=0.01, solver='liblinear')

## Evaluation

In [11]:
from sklearn.model_selection import cross_val_score

In [12]:
# 10-fold cross validation of the (still unfitted) model on the full data set;
# no `scoring` is given, so the estimator's default scorer is used.
scores = cross_val_score(estimator=model, X=x, y=y, cv=10)

In [13]:
# Display the score obtained on each cross-validation fold.
scores

array([ 0.92592593,  0.92592593,  0.92592593,  0.91358025,  0.92592593,
        0.87341772,  0.93670886,  0.97468354,  0.92405063,  0.89873418])

# Host the demo interface
We need a trained model and a clear x and y set

In [14]:
# Fit on the complete data set for serving. `fit` returns the estimator itself,
# so `trained_model` and `model` refer to the same fitted object.
trained_model = model.fit(X=x, y=y)

In [None]:
from flask import Flask, jsonify, request
from flask_cors import CORS, cross_origin
# Create the Flask application that serves the demo endpoints and allow
# cross-origin requests to it.
app = Flask(import_name=__name__)
CORS(app)

@app.route("/predict", methods=['POST'])
@cross_origin()
def predict():
    """Classify one Pokemon posted as a JSON object of feature values.

    The request body must be a JSON object holding a value for every column
    of the training frame ``x`` (the names served by ``/input_features``).
    Values may be numbers or numeric strings.

    Returns:
        ``{"prediction": "True"}`` or ``{"prediction": "False"}`` on success,
        or ``{"error": ...}`` with HTTP status 400 when the body is missing,
        is not a JSON object, lacks features, or holds non-numeric values.
    """
    # silent=True yields None instead of raising on a missing or malformed
    # body, so every bad request is answered by the JSON error below.
    input_data = request.get_json(silent=True)
    print("Received input data: {}".format(str(input_data)))
    if not isinstance(input_data, dict):
        print("Empty data received")
        # A Flask view must not return None; answer with an explicit 400.
        return jsonify({"error": "Request body must be a JSON object of feature values"}), 400

    missing_features = [name for name in x.columns if name not in input_data]
    if missing_features:
        return jsonify({"error": "Missing input features", "missing": missing_features}), 400

    try:
        # Put the columns in the exact training order (JSON key order is up to
        # the client) and cast numeric strings such as '318' to numbers.
        input_df = pd.DataFrame([input_data]).reindex(columns=x.columns).astype(float)
    except (TypeError, ValueError):
        return jsonify({"error": "All feature values must be numeric"}), 400

    # Use the estimator fitted for serving rather than relying on `model`
    # happening to be the same object.
    prediction = trained_model.predict(input_df)
    print(prediction)
    return jsonify({"prediction": str(prediction[0])})

@app.route("/input_features")
@cross_origin()
def input_features():
    """Return the feature (column) names of ``x`` as a JSON array."""
    feature_names = list(x.columns)
    return jsonify(feature_names)

@app.route("/example_input")
@cross_origin()
def example_input():
    """Return the first row of ``x`` serialised as a JSON object string."""
    # NOTE(review): a plain str return value is sent with Flask's default
    # mimetype rather than application/json - confirm no client relies on the
    # header before changing it.
    first_row = x.iloc[0]
    return first_row.to_json()

# Start the server; this call blocks the cell until it is interrupted.
# NOTE(review): host '0.0.0.0' listens on all network interfaces - confirm
# that this exposure is intended outside a local demo.
app.run(
    host='0.0.0.0',
    port=5777,
    debug=False,
    threaded=True,
)

 * Running on http://0.0.0.0:5777/ (Press CTRL+C to quit)
127.0.0.1 - - [16/Aug/2017 14:18:48] "OPTIONS /predict HTTP/1.1" 200 -


Received input data: {'Total': '3180', 'HP': '450', 'Attack': '490', 'Defense': '490', 'Sp. Atk': '650', 'Sp. Def': '650', 'Speed': '450', 'Generation': 1, 'Type 1_Dark': 0, 'Type 1_Dragon': 0, 'Type 1_Electric': 0, 'Type 1_Fairy': 0, 'Type 1_Fighting': 0, 'Type 1_Fire': 0, 'Type 1_Flying': 0, 'Type 1_Ghost': 0, 'Type 1_Grass': 1, 'Type 1_Ground': 0, 'Type 1_Ice': 0, 'Type 1_Normal': 0, 'Type 1_Poison': 0, 'Type 1_Psychic': 0, 'Type 1_Rock': 0, 'Type 1_Steel': 0, 'Type 1_Water': 0, 'Type 2_Dark': 0, 'Type 2_Dragon': 0, 'Type 2_Electric': 0, 'Type 2_Fairy': 0, 'Type 2_Fighting': 0, 'Type 2_Fire': 0, 'Type 2_Flying': 0, 'Type 2_Ghost': 0, 'Type 2_Grass': 0, 'Type 2_Ground': 0, 'Type 2_Ice': 0, 'Type 2_Normal': 0, 'Type 2_Poison': 1, 'Type 2_Psychic': 0, 'Type 2_Rock': 0, 'Type 2_Steel': 0, 'Type 2_Water': 0}
[False]


127.0.0.1 - - [16/Aug/2017 14:18:48] "POST /predict HTTP/1.1" 200 -
127.0.0.1 - - [16/Aug/2017 14:19:11] "OPTIONS /predict HTTP/1.1" 200 -


Received input data: {'Total': '3180', 'HP': '4500', 'Attack': '4900', 'Defense': '4900', 'Sp. Atk': '6500', 'Sp. Def': '6500', 'Speed': '4500', 'Generation': 1, 'Type 1_Dark': 0, 'Type 1_Dragon': 0, 'Type 1_Electric': 0, 'Type 1_Fairy': 0, 'Type 1_Fighting': 0, 'Type 1_Fire': 0, 'Type 1_Flying': 0, 'Type 1_Ghost': 0, 'Type 1_Grass': 1, 'Type 1_Ground': 0, 'Type 1_Ice': 0, 'Type 1_Normal': 0, 'Type 1_Poison': 0, 'Type 1_Psychic': 0, 'Type 1_Rock': 0, 'Type 1_Steel': 0, 'Type 1_Water': 0, 'Type 2_Dark': 0, 'Type 2_Dragon': 0, 'Type 2_Electric': 0, 'Type 2_Fairy': 0, 'Type 2_Fighting': 0, 'Type 2_Fire': 0, 'Type 2_Flying': 0, 'Type 2_Ghost': 0, 'Type 2_Grass': 0, 'Type 2_Ground': 0, 'Type 2_Ice': 0, 'Type 2_Normal': 0, 'Type 2_Poison': 1, 'Type 2_Psychic': 0, 'Type 2_Rock': 0, 'Type 2_Steel': 0, 'Type 2_Water': 0}
[ True]


127.0.0.1 - - [16/Aug/2017 14:19:11] "POST /predict HTTP/1.1" 200 -
127.0.0.1 - - [16/Aug/2017 14:21:20] "GET /example_input HTTP/1.1" 200 -
127.0.0.1 - - [16/Aug/2017 14:21:43] "GET /example_input HTTP/1.1" 200 -
127.0.0.1 - - [16/Aug/2017 14:22:25] "GET /example_input HTTP/1.1" 200 -
127.0.0.1 - - [16/Aug/2017 14:23:07] "GET /example_input HTTP/1.1" 200 -
127.0.0.1 - - [16/Aug/2017 14:24:44] "GET /example_input HTTP/1.1" 200 -
127.0.0.1 - - [16/Aug/2017 14:25:11] "GET /example_input HTTP/1.1" 200 -
127.0.0.1 - - [16/Aug/2017 14:25:17] "OPTIONS /predict HTTP/1.1" 200 -


Received input data: {'Total': 318, 'HP': 45, 'Attack': 49, 'Defense': 49, 'Sp. Atk': 65, 'Sp. Def': 65, 'Speed': 45, 'Generation': 1, 'Type 1_Dark': 0, 'Type 1_Dragon': 0, 'Type 1_Electric': 0, 'Type 1_Fairy': 0, 'Type 1_Fighting': 0, 'Type 1_Fire': 0, 'Type 1_Flying': 0, 'Type 1_Ghost': 0, 'Type 1_Grass': 1, 'Type 1_Ground': 0, 'Type 1_Ice': 0, 'Type 1_Normal': 0, 'Type 1_Poison': 0, 'Type 1_Psychic': 0, 'Type 1_Rock': 0, 'Type 1_Steel': 0, 'Type 1_Water': 0, 'Type 2_Dark': 0, 'Type 2_Dragon': 0, 'Type 2_Electric': 0, 'Type 2_Fairy': 0, 'Type 2_Fighting': 0, 'Type 2_Fire': 0, 'Type 2_Flying': 0, 'Type 2_Ghost': 0, 'Type 2_Grass': 0, 'Type 2_Ground': 0, 'Type 2_Ice': 0, 'Type 2_Normal': 0, 'Type 2_Poison': 1, 'Type 2_Psychic': 0, 'Type 2_Rock': 0, 'Type 2_Steel': 0, 'Type 2_Water': 0}
[False]


127.0.0.1 - - [16/Aug/2017 14:25:17] "POST /predict HTTP/1.1" 200 -
127.0.0.1 - - [16/Aug/2017 14:25:42] "GET /example_input HTTP/1.1" 200 -
127.0.0.1 - - [16/Aug/2017 14:25:55] "GET /example_input HTTP/1.1" 200 -
127.0.0.1 - - [16/Aug/2017 14:26:07] "GET /example_input HTTP/1.1" 200 -
