# Competitive Pokemon Analysis

## Goal

## Results

## Updates

# Pokemon Stats

In [1]:
import pandas as pd
from plotly import tools
import plotly.plotly as py
import plotly.graph_objs as go
import numpy as np

# Load the raw Pokemon stats table from the CSV file in the working directory
Pkmn_raw = pd.read_csv(filepath_or_buffer='Pokemon.csv')

## Cleaning Pokemon Data

### Pokemon Names

In [2]:
# CLEAN POKEMON NAMES
# The cleaned names are written back into Pkmn_raw (assumed fine to keep the _raw name).
# Reference for replacing several values in one pandas call:
# https://stackoverflow.com/questions/36072626/pandas-replace-multiple-values-at-once

# Removes the duplicated name found in front of Mega, Primal and Hoopa entries.
# (?=...) is a lookahead: the greedy .* matches everything in front of the
# target word without consuming the word itself, and the match is replaced
# with an empty string.
strip_duplicate_prefix = r'.*(?=Mega|Primal|Hoopa)'

# Adds the missing space in Forme, Cloak, Mode and Size names.
# First alternative: a word character immediately followed by a capital
# letter; the two captured groups are written back with a space between them.
# Second alternative: the empty position right before "50%"; the groups do not
# take part in that match, so only the space is inserted.
split_glued_words = r'(\w)([A-Z])|(?=50%)'

Name_Replacements = {
    'Name': {
        strip_duplicate_prefix: '',
        split_glued_words: r'\1 \2',
    }
}

Pkmn_raw.replace(to_replace=Name_Replacements, regex=True, inplace=True)

### Pokemon Generation Differences

In [3]:
# CLEAN POKEMON GEN
# Issue: Mega Evolutions and Primal Reversions are Gen 6, but they are listed
# next to their respective Pokemon in the Pokedex and so carry that Pokemon's
# generation.

# The trailing space in each pattern matters.
# CASE: "Meganium" contains "Mega" but is NOT a Mega Evolution.
is_mega_or_primal = Pkmn_raw['Name'].str.contains('Mega |Primal ')

# Start from the listed generation, then set Mega / Primal forms to 6
Pkmn_raw = Pkmn_raw.assign(Gen_Normalized=Pkmn_raw['Generation'])
Pkmn_raw.loc[is_mega_or_primal, 'Gen_Normalized'] = 6


In [4]:
# Preview the first five cleaned rows
Pkmn_raw.head(n=5)

Unnamed: 0,#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary,Gen_Normalized
0,1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False,1
1,2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False,1
2,3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False,1
3,3,Mega Venusaur,Grass,Poison,625,80,100,123,122,120,80,1,False,6
4,4,Charmander,Fire,,309,39,52,43,60,50,65,1,False,1


## Pokemon Base Stats per Generation

In [6]:
# Box plot of total base stats: one box for every Pokemon, then one box per
# normalized generation (I-VI)

# Settings shared by every box trace
box_style = dict(boxpoints='all', pointpos=-2, jitter=0.6, boxmean='sd')

trace_all = go.Box(
    y=Pkmn_raw['Total'],
    name='All Gen',
    text=Pkmn_raw['Name'],
    **box_style
)
data = [trace_all]

for i in Pkmn_raw['Gen_Normalized'].sort_values().unique():
    # Rows belonging to generation i
    in_generation = Pkmn_raw['Gen_Normalized'] == i
    trace = go.Box(
        y=Pkmn_raw.loc[in_generation, 'Total'],
        name="Gen " + str(i),
        text=Pkmn_raw.loc[in_generation, 'Name'],
        **box_style
    )
    data.append(trace)

# Fixed-size figure with the y axis pinned to 100-900
layout = go.Layout(
    title='Total Stats Per Generation (I-VI)',
    autosize=False,
    width=4000,
    height=2000,
    yaxis={'range': [100, 900]},
)

fig = go.Figure(data=data, layout=layout)
py.iplot(fig)



## Pokemon Species Polar Scatter (Nice Looking, but not the best option)

In [7]:
# POLAR SCATTER [CURRENTLY NOT IN USE!]
# Starter comparison
# 0 - 200
#         HP
#  ATTACK   SPECIAL ATTACK
#  DEFENSE  SPECIAL DEFENSE
#        SPEED
# HP ATTACK DEFENSE SP ATTACK SP DEF SPEED
# Set up plot [Check]
# Reorient Categories [Check]
# Value depending on pokemon name and column [Check]


def PkmnPolarStats(Pkmn_Comparison):
    """Build one filled polar trace of base stats for each requested Pokemon.

    Parameters
    ----------
    Pkmn_Comparison : iterable of str
        Names to look up in the 'Name' column of Pkmn_raw.

    Returns
    -------
    list of go.Scatterpolar
        One trace per name that was found, in the order given. A name with
        no matching row is skipped and a warning is issued. If a name
        matches several rows, the first one is used.
    """
    import warnings

    # Order of the axes around the chart; 'HP' is repeated at the end so the
    # outline returns to its starting point.
    Stat_Axes = ['HP', 'Sp. Atk', 'Sp. Def', 'Speed', 'Defense', 'Attack', 'HP']

    data = []
    for Mon in Pkmn_Comparison:
        # Look the Pokemon up once instead of once per stat
        rows = Pkmn_raw.loc[Pkmn_raw['Name'] == Mon, Stat_Axes]
        if rows.empty:
            warnings.warn("No Pokemon named {!r} in Pkmn_raw; skipping".format(Mon))
            continue
        Stats_Polar = go.Scatterpolar(
            name = Mon,
            # Plain numbers from the matching row rather than one-element Series
            r = rows.iloc[0].tolist(),
            theta = Stat_Axes,
            fill = 'toself',
            hoveron = 'points',
        )
        data.append(Stats_Polar)
    return data

# Example comparison: the Charmander evolution line
testdata1 = PkmnPolarStats(['Charizard', 'Charmeleon', 'Charmander'])

# Radial axis fixed to 0-200, angular axis rotated by 90
layout = go.Layout(
    title='Charmander to Charizard',
    showlegend=True,
    polar={
        'radialaxis': {'range': [0, 200]},
        'angularaxis': {'rotation': 90},
    },
)

fig = go.Figure(layout=layout, data=testdata1)
py.iplot(fig, filename='polar-category')

## Pokemon Starter Comparison

In [8]:
# Make a separate DataFrame for the Pokemon starters.
# For each generation, take nine rows from the Pokemon whose listed Generation
# equals Gen_Normalized (this leaves out the rows re-labelled as Gen 6 above).
is_unchanged_gen = Pkmn_raw['Generation'] == Pkmn_raw['Gen_Normalized']

starter_rows = []
for i in Pkmn_raw['Gen_Normalized'].sort_values().unique():
    # Gen 5 Victini CASE: Victini is the 1st Pokemon listed in Gen 5, so the
    # nine rows start one position later there.
    first = 1 if i == 5 else 0
    gen_rows = Pkmn_raw.loc[(Pkmn_raw['Gen_Normalized'] == i) & is_unchanged_gen]
    starter_rows.append(gen_rows[first:first + 9])

# A single concat replaces the repeated DataFrame.append calls
# (DataFrame.append was removed in pandas 2.0); the original index is kept.
Pkmn_starters = pd.concat(starter_rows)

# drop() returns a new frame, so the result must be assigned to take effect
Pkmn_starters = Pkmn_starters.drop(columns=['Legendary','Gen_Normalized'])

# Label the rows 1, 2, 3, 1, 2, 3, ... in order
evo = pd.Series([1,2,3])
Pkmn_starters['Evolution_Stage'] = np.tile(evo, len(Pkmn_starters) // len(evo))


In [9]:
# Plot Box Plot comparing all Starters
def StarterTypeEvoBoxPlot(Type, Evolution, color):
    """Build a grouped box trace of the six base stats for one starter group.

    Parameters
    ----------
    Type : str
        Value matched against the 'Type 1' column of Pkmn_starters.
    Evolution : int
        Value matched against the 'Evolution_Stage' column of Pkmn_starters.
    color : str
        Marker colour of the trace.

    Returns
    -------
    go.Box
        Trace whose y values are the selected Pokemon's stats laid out stat by
        stat, with x holding the stat name of each value and text holding the
        Pokemon name of each value.
    """
    Stats_List=['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']

    # Select the matching starters once and reuse the selection for every stat
    selected = Pkmn_starters.loc[
        (Pkmn_starters['Type 1'] == Type) & (Pkmn_starters['Evolution_Stage'] == Evolution)
    ]

    trace = go.Box(
        # All HP values, then all Attack values, ... in Stats_List order
        y = pd.concat([selected[stat] for stat in Stats_List]),
        # Each stat name once per selected Pokemon, so x lines up with y even
        # when the number of selected Pokemon is not equal to the number of generations
        x = np.repeat(Stats_List, len(selected)),
        name = Type + " Stage: " + str(Evolution),
        boxpoints = 'all',
        # The block of names once per stat, matching the layout of y
        text = pd.concat([selected['Name']] * len(Stats_List)),
        marker = dict(
            color = color
        ),
    )
    return trace

# One trace per evolution stage (1, 2, 3) for each starter type, each stage
# with its own shade of the type colour
grass1, grass2, grass3 = [
    StarterTypeEvoBoxPlot('Grass', stage, shade)
    for stage, shade in enumerate(('#A7DB8D', '#78C850', '#4E8234'), start=1)
]
fire1, fire2, fire3 = [
    StarterTypeEvoBoxPlot('Fire', stage, shade)
    for stage, shade in enumerate(('#F5AC78', '#F08030', '#9C531F'), start=1)
]
water1, water2, water3 = [
    StarterTypeEvoBoxPlot('Water', stage, shade)
    for stage, shade in enumerate(('#9DB7F5', '#6890F0', '#445E9C'), start=1)
]

# Order the traces by stage first, then grass / fire / water within a stage
data = [
    trace
    for same_stage in zip((grass1, grass2, grass3),
                          (fire1, fire2, fire3),
                          (water1, water2, water3))
    for trace in same_stage
]

layout = go.Layout(boxmode='group', title='Pokemon Starters')

fig = go.Figure(layout=layout, data=data)
py.iplot(fig)


## Stat Distribution (Better to select one stat)

In [10]:
import plotly.figure_factory as ff
# One histogram per base stat. The labels double as the Pkmn_raw column names,
# so the data and its labels cannot drift apart.
group_labels = ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']

# Group data together
hist_data = [Pkmn_raw[stat] for stat in group_labels]

# Keep the individual series available under their original names
x1, x2, x3, x4, x5, x6 = hist_data

# Create the distplot with the curves turned off (no bin_size is passed, so
# the library default applies)
fig = ff.create_distplot(hist_data, group_labels, show_curve=False)

# Plot!
py.iplot(fig, filename='Distplot with Multiple Datasets')

## Stat and Damage Calculations

### $$ HP = \Bigg \lfloor \frac{ \big( 2 \times Base + IV + \big\lfloor \frac{EV}{4} \big\rfloor \big) \times Level }{100} \Bigg\rfloor + Level + 10$$ 
### $$ Other Stat = \Bigg \lfloor \Bigg( \Bigg \lfloor\frac{ \big( 2 \times Base + IV + \big\lfloor \frac{EV}{4} \big\rfloor \big) \times Level }{100} \Bigg \rfloor + 5 \Bigg) \times Nature \Bigg \rfloor$$
### $$ Damage = \Bigg( \frac { \big( \frac{ 2 \times Level}{5} +2 \big) \times Power \times A/D}{50} +2 \Bigg) \times Modifier$$
### $$ Modifier = Targets \times Weather \times Critical \times random \times STAB \times Type \times Burn \times Other$$

### $$ IV \in [0, 31]$$
### $$ EV \in [0, 252]$$
### $$ Level \in [1, 100]$$
### A = Attack Value from Attacker (either Attack or Special Attack)
### D = Defense Value from Defender (either Defense or Special Defense)
### Nature is a 1.1 (boosted) or 0.9 (hindered) multiplier for specific stats (e.g. +Attack, -Defense), and 1.0 for every other stat
Floor brackets (the floor function) indicate that the value is rounded down to the nearest integer,
e.g. (from Wikipedia) floor(2.4) = 2
e.g. floor(2.9999) = 2

In [11]:
# Base HP values 1 through 255 inclusive; used as the x axis of the HP curves
HP_range = np.arange(1, 256)

def StatLine_HP(level,IV,EV,name):
    """Return a line trace of the HP stat for every base value in HP_range.

    Evaluates floor((2 * Base + IV + floor(EV / 4)) * level / 100) + level + 10
    with Base taken from HP_range.

    Parameters
    ----------
    level, IV, EV : number
        Inputs of the formula above.
    name : str
        Name given to the trace.

    Returns
    -------
    go.Scatter
        x is HP_range, y is the computed HP stat.
    """
    ev_bonus = np.floor(EV / 4)
    scaled_base = ((2 * HP_range) + IV + ev_bonus) * level
    hp_stat = np.floor(scaled_base / 100) + level + 10
    return go.Scatter(x = HP_range, y = hp_stat, name = name)
# (IV, EV, trace name) for the four combinations of minimum / maximum IV and EV
spreads = [
    ( 0,   0, 'Base'),
    (31,   0, 'Max IV'),
    ( 0, 252, 'MaxEV'),
    (31, 252, 'MAX'),
]

# Level 100 curves
trace_base, trace_IV, trace_EV, trace_max = [
    StatLine_HP(100, iv, ev, label) for iv, ev, label in spreads
]
# Level 50 curves: same combinations, names suffixed with ' 50'
trace_base50, trace_IV50, trace_EV50, trace_max50 = [
    StatLine_HP(50, iv, ev, label + ' 50') for iv, ev, label in spreads
]

data = [
    trace_base, trace_IV, trace_EV, trace_max,
    trace_base50, trace_IV50, trace_EV50, trace_max50,
]

py.iplot(data)

In [19]:
# 201 evenly spaced A/D ratios from 0 to 2 inclusive (step 0.01).
# linspace fixes the number of points and the end value; arange with a
# fractional step can gain or lose the last point through rounding.
DmgRatio_range = np.linspace(0, 2, 201)

def StatLine_Dmg(level,power,name):
    """Return a line trace of unmodified damage for every ratio in DmgRatio_range.

    Evaluates ((2 * level / 5 + 2) * power * ratio) / 50 + 2 with ratio taken
    from DmgRatio_range.

    Parameters
    ----------
    level, power : number
        Inputs of the formula above.
    name : str
        Name given to the trace.

    Returns
    -------
    go.Scatter
        x is DmgRatio_range, y is the computed damage.
    """
    level_factor = ((2 * level) / 5) + 2
    damage = ((level_factor * power * DmgRatio_range) / 50) + 2
    return go.Scatter(x = DmgRatio_range, y = damage, name = name)
# Damage curves for move powers 150 / 100 / 50 at level 100 and at level 50
base_100_150, base_100_100, base_100_50 = [
    StatLine_Dmg(100, power, 'L: 100, P: ' + str(power)) for power in (150, 100, 50)
]
base_50_150, base_50_100, base_50_50 = [
    StatLine_Dmg(50, power, 'L: 50, P: ' + str(power)) for power in (150, 100, 50)
]

# Listed from strongest to weakest within each level
data = [
    base_100_150, base_100_100, base_100_50,
    base_50_150, base_50_100, base_50_50,
]

layout = {
    'title': 'Base Power (No Multipliers)',
    'yaxis': {'title': 'Damage'},
    'xaxis': {'title': 'A/D'},
}
fig = {'data': data, 'layout': layout}
py.iplot(fig)
