# Date with Data 2020.04.29 

## Exploratory Data Analysis with Kaggle Pokemon CSV

## Author: Aaron W Chen 

---

## Import necessary libraries

In [1]:
import csv
import pandas as pd
import numpy as np
import matplotlib as pyplot
from random import randrange
%matplotlib inline

---

In [2]:
# Read the Kaggle Pokemon stats CSV (path is relative to this notebook)
# and preview the first five rows.
df = pd.read_csv(filepath_or_buffer='../raw_data/Pokemon.csv')
df.head(n=5)

Unnamed: 0,#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False
3,3,VenusaurMega Venusaur,Grass,Poison,625,80,100,123,122,120,80,1,False
4,4,Charmander,Fire,,309,39,52,43,60,50,65,1,False


In [3]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
numeric_summary = df.describe()
numeric_summary

Unnamed: 0,#,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation
count,800.0,800.0,800.0,800.0,800.0,800.0,800.0,800.0,800.0
mean,362.81375,435.1025,69.25875,79.00125,73.8425,72.82,71.9025,68.2775,3.32375
std,208.343798,119.96304,25.534669,32.457366,31.183501,32.722294,27.828916,29.060474,1.66129
min,1.0,180.0,1.0,5.0,5.0,10.0,20.0,5.0,1.0
25%,184.75,330.0,50.0,55.0,50.0,49.75,50.0,45.0,2.0
50%,364.5,450.0,65.0,75.0,70.0,65.0,70.0,65.0,3.0
75%,539.25,515.0,80.0,100.0,90.0,95.0,90.0,90.0,5.0
max,721.0,780.0,255.0,190.0,230.0,194.0,230.0,180.0,6.0


# Count the distinct values in the '#' column. The column is not unique per
# row: in the preview above, Venusaur and its Mega form both carry # 3.
df.loc[:, '#'].nunique()

---

# Row-wise sum of the six base-stat columns (one value per row).
df.loc[:, ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']].sum(axis='columns')

# Recompute each row's total from its six base stats and store it beside the
# dataset's own 'Total' column so the two can be compared.
base_stat_cols = ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']
df['imputed_calc_total'] = df[base_stat_cols].sum(axis='columns')
df.head(n=20)

# Rows where the dataset's 'Total' disagrees with the recomputed sum.
df.loc[df['Total'].ne(df['imputed_calc_total'])]

# Rows where the two totals agree.
df.loc[df['Total'].eq(df['imputed_calc_total'])]

# Keep only the rows whose Generation is 1, then preview the first 20.
is_gen_1 = df['Generation'].eq(1)
gen_1_df = df.loc[is_gen_1]
gen_1_df.head(n=20)

---

Pokemon Damage calculation (from https://bulbapedia.bulbagarden.net/wiki/Damage):

Damage = ((((2 X Level) / 5 + 2) X Power X A/D ) / 50 + 2) X Modifier

A is the attacking Pokemon's stat used by the move (Atk or SpAtk), and D is the defending Pokemon's Def or SpDef
Modifier = Targets X Weather X Badge X Critical X Random X STAB X Type X Burn X other
Targets = 0.75 with multiple targets, and 1 if solo target
Weather is 1.5 for water in rain or fire in harsh sunlight and 0.5 for water in harsh sunlight or a fire move in rain, otherwise just 1
Badge = 1 (feature dropped after Gen 2)
Critical = 1.5 if the attack is a successful crit
Random = random number between 0.85 and 1.0
STAB = 1.5 if the move's type matches the attacking Pokemon's type, otherwise 1
Type = 0, 0.25, 0.5, 1, 2, or 4 depending on matchup (for a dual-type defender, the multipliers for its two types are multiplied together)

For our purposes (ignoring items, abilities, weather, and badge), we can make Badge, Burn, Other, Critical, Weather, Targets = 1, and STAB = 1.5
Thus:
Modifier = 1.5 X Random X Type

Also, let's use the bigger of Atk/SpAtk and Def/SpDef for A and D.

Power = 100 and Level = 1

Rewrite equation:
Damage = ((240/50 X A/D) + 2) X 1.5 X Random X Type

So, we need to figure out
1. How to get random
    Use Python's `random` module to draw a value between 0.85 and 1.0, inclusive
2. How to get Type, the type-effectiveness multiplier (0, or a power of 2 between 0.25 and 4)
    Compare the inputted Pokemon Type (one at a time) against the Defending Pokemon Types (for dual types, these are multiplied together on defense)
        Assign damage effectiveness for type 1, then multiply against type 2
        
    Repeat calculation for defending Pokemon vs inputted type
        
3. Sort from max to min of damage inflicted, and display top, middle, and bottom 3
4. Sort from max to min of damage received, and display top, middle, and bottom 3
    

In [10]:
def roll_random_modifier():
    """Draw the damage formula's Random factor.

    Returns:
        A float from 0.85 to 1.00 in steps of 0.01, both ends inclusive.
    """
    # randrange() never returns `stop`, so stop must be 101 for the value
    # 100 (i.e. a modifier of 1.00) to be possible.
    return randrange(start=85, stop=101, step=1) / 100


# One roll, kept under the original name for the cells that follow.
rand_mod = roll_random_modifier()

In [11]:
# Distinct values found in the primary-type column.
df.loc[:, 'Type 1'].unique()

array(['Grass', 'Fire', 'Water', 'Bug', 'Normal', 'Poison', 'Electric',
       'Ground', 'Fairy', 'Fighting', 'Psychic', 'Rock', 'Ghost', 'Ice',
       'Dragon', 'Dark', 'Steel', 'Flying'], dtype=object)

In [14]:
# Count, number of distinct values, most frequent value and its frequency
# for the primary-type column.
type_1_column = df['Type 1']
type_1_column.describe()

count       800
unique       18
top       Water
freq        112
Name: Type 1, dtype: object

In [12]:
# Distinct values found in the secondary-type column; rows with no second
# type show up as nan.
df.loc[:, 'Type 2'].unique()

array(['Poison', nan, 'Flying', 'Dragon', 'Ground', 'Fairy', 'Grass',
       'Fighting', 'Psychic', 'Steel', 'Ice', 'Rock', 'Dark', 'Water',
       'Electric', 'Fire', 'Ghost', 'Bug', 'Normal'], dtype=object)

In [15]:
# Count of non-missing entries, number of distinct values, most frequent
# value and its frequency for the secondary-type column.
type_2_column = df['Type 2']
type_2_column.describe()

count        414
unique        18
top       Flying
freq          97
Name: Type 2, dtype: object

In [20]:
# Collect every type name that appears in either type column. Missing
# 'Type 2' entries (NaN, for rows with no second type) are dropped so the
# set holds only real type names.
all_types = set(df['Type 1'].dropna()) | set(df['Type 2'].dropna())
all_types

{'Bug',
 'Dark',
 'Dragon',
 'Electric',
 'Fairy',
 'Fighting',
 'Fire',
 'Flying',
 'Ghost',
 'Grass',
 'Ground',
 'Ice',
 'Normal',
 'Poison',
 'Psychic',
 'Rock',
 'Steel',
 'Water'}

# Sets of strengths and weaknesses for each attacking type: the defending
# types it hits for double, half, or zero damage.
# Only Fire is filled in so far.
# TODO: add the remaining types from the type chart.
TYPE_MATCHUPS = {
    "Fire": {
        "strong_against": {"Bug", "Grass", "Ice", "Steel"},
        "reduced_against": {"Rock", "Fire", "Water", "Dragon"},
        "zero_against": set(),
    },
}


def type_multiplier(attacking_type, defending_types):
    """Return the Type factor of the damage formula for one attacking type.

    Args:
        attacking_type: Type name of the attack, e.g. "Fire". Must be a key
            of TYPE_MATCHUPS.
        defending_types: Iterable holding the defender's type 1 and type 2.
            Entries that are not strings (None, or NaN for a missing second
            type) are skipped.

    Returns:
        The product of the per-type multipliers (2, 1/2, or 0 for each listed
        matchup; 1 when the defending type is in none of the sets).

    Raises:
        KeyError: if attacking_type has no entry in TYPE_MATCHUPS yet.
    """
    matchup = TYPE_MATCHUPS[attacking_type]
    dmg_mult = 1
    for defending_type in defending_types:
        # A missing second type is not a type; it leaves the multiplier unchanged.
        if not isinstance(defending_type, str):
            continue
        if defending_type in matchup["strong_against"]:
            dmg_mult *= 2
        if defending_type in matchup["reduced_against"]:
            dmg_mult *= 1 / 2
        if defending_type in matchup["zero_against"]:
            dmg_mult *= 0
    return dmg_mult
        
    