## Separating data based on type

In [4]:
import pandas as pd

In [5]:
# Load the raw Pokemon table; the path is resolved relative to the working directory.
pokemon_csv = 'data/Pokemon.csv'
poke = pd.read_csv(pokemon_csv)

In [6]:
print(poke.columns)

# Normalise the type labels to lower case so later string comparisons
# ('water', 'grass', ...) match. The vectorised `.str` accessor replaces the
# per-element lambda and leaves missing values (NaN) as they are, so Pokemon
# without a secondary type keep NaN in 'Type 2'.
for type_column in ('Type 1', 'Type 2'):
    poke[type_column] = poke[type_column].str.lower()

Index(['#', 'Name', 'Type 1', 'Type 2', 'Total', 'HP', 'Attack', 'Defense',
       'Sp. Atk', 'Sp. Def', 'Speed', 'Generation', 'Legendary'],
      dtype='object')


In [7]:
# Work on the two type columns directly.
primary_types = poke['Type 1']
secondary_types = poke['Type 2']

# Occurrences of each label as primary / secondary type.
type1_counts = primary_types.value_counts()
type2_counts = secondary_types.value_counts()

# Distinct primary-type labels, in order of first appearance.
type_unique = primary_types.unique()

In [8]:
# Report how often each label occurs as primary and as secondary type,
# followed by the distinct primary types. A 40-dash rule separates the
# three sections. (The first header was an f-string without placeholders;
# it is now a plain string, and the printed text is unchanged.)
separator = '-' * 40

print('Type 1 count: \n')
print(type1_counts)
print(separator)
print('type 2 count: \n')
print(type2_counts)
print(separator)
print(type_unique)

Type 1 count: 

Type 1
water       140
normal      123
grass       108
bug          90
fire         70
psychic      70
electric     67
rock         61
dark         48
dragon       45
ghost        44
fighting     42
ground       42
poison       42
steel        41
ice          32
fairy        29
flying       10
Name: count, dtype: int64
----------------------------------------
type 2 count: 

Type 2
flying      110
psychic      47
poison       44
ground       40
fairy        40
fighting     40
dragon       37
steel        34
ghost        32
grass        31
dark         28
water        22
ice          21
fire         18
rock         16
normal       13
electric     10
bug           9
Name: count, dtype: int64
----------------------------------------
['grass' 'fire' 'water' 'bug' 'normal' 'poison' 'electric' 'ground'
 'fairy' 'fighting' 'psychic' 'rock' 'ghost' 'ice' 'dragon' 'dark' 'steel'
 'flying']


In [9]:
# Total appearances of each type, whether as primary or secondary.
# fill_value=0 treats a type that is absent from one of the two columns as a
# zero count instead of producing NaN. Aligning on a missing label turns the
# result into floats, so cast back to int to keep these as whole-number counts.
tot_count = type1_counts.add(type2_counts, fill_value=0).astype(int)
print(tot_count)


bug          99
dark         76
dragon       82
electric     77
fairy        69
fighting     82
fire         88
flying      120
ghost        76
grass       139
ground       82
ice          53
normal      136
poison       86
psychic     117
rock         77
steel        75
water       162
Name: count, dtype: int64


In [10]:
# One-hot encode the secondary type as integer 0/1 columns named 'type2_<type>'.
# Rows whose 'Type 2' is missing get 0 in every one of these columns.
poke_dummies = pd.get_dummies(poke['Type 2'], prefix='type2', dtype=int)

# Drop any 'type2_*' columns left over from an earlier run of this cell before
# attaching the fresh ones; otherwise re-running the cell would append duplicate
# columns to `poke` and later lookups such as wg['type2_water'] would return a
# DataFrame instead of a Series.
poke = pd.concat(
    [poke.drop(columns=poke_dummies.columns, errors='ignore'), poke_dummies],
    axis=1,
)

In [11]:
# One-hot encode the primary type as integer 0/1 columns named 'type1_<type>'.
poke_dummies = pd.get_dummies(poke['Type 1'], prefix='type1', dtype=int)

# Drop any 'type1_*' columns left over from an earlier run of this cell before
# attaching the fresh ones, so re-running the cell cannot create duplicate
# column labels in `poke`.
poke = pd.concat(
    [poke.drop(columns=poke_dummies.columns, errors='ignore'), poke_dummies],
    axis=1,
)

In [12]:
# Keep every Pokemon whose primary or secondary type is water or grass.
target_types = ['water', 'grass']
wg = poke[poke['Type 1'].isin(target_types) | poke['Type 2'].isin(target_types)]

# Remove one-hot columns that are 0 for every remaining row, i.e. types that
# never occur in this subset. Only the dummy columns are candidates: running
# the all-zero test over every column would also discard a real feature such
# as 'Legendary' if the subset happened to contain no legendaries (False
# compares equal to 0), and the later wg['Legendary'] lookup would then fail.
dummy_columns = [
    column for column in wg.columns
    if str(column).startswith(('type1_', 'type2_'))
]
unused_dummies = [column for column in dummy_columns if (wg[column] == 0).all()]

# .copy() makes `wg` an independent frame, so the column assignments in the
# following cells do not raise SettingWithCopyWarning.
wg = wg.drop(columns=unused_dummies).copy()

In [13]:
# Flag rows that have no secondary type: 1 when 'Type 2' is missing, else 0.
lacks_second_type = wg['Type 2'].isna()
wg['type2_nan'] = lacks_second_type.astype(int)

In [14]:
# List the columns currently present in `wg`, one label per line.
for column_name in wg.columns.tolist():
    print(column_name)

#
Name
Type 1
Type 2
Total
HP
Attack
Defense
Sp. Atk
Sp. Def
Speed
Generation
Legendary
type2_bug
type2_dark
type2_dragon
type2_electric
type2_fairy
type2_fighting
type2_flying
type2_ghost
type2_grass
type2_ground
type2_ice
type2_normal
type2_poison
type2_psychic
type2_rock
type2_steel
type2_water
type1_bug
type1_dark
type1_dragon
type1_electric
type1_fire
type1_flying
type1_ghost
type1_grass
type1_ground
type1_ice
type1_normal
type1_poison
type1_psychic
type1_rock
type1_water
type2_nan


In [15]:
# Rows of `wg` without a secondary type, and how many there are.
single_type_mask = wg['type2_nan'].eq(1)
count = wg.loc[single_type_mask]
print(count.shape[0])

124


In [16]:
# Add one 0/1 column per target type: 1 when the Pokemon has that type in
# either the primary or the secondary slot.
for element in ('water', 'grass'):
    as_primary = wg[f'type1_{element}'].eq(1)
    as_secondary = wg[f'type2_{element}'].eq(1)
    wg[element] = (as_primary | as_secondary).astype(int)

In [17]:
# Store the 'Legendary' flag as an integer column instead of its original dtype.
legendary_flag = wg['Legendary']
wg['Legendary'] = legendary_flag.astype(int)

In [18]:
# Row masks: the Pokemon has the given type as primary or secondary type.
is_water = (wg['Type 1'] == 'water') | (wg['Type 2'] == 'water')
is_grass = (wg['Type 1'] == 'grass') | (wg['Type 2'] == 'grass')

# A Pokemon that is both water and grass ends up in both frames.
water = wg[is_water]
grass = wg[is_grass]


In [19]:
# Sanity check: both subsets must expose the same column labels in the same order.
water_columns = water.columns
grass_columns = grass.columns
same_columns = water_columns.equals(grass_columns)
print(same_columns)

True


In [20]:
# The raw type labels and the '#' column are removed from both subsets;
# the type information remains available through the one-hot columns.
columns_to_remove = ['Type 1', "Type 2", '#']
water = water.drop(columns=columns_to_remove)
grass = grass.drop(columns=columns_to_remove)

In [21]:
# Write the two per-type subsets and the combined water/grass frame to CSV
# files in the working directory (the row index is written as well).
for csv_name, frame in (
    ('water.csv', water),
    ('grass.csv', grass),
    ('water_grass.csv', wg),
):
    frame.to_csv(csv_name)

In [22]:
# Fixed seed so the random 50/50 splits, and the CSV files written from them,
# are identical on every Restart & Run All instead of changing per run.
SPLIT_SEED = 42

# Randomly pick half of the water rows; the rows that were not picked form
# the second half (selected by dropping the sampled index labels).
water1 = water.sample(frac=0.5, random_state=SPLIT_SEED)
water2 = water.drop(water1.index)

# Same procedure for the grass rows.
grass1 = grass.sample(frac=0.5, random_state=SPLIT_SEED)
grass2 = grass.drop(grass1.index)

# Each pair of halves must partition its source frame exactly.
assert len(water1) + len(water2) == len(water)
assert len(grass1) + len(grass2) == len(grass)

In [23]:
# Write each half of the water / grass splits to its own CSV file in the
# working directory (the row index is written as well).
for csv_name, half in (
    ('water1.csv', water1),
    ('water2.csv', water2),
    ('grass1.csv', grass1),
    ('grass2.csv', grass2),
):
    half.to_csv(csv_name)