### Pandas

Exploratory data analysis with pandas on a Pokemon dataset.
The following pandas functions, methods, and indexers are used:
    
    1. read_csv()
    2. head()
    3. tail()
    4. iterrows() 
    5. loc 
    6. iloc
    7. describe()
    8. sort_values()
    9. drop()
    10. to_csv(), to_excel()
    11. statistical functions - groupby(), mean(), sum(), count()


### Load data into pandas

In [None]:
import pandas as pd

In [None]:
# Load the Pokemon dataset from a local CSV file into a DataFrame.
# NOTE: the path is an absolute, machine-specific Windows location.
df = pd.read_csv(filepath_or_buffer='C:\\Users\\ikennan\\Documents\\pokemon_data.csv')

In [None]:
# Bare expression: in a notebook this displays the DataFrame bound to `df`.
df

In [None]:
# Preview the first three rows of the DataFrame.
df.head(n=3)

In [None]:
# Preview the last three rows of the DataFrame.
df.tail(n=3)

### Reading data in pandas

In [None]:
# read headers: `columns` holds the column labels of the DataFrame
df.columns

In [None]:
# Only the final expression of a notebook cell is rendered, so run these
# one at a time to see each result.

# A single column, limited to its first ten entries.
df['Name'].head(10)

# Several columns at once, selected with a list of labels.
df.loc[:, ['Name', 'Type 1', 'Type 2']]

# A range of rows by index label (.loc slices include the end label).
df.loc[0:5]

# One cell by integer position: row 2, column 1.
df.iat[2, 1]

In [None]:
# Walk the DataFrame one row at a time; iterrows() yields the index label
# together with the row as a Series, from which the name is looked up.
for row_label, record in df.iterrows():
    print(row_label, record['Name'])

In [None]:
# Row filtering with df.loc and a boolean mask (one mask per example).

# Rows whose name is exactly 'Pikachu'.
df.loc[df['Name'].eq('Pikachu')]

# Rows whose primary type is 'Electric'.
df.loc[df['Type 1'].eq('Electric')]

# Rows with a Speed strictly greater than 100.
df.loc[df['Speed'].gt(100)]

### Sorting/Describing Data 

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the
# DataFrame's numeric columns.
df.describe()

In [None]:
# sort_values returns a new, sorted DataFrame; df itself is not modified.

# Single key, default ascending order.
df.sort_values(by='Name')
# Single key, descending order.
df.sort_values(by='Defense', ascending=False)

# Several keys: sort by HP, then break ties with Attack.
df.sort_values(by=['HP', 'Attack'])
# Per-key direction: Type 1 ascending, HP descending within each type.
df.sort_values(by=['Type 1', 'HP'], ascending=[True, False])

### Making changes to data

In [None]:
#view headers: list the column labels before adding/removing columns
df.columns

In [None]:
#add columns - method 1
# Total is the element-wise sum of the six stat columns. Each column is
# added exactly once: HP used to be counted twice while Speed was left out.
df['Total'] = (
    df['HP'] + df['Attack'] + df['Defense']
    + df['Sp. Atk'] + df['Sp. Def'] + df['Speed']
)
df.head(10)

In [None]:
# Remove the 'Total' column. drop() returns a new DataFrame, so the result
# is bound back to df.
df = df.drop('Total', axis=1)
df

In [None]:
# Add column - method 2: sum across each row over a positional column slice.
# NOTE(review): positions 4..9 are presumably the six stat columns
# (HP through Speed) - confirm against df.columns before relying on this.
df['Total'] = df.iloc[:, 4:10].sum(axis='columns')
df

In [None]:
#rearrange columns
# Move 'Total' to position 4 by name instead of slicing with hard-coded
# positions. This does not depend on the number of columns or on 'Total'
# being last, and re-running the cell leaves the order unchanged.
cols = list(df.columns)
cols.insert(4, cols.pop(cols.index('Total')))
df = df[cols]
df.head(10)

#sort by total strength (highest Total first)
df.sort_values('Total', ascending=False)


### Exporting Data 

In [None]:
# Write the modified DataFrame to a new CSV file; index=False leaves the
# row index out of the file.
df.to_csv(path_or_buf='C:\\Users\\ikennan\\Documents\\pokemon_data_m.csv', index=False)

### Filtering data 

In [None]:
# Boolean masks combine element-wise: & means "and", | means "or".

# and: primary type Grass with secondary type Poison
df.loc[df['Type 1'].eq('Grass') & df['Type 2'].eq('Poison')]

# or: primary type Fire, or secondary type Water
df.loc[df['Type 1'].eq('Fire') | df['Type 2'].eq('Water')]

# and & or: & binds tighter than |, so the grouping is written out.
# Keeps every Fire primary type, plus Water secondary types with HP >= 70.
new_df = df.loc[
    df['Type 1'].eq('Fire')
    | (df['Type 2'].eq('Water') & df['HP'].ge(70))
]

# Export the filtered rows to CSV without the row index.
new_df.to_csv('C:\\Users\\ikennan\\Documents\\pokemon_data_filtered.csv', index=False)

# Renumber the rows from 0 in place, discarding the old index labels.
new_df.reset_index(drop=True, inplace=True)

new_df

In [None]:
# Case-sensitive substring test on the Name column, computed once.
has_mega = df['Name'].str.contains('Mega')

# All rows whose name contains 'Mega'.
df.loc[has_mega]

# ~ inverts the mask: all rows whose name does NOT contain 'Mega'.
df.loc[~has_mega]

In [None]:
# Filtering with a regular expression.

import re

# Alternation: keep rows whose primary type contains 'Fire' or 'Grass'.
# To ignore letter case, pass flags=re.I to str.contains.
fire_or_grass = df['Type 1'].str.contains('Fire|Grass', regex=True)
df.loc[fire_or_grass]

In [None]:
# Names starting with 'pi' in any letter case: ^ anchors the pattern at the
# start of the string and case=False makes the match case-insensitive.
starts_with_pi = df['Name'].str.contains('^pi[a-z]*', case=False, regex=True)
df.loc[starts_with_pi]

### Conditional Changes

In [None]:
# Work on an independent copy (copy() is deep by default) so df is untouched.
changed_df = df.copy()

# .loc[row_mask, column] = value overwrites that column for the matching rows.
# Rename every 'Fire' primary type to 'Flamer'.
changed_df.loc[changed_df['Type 1'].eq('Fire'), 'Type 1'] = 'Flamer'

# Then flag every 'Flamer' row as Legendary.
changed_df.loc[changed_df['Type 1'].eq('Flamer'), 'Legendary'] = True

changed_df

In [None]:
#modify multiple columns
# The values written below are strings. Writing a string into a numeric or
# boolean column is deprecated in recent pandas, so both target columns are
# converted to object dtype first.
# NOTE(review): assumes 'Generation' and 'Legendary' are not already object
# dtype - confirm with changed_df.dtypes.
changed_df = changed_df.astype({'Generation': object, 'Legendary': object})

# A scalar is broadcast to every selected cell.
changed_df.loc[changed_df['Total'] > 500, ['Generation', 'Legendary']] = 'TEST VALUE'

# A list assigns one value per listed column (this overwrites the line above).
changed_df.loc[changed_df['Total'] > 500, ['Generation', 'Legendary']] = ['Test1', 'Test2']

changed_df

### Aggregate Statistics (Groupby)

In [None]:
# Aggregate statistics per primary type.
# numeric_only=True restricts the aggregation to numeric columns. From
# pandas 2.0 onward text columns such as 'Name' are no longer dropped
# automatically, so mean() raises TypeError without it and sum() would
# concatenate the strings.
df.groupby(['Type 1']).mean(numeric_only=True)

# Same averages, ordered by highest mean Defense first.
df.groupby(['Type 1']).mean(numeric_only=True).sort_values('Defense', ascending=False)

df.groupby(['Type 1']).sum(numeric_only=True)

# count() reports the number of non-missing values per column and group.
df.groupby(['Type 1']).count()

#tidy up count
# A constant helper column gives one clean count per group; selecting it
# before count() avoids counting every other column first.
df['Count'] = 1
df.groupby(['Type 1'])['Count'].count()

df.groupby(['Type 1', 'Type 2'])['Count'].count()

### Working with large amounts of data

In [None]:
#break large datasets into smaller, more manageable chunks
# With chunksize set, read_csv yields DataFrames of at most 5 rows each
# instead of loading the whole file at once. The loop variable is named
# `chunk` so the full DataFrame bound to `df` is not overwritten by the
# last chunk.
for chunk in pd.read_csv('C:\\Users\\ikennan\\Documents\\pokemon_data_m.csv', chunksize=5):
    print('Chunk DF')
    print(chunk)

In [None]:
# Display whatever DataFrame is currently bound to `df`.
df