## Loading data into Pandas

In [None]:
import pandas as pd

# Load the comma-separated dataset into a DataFrame named `df`.
df = pd.read_csv('pokemon_data.csv')

# head(5) returns the first 5 rows of the frame (uncomment to preview them)
# print(df.head(5))

# the delimiter is the separator between values, which is a tab ('\t') in this case
# df = pd.read_csv('pokemon_data.txt', delimiter='\t')

# Select the "HP" column; as the cell's last expression, the notebook displays it.
df['HP']

## Reading Data in Pandas

In [None]:
# Column labels (headers). Printed explicitly: a notebook only echoes the
# LAST expression of a cell, so a bare `df.columns` up here shows nothing.
print(df.columns)

# Read a single column by name.
print(df['Name'])

# Read several columns at once by passing a list of column names.
print(df[['Name', 'Type 1', 'HP']])

# Read specific rows by integer position: rows 0 through 3 (the end of an
# iloc slice is exclusive).
print(df.iloc[0:4])

# Loop over the rows: iterrows() yields (index label, row) pairs, and each
# row can be indexed by column name.
for index, row in df.iterrows():
    print(index, row['Name'])

# Read a single cell by (row position, column position).
# print(df.iloc[2, 1])

# Rows whose "Type 1" value equals "Grass", selected with a boolean mask.
# Kept as the cell's final expression so the notebook renders the result.
df.loc[df['Type 1'] == "Grass"]


## Sorting/Describing Data

In [None]:

# Sort by the "Name" column in descending order (ascending=False).
# sort_values returns a new sorted frame and leaves `df` unchanged, so the
# result is printed here; as a bare mid-cell expression it would be discarded.
print(df.sort_values('Name', ascending=False))

# Sort by several keys: "Type 1" ascending, then "HP" descending within each
# type. `ascending` takes one boolean per sort key, in the same order.
df.sort_values(['Type 1', 'HP'], ascending=[True, False])

## Making changes to the data

In [None]:
# The six stat columns that are added together to form "Total".
stat_cols = ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']

# Way 1: create "Total" by adding the stat columns explicitly.
df['Total'] = df['HP'] + df['Attack'] + df['Defense'] + df['Sp. Atk'] + df['Sp. Def'] + df['Speed']

# Delete a column. drop() returns a new frame, so the result is reassigned.
df = df.drop(columns=['Total'])

# Way 2: row-wise sum (axis=1) over the same columns. They are selected by
# name rather than by position, so the result does not depend on where the
# stat columns happen to sit in the frame.
df['Total'] = df[stat_cols].sum(axis=1)

# Current column order; "Total" was just appended, so it is the last entry.
cols = list(df.columns)

# Move "Total" to position 4: first four columns, then "Total", then every
# remaining column. Slicing up to -1 (instead of a fixed end index) keeps
# all other columns regardless of how many there are.
df = df[cols[:4] + [cols[-1]] + cols[4:-1]]

df.head(5)

## Saving our Data (Exporting into Desired Format)

In [None]:
# Comma-separated export, without the index column. Enabled because the
# "Working with large amounts of data" cell reads 'modified.csv' back in;
# with this line commented out, that cell fails on a fresh run.
df.to_csv('modified.csv', index=False)

# Excel export (requires an Excel writer engine, e.g. openpyxl, to be installed).
# df.to_excel('modified.xlsx', index=False)

# Save the data frame to a .txt file, separating the values with a tab.
df.to_csv('modified.txt', index=False, sep='\t')


## Filtering Data

In [None]:
# Keep rows that are Grass/Poison with HP above 70. Each comparison is
# parenthesised because & binds more tightly than == and >.
new_df = df.loc[(df['Type 1'] == 'Grass') & (df['Type 2'] == 'Poison') & (df['HP'] > 70)]

# Rows whose "Name" contains the substring "Mega" (a substring match, not
# equality). na=False treats a missing name as "no match" instead of
# producing a NaN in the mask. Printed because a bare mid-cell expression
# would be silently discarded by the notebook.
print(new_df.loc[new_df['Name'].str.contains('Mega', na=False)])

# Reset the index after filtering so rows are numbered 0..n-1; drop=True
# discards the old index labels instead of keeping them as a column.
# Reassigned rather than modified in place.
new_df = new_df.reset_index(drop=True)

# Write the filtered rows. index=False is not passed, so the (reset) index
# is written as the first column of the file.
new_df.to_csv('filtered.csv')



## Conditional Changes

In [None]:

# Conditional assignment with .loc: for rows where "Total" is > 500, set the
# "Generation" and "Legendary" columns to "Test 1" and "Test 2" respectively.
# (Left commented out, so it does not run.)
# df.loc[df['Total'] > 500, ['Generation','Legendary']] = ['Test 1', 'Test 2']

# Re-read the dataset from disk, replacing whatever `df` held before.
df = pd.read_csv('pokemon_data.csv')

# Display the reloaded frame.
df

## Aggregate Statistics (Groupby)


In [None]:
# Reload the dataset and attach a helper column of ones named "count";
# counting that column inside each group gives the number of rows per group.
df = pd.read_csv('pokemon_data.csv').assign(count=1)

# Rows per ("Type 1", "Type 2") combination: pick the helper column first,
# then count its entries within each group.
df.groupby(by=['Type 1', 'Type 2'])['count'].count()


## Working with large amounts of data



In [None]:
# Process the file in pieces instead of loading it all at once:
# read_csv(..., chunksize=5) yields successive DataFrames of up to 5 rows.
# The loop variable is called `chunk` so the notebook-wide `df` is not
# overwritten by the last chunk read.
chunk_counts = [
    chunk.groupby(['Type 1']).count()
    for chunk in pd.read_csv('modified.csv', chunksize=5)
]

# Concatenate the per-chunk results once, after the loop; calling concat on
# every iteration re-copies all rows accumulated so far. No empty seed frame
# is used, so the result holds only the columns the chunks actually contain.
# Falls back to an empty frame if the file produced no chunks.
new_df = pd.concat(chunk_counts) if chunk_counts else pd.DataFrame()

# The same "Type 1" value can appear in several chunks, so `new_df` has
# repeated index labels. Summing rows that share a label gives the totals
# for the whole file; shown as the cell's output, `new_df` itself is kept
# as the per-chunk table.
new_df.groupby(level=0).sum()