# Pandas Tutorial

Pandas is a commonly used data manipulation library in Python, especially for tabular data.

### Set up Environment

In [44]:
import numpy as np
import pandas as pd

# Load the sales table and use its 'month' column as the row index
df = pd.read_csv(
    'sales.csv',
    index_col='month',
)

### Get Basic Information

In [54]:
# DataFrame.info() writes its summary straight to stdout and returns None,
# so it is called on its own; wrapping it in print() adds a stray "None" line.
df.info()
print()  # blank line separating the summary from the next section

# .columns holds the column labels as a pandas Index
print("columns attribute:", type(df.columns))
print(df.columns, "\n")

# .values exposes the data as a single NumPy array; the mixed int/float
# columns are shown upcast to one common float dtype
print("values attribute:", type(df.values))
print(df.values, "\n")

<class 'pandas.core.frame.DataFrame'>
Index: 6 entries, Jan to Jun
Data columns (total 3 columns):
eggs    6 non-null int64
salt    5 non-null float64
spam    6 non-null int64
dtypes: float64(1), int64(2)
memory usage: 192.0+ bytes
None 

columns attribute: <class 'pandas.core.indexes.base.Index'>
Index(['eggs', 'salt', 'spam'], dtype='object') 

values attribute: <class 'numpy.ndarray'>
[[ 47.  12.  17.]
 [110.  50.  31.]
 [221.  89.  72.]
 [ 77.  87.  20.]
 [132.  nan  52.]
 [205.  60.  55.]] 



### Using the Bracket Accessor to Select a Column

In [77]:
# Indexing the frame with a single column label yields that column as a Series
eggs = df['eggs']

column_type = type(eggs)
column_name = eggs.name  # the Series carries the label of the column it came from
print("Output type:", column_type)
print("Series name:", column_name)
print(eggs, "\n")

Output type: <class 'pandas.core.series.Series'>
Series name: eggs
month
Jan     47
Feb    110
Mar    221
Apr     77
May    132
Jun    205
Name: eggs, dtype: int64 



### Accessing Items in a Series

In [68]:
# Bracket lookup by label behaves like a lookup in a fixed-size dict
print('eggs["Jan"] =', eggs["Jan"])

# get() yields None for an absent label, or the supplied default when one is given
missing_label = "Jul"
print('eggs["Jul"] =', eggs.get(missing_label))
print('eggs["Jul"] =', eggs.get(missing_label, default=0))

eggs["Jan"] = 47
eggs["Jul"] = None
eggs["Jul"] = 0


### Operations on Frames and Series

In [76]:
# Arithmetic lines operands up by row and column label;
# any label present on only one side comes out as NaN.
middle_rows = df[1:4]
egg_salt_columns = df[['eggs', 'salt']]
print(middle_rows + egg_salt_columns, "\n")

jan_through_may = eggs['Jan':'May']  # label slice: both endpoints are included
feb_onward = eggs[1:]                # positional slice: skips the first row
print(jan_through_may + feb_onward)

        eggs   salt  spam
month                    
Apr    154.0  174.0   NaN
Feb    220.0  100.0   NaN
Jan      NaN    NaN   NaN
Jun      NaN    NaN   NaN
Mar    442.0  178.0   NaN
May      NaN    NaN   NaN 

month
Apr    154.0
Feb    220.0
Jan      NaN
Jun      NaN
Mar    442.0
May    264.0
Name: eggs, dtype: float64


### Using the Bracket Accessor to Select Multiple Columns

In [58]:
# A list of labels inside the brackets selects several columns at once,
# and the result stays a DataFrame rather than a Series.
wanted_labels = ['eggs', 'salt']
cols = df[wanted_labels]

print("Output type when multiple columns are selected:", type(cols), "\n")
print(cols)

Output type when multiple columns are selected: <class 'pandas.core.frame.DataFrame'> 

       eggs  salt
month            
Jan      47  12.0
Feb     110  50.0
Mar     221  89.0
Apr      77  87.0
May     132   NaN
Jun     205  60.0


### The Bracket Accessor Returns Rows when Passed a Slice

In [31]:
# A slice inside the brackets picks rows by position, not columns
rows = df[:2]

rows_type = type(rows)
print("Output type:", rows_type)
print("Output data:\n", rows)

Output type: <class 'pandas.core.frame.DataFrame'>
Output data:
        eggs  salt  spam
month                  
Jan      47  12.0    17
Feb     110  50.0    31


### loc & iloc are the Preferred Accessors

In [79]:
# .loc addresses a cell by row/column label; .iloc addresses it by integer position.
# Both lookups below point at the same cell.
salt_by_label = df.loc['Feb', 'salt']
salt_by_position = df.iloc[1, 1]

print(salt_by_label)
print(salt_by_position)

50.0
50.0
