# Pandas
Pandas is a Python library built on top of NumPy, used for data manipulation and analysis.
## Pandas series vs pandas dataframes
A pandas Series is a 1D array-like object that can hold many data types.
Pandas Series are mutable.

In [3]:
import numpy as np
import pandas as pd

In [4]:
# Build a Series of mixed types (ints and strings), labelled by grocery item.
groceries = pd.Series(
    data=[30, 6, 'Yes', 'No'],
    index=['eggs', 'apples', 'milk', 'bread'],
)
groceries

eggs       30
apples      6
milk      Yes
bread      No
dtype: object

In [5]:
# Inspect the basic structural attributes of the Series:
# shape is a 1-tuple, ndim is the number of dimensions, size is the element count.
print("Groceries has shape:", groceries.shape)
print("Groceries has dimension:", groceries.ndim)
print("Groceries has a total of:", groceries.size, "elements")

Groceries has shape: (4,)
Groceries has dimension: 1
Groceries has a total of: 4 elements


In [6]:
# `in` on a Series tests the index labels, not the stored values
'bananas' in groceries

False

In [7]:
# 'bread' is one of the index labels, so the membership test succeeds
'bread' in groceries

True

## Accessing and deleting elements in Pandas Series

In [8]:
# Look up a single element by its index label.
groceries.loc['eggs']

30

In [9]:
# A list of labels selects several elements and returns a Series.
groceries.loc[['eggs', 'milk']]

eggs     30
milk    Yes
dtype: object

In [10]:
# Positional access: use .iloc. Plain [] with an integer key on a label-indexed
# Series is deprecated (pandas >= 2.1) and will be treated as a label lookup.
groceries.iloc[0]

30

In [11]:
# Positional access to the fourth element: use .iloc rather than [] with an
# integer key, which is deprecated on a label-indexed Series (pandas >= 2.1).
groceries.iloc[3]

'No'

In [12]:
# Series are mutable: overwrite the value stored under the 'eggs' label.
groceries.loc['eggs'] = 12
groceries

eggs       12
apples      6
milk      Yes
bread      No
dtype: object

In [13]:
# drop() returns a new Series without 'apples'; groceries itself is left unchanged
groceries.drop(labels='apples')

eggs      12
milk     Yes
bread     No
dtype: object

In [14]:
# the original Series still contains 'apples' after the non-inplace drop above
groceries

eggs       12
apples      6
milk      Yes
bread      No
dtype: object

In [15]:
# with inplace=True the element is removed from groceries itself
groceries.drop(labels='apples', inplace=True)
groceries

eggs      12
milk     Yes
bread     No
dtype: object

# Arithmetic operations on Pandas Series

In [16]:
# An all-integer Series, labelled by fruit name.
fruits = pd.Series(
    data=[10, 6, 3],
    index=['apples', 'oranges', 'bananas'],
)
fruits

apples     10
oranges     6
bananas     3
dtype: int64

In [17]:
# the scalar is applied to every element (element-wise addition)
print(fruits + 2)


apples     12
oranges     8
bananas     5
dtype: int64


In [18]:
# element-wise subtraction of a scalar
print(fruits - 2)

apples     8
oranges    4
bananas    1
dtype: int64


In [19]:
# element-wise multiplication by a scalar
print(fruits * 2)

apples     20
oranges    12
bananas     6
dtype: int64


In [20]:
# NOTE(review): identical to the subtraction cell above (In [18]); division
# (fruits / 2) was probably intended here — confirm.
print(fruits - 2)

apples     8
oranges    4
bananas    1
dtype: int64


In [21]:
# NumPy functions apply element-wise to a Series and keep its index labels.
# (numpy is imported as np in the imports cell at the top of the notebook.)
np.sqrt(fruits)

apples     3.162278
oranges    2.449490
bananas    1.732051
dtype: float64

In [22]:
# square every element; the result is still a labelled Series
np.power(fruits, 2)

apples     100
oranges     36
bananas      9
dtype: int64

In [23]:
# element-wise exponential; the result is a float Series
np.exp(fruits)

apples     22026.465795
oranges      403.428793
bananas       20.085537
dtype: float64

In [24]:
# arithmetic can be applied to a selected subset of the Series only
fruits[['bananas']] + 2

bananas    5
dtype: int64

# Pandas dataframes
A pandas DataFrame is a 2D object with labelled rows and columns that can also hold multiple data types.

In [25]:
# One Series of purchases per customer; the two Series have different item labels.
items = {
    'Bob': pd.Series(
        data=[245, 25, 55],
        index=['bike', 'pants', 'watch'],
    ),
    'Alice': pd.Series(
        data=[40, 10, 500, 45],
        index=['book', 'glasses', 'bike', 'pants'],
    ),
}
type(items)

dict

In [26]:
# Each dict key becomes a column. The row labels combine the labels of both Series,
# and an item a customer does not have shows up as NaN.
shopping_carts = pd.DataFrame(items)
shopping_carts

Unnamed: 0,Bob,Alice
bike,245.0,500.0
book,,40.0
glasses,,10.0
pants,25.0,45.0
watch,55.0,


In [27]:
# Series built without explicit labels get a default integer index (0, 1, 2, ...),
# so the resulting DataFrame is indexed by position numbers.
data = {
    'Bob': pd.Series(data=[245, 25, 55]),
    'Alice': pd.Series(data=[40, 110, 500, 45]),
}
df = pd.DataFrame(data=data)
df

Unnamed: 0,Bob,Alice
0,245.0,40
1,25.0,110
2,55.0,500
3,,45


In [28]:
# the columns argument keeps only the listed dict keys (here: Bob's purchases)
bob_shopping_cart = pd.DataFrame(items, columns = ['Bob'])
bob_shopping_cart

Unnamed: 0,Bob
bike,245
pants,25
watch,55


In [29]:
# the index argument keeps only the listed row labels, in the given order
sel_shopping_cart = pd.DataFrame(items, index = ['pants', 'book'])
sel_shopping_cart

Unnamed: 0,Bob,Alice
pants,25.0,45
book,,40


In [30]:
# shape is (number of rows, number of columns)
shopping_carts.shape

(5, 2)

In [31]:
# index and columns can be combined to select specific rows of a specific column.
# NOTE(review): the variable is named "alice" but columns = ['Bob'] selects Bob's
# column — confirm which of the two was intended.
alice_shopping_cart = pd.DataFrame(items, index = ['pants', 'book'], columns = ['Bob'])
alice_shopping_cart

Unnamed: 0,Bob
pants,25.0
book,


In [32]:
# Build a DataFrame from a dict of plain lists and attach explicit row labels.
data = {
    'Integers': [1, 2, 3],
    'Floats': [2.4, 9.0, 5.6],
}
df = pd.DataFrame(
    data=data,
    index=['Label 1', 'Label 2', 'Label 3'],
)
df

Unnamed: 0,Integers,Floats
Label 1,1,2.4
Label 2,2,9.0
Label 3,3,5.6


# Accessing elements in Pandas dataframes

In [33]:
# A list of dicts: each dict is one row (a store), each key a column (a product).
items = [
    {'bikes': 20, 'pants': 30, 'watches': 35},
    {'watches': 10, 'glasses': 50, 'bikes': 15, 'pants': 5},
]
store_items = pd.DataFrame(
    data=items,
    index=['store 1', 'store 2'],
)
store_items

Unnamed: 0,bikes,pants,watches,glasses
store 1,20,30,35,
store 2,15,5,10,50.0


In [34]:
# selecting a single column returns it as a Series indexed by store
store_items['bikes']

store 1    20
store 2    15
Name: bikes, dtype: int64

In [35]:
# single value: select the column first, then the row label within it
store_items['bikes']['store 2']

15

The main difference between the `loc` and `iloc` indexers is that `loc` selects rows (and/or columns) by their labels, while `iloc` selects rows (and/or columns) by their integer positions.

In [36]:
# passing a list of labels to loc returns the selected rows as a DataFrame
store_items.loc[['store 1']]
# returns as a dataframe

Unnamed: 0,bikes,pants,watches,glasses
store 1,20,30,35,


In [37]:
# passing a single label to loc returns that row as a Series (column names become the index)
store_items.loc['store 1']
# returns the row's elements as a Series

bikes      20.0
pants      30.0
watches    35.0
glasses     NaN
Name: store 1, dtype: float64

In [38]:
# assigning to a new column name appends that column; one value per store
store_items['shirts'] = [15, 8]
store_items

Unnamed: 0,bikes,pants,watches,glasses,shirts
store 1,20,30,35,,15
store 2,15,5,10,50.0,8


In [39]:
# a new column can be computed element-wise from existing columns
store_items['suits'] = store_items['shirts'] + store_items['pants']
store_items

Unnamed: 0,bikes,pants,watches,glasses,shirts,suits
store 1,20,30,35,,15,45
store 2,15,5,10,50.0,8,13


In [40]:
# A one-row DataFrame describing an additional store.
new_items = [
    {'bikes': 20, 'pants': 30, 'watches': 35, 'glasses': 4},
]
new_store = pd.DataFrame(
    data=new_items,
    index=['store 3'],
)
new_store

Unnamed: 0,bikes,pants,watches,glasses
store 3,20,30,35,4


In [44]:
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0;
# pd.concat is the supported way to add the rows of one DataFrame to another.
# Columns that new_store lacks are filled with NaN for the new row.
store_items = pd.concat([store_items, new_store])
store_items

  store_items = store_items.append(new_store)


Unnamed: 0,bikes,pants,watches,glasses,shirts,suits,new_watches
store 1,20,30,35,,15.0,45.0,
store 2,15,5,10,50.0,8.0,13.0,10.0
store 3,20,30,35,4.0,,,


In [41]:
# NOTE(review): this cell was executed (In [41]) before the append cell above (In [44]).
# Run top-to-bottom, store_items already contains 'store 3', so this would
# presumably list 'store 3' twice — confirm the intended cell order.
print(pd.concat([store_items, new_store]))

         bikes  pants  watches  glasses  shirts  suits
store 1     20     30       35      NaN    15.0   45.0
store 2     15      5       10     50.0     8.0   13.0
store 3     20     30       35      4.0     NaN    NaN


In [45]:
# Take the 'watches' values from the second row onward (positional slice).
# Assignment aligns on row labels, so the first store gets NaN in the new column.
store_items['new_watches'] = store_items['watches'].iloc[1:]
store_items

Unnamed: 0,bikes,pants,watches,glasses,shirts,suits,new_watches
store 1,20,30,35,,15.0,45.0,
store 2,15,5,10,50.0,8.0,13.0,10.0
store 3,20,30,35,4.0,,,35.0


In [46]:
# insert() places a new column at a chosen position (0-based) and modifies the frame in place
store_items.insert(loc=5, column='shoes', value=[2, 5, 0])
store_items

Unnamed: 0,bikes,pants,watches,glasses,shirts,shoes,suits,new_watches
store 1,20,30,35,,15.0,2,45.0,
store 2,15,5,10,50.0,8.0,5,13.0,10.0
store 3,20,30,35,4.0,,0,,35.0


`pop` is used to delete columns, while `drop` can delete both rows and columns, depending on the `axis` keyword.

In [50]:
# add a 'socks' column at position 4 (0-based); the frame is modified in place
store_items.insert(loc=4, column='socks', value=[13, 9, 1])
store_items

Unnamed: 0,bikes,pants,watches,glasses,socks,shirts,shoes,suits,new_watches
store 1,20,30,35,,13,15.0,2,45.0,
store 2,15,5,10,50.0,9,8.0,5,13.0,10.0
store 3,20,30,35,4.0,1,,0,,35.0


In [51]:
# pop() removes the named column from the frame in place (and returns the removed column)
store_items.pop('new_watches')
store_items

Unnamed: 0,bikes,pants,watches,glasses,socks,shirts,shoes,suits
store 1,20,30,35,,13,15.0,2,45.0
store 2,15,5,10,50.0,9,8.0,5,13.0
store 3,20,30,35,4.0,1,,0,


In [52]:
# axis = 1 tells drop() that the labels refer to columns
store_items = store_items.drop(['socks'], axis = 1)
store_items

Unnamed: 0,bikes,pants,watches,glasses,shirts,shoes,suits
store 1,20,30,35,,15.0,2,45.0
store 2,15,5,10,50.0,8.0,5,13.0
store 3,20,30,35,4.0,,0,


In [53]:
# axis = 0 tells drop() that the labels refer to rows
store_items = store_items.drop(['store 3'], axis = 0)
store_items

Unnamed: 0,bikes,pants,watches,glasses,shirts,shoes,suits
store 1,20,30,35,,15.0,2,45.0
store 2,15,5,10,50.0,8.0,5,13.0


In [55]:
# to rename a DataFrame row/column, pass a dict mapping each old name to its new name
# (columns = {...} renames column labels)
store_items = store_items.rename(columns = {'shoes': 'hats'})
store_items

Unnamed: 0,bikes,pants,watches,glasses,shirts,hats,suits
store 1,20,30,35,,15.0,2,45.0
store 2,15,5,10,50.0,8.0,5,13.0


In [56]:
# index = {...} renames row labels using the same old-name -> new-name mapping
store_items = store_items.rename(index = {'store 2': 'last store'})
store_items

Unnamed: 0,bikes,pants,watches,glasses,shirts,hats,suits
store 1,20,30,35,,15.0,2,45.0
last store,15,5,10,50.0,8.0,5,13.0
