In [156]:
import pandas as pd

# Access Elements Using Labels

In [157]:
# Inventory data: one dictionary per store, mapping item name -> quantity in stock.
# The first dictionary has no 'glasses' entry.
items2 = [
    dict(bikes=20, pants=30, watches=35),
    dict(watches=10, glasses=50, bikes=15, pants=5),
    dict(bikes=3, pants=7, watches=15, glasses=21),
]

In [158]:
# Build a DataFrame from the list of dictionaries. No index is passed, so the rows
# get the default integer labels 0, 1, 2; an item missing from a dictionary is NaN.
store_items = pd.DataFrame(data=items2)
store_items

Unnamed: 0,bikes,pants,watches,glasses
0,20,30,35,
1,15,5,10,50.0
2,3,7,15,21.0


In [159]:
# Same data as before, but each row now carries an explicit store label
store_items_labeled = pd.DataFrame(
    data=items2,
    index=['store 1', 'store 2', 'store 3'],
)
store_items_labeled

Unnamed: 0,bikes,pants,watches,glasses
store 1,20,30,35,
store 2,15,5,10,50.0
store 3,3,7,15,21.0


### Access by Columns

In [160]:
# One column: indexing with a one-element list of labels (note the double
# brackets) returns a DataFrame that contains just that column.
print('How many bikes are in each store:')
store_items[['bikes']]

How many bikes are in each store:


Unnamed: 0,bikes
0,20
1,15
2,3


In [161]:
# Multiple columns: list every wanted column label inside the inner brackets;
# the result is a DataFrame with the columns in the order they were listed.
print('How many bikes and pants are in each store:')
store_items[['bikes', 'pants']]

How many bikes and pants are in each store:


Unnamed: 0,bikes,pants
0,20,30
1,15,5
2,3,7


### Access by Rows (Integer Positions)

In [162]:
# One row: .iloc selects rows by integer position (0 is the first row).
# Wrapping the position in a list keeps the result a DataFrame.
print('What items are in Store 1:')
store_items.iloc[[0]]

What items are in Store 1:


Unnamed: 0,bikes,pants,watches,glasses
0,20,30,35,


In [163]:
# Multiple rows: pass a list with the integer position of every wanted row
print('What items are in Store 1 & Store 2:')
store_items.iloc[[0,1]] # positions 0 and 1, i.e. the first two rows

What items are in Store 1 & Store 2:


Unnamed: 0,bikes,pants,watches,glasses
0,20,30,35,
1,15,5,10,50.0


### Access by Rows (Custom Labels)

In [164]:
# One row: .loc selects rows by their index label.
# Wrapping the label in a list keeps the result a DataFrame.
print('What items are in Store 1:')
store_items_labeled.loc[['store 1']]

What items are in Store 1:


Unnamed: 0,bikes,pants,watches,glasses
store 1,20,30,35,


In [165]:
# Multiple rows: pass a list with the index label of every wanted row
print('What items are in Store 1 & Store 2:')
store_items_labeled.loc[['store 1', 'store 2']]

What items are in Store 1 & Store 2:


Unnamed: 0,bikes,pants,watches,glasses
store 1,20,30,35,
store 2,15,5,10,50.0


### Access by Columns & Rows
**Important Notes:**
* **In case of direct access:** When combining rows and columns, you must provide the column(s) label(s) first.
* **In case of using loc method:** you provide DataFrame.loc[ [rows index], [ columns index] ] as usual.

In [166]:
# Direct access: column label first, then the row label.
# store_items uses the default integer index, so the second row has label 1.
print('How many bikes are in Store 2:')
store_items['bikes'][1]

How many bikes are in Store 2:


15

In [167]:
# Same lookup through .loc[rows, columns]. Because both selectors are lists,
# the result is a one-cell DataFrame rather than a bare number.
print('How many bikes are in Store 2:')
store_items.loc[[1], ['bikes']]

How many bikes are in Store 2:


Unnamed: 0,bikes
1,15


In [168]:
# Positional lookup on a Series whose index holds string labels.
# A bare integer in [] is ambiguous here (pandas deprecated treating it as a
# position), so .iloc states explicitly that 1 means "the second row".
print('How many bikes are in Store 2:')
store_items_labeled['bikes'].iloc[1]

How many bikes are in Store 2:


15

In [169]:
# Select the column first (a Series), then use .loc with the row label on it
print('How many bikes are in Store 2:')
store_items_labeled['bikes'].loc['store 2']

How many bikes are in Store 2:


15

In [170]:
# Direct access with custom labels: column label first, then the row label
print('How many bikes are in Store 2:')
store_items_labeled['bikes']['store 2']

How many bikes are in Store 2:


15

# Adding Data to DataFrames

### Adding Columns (New Data)

In [171]:
# We add a new column named shirts to our store_items_labeled DataFrame indicating the
# number of shirts in stock at each store: 15 shirts in store 1, 2 shirts in store 2
# and 9 shirts in store 3. The list supplies one value per row, in row order.
store_items_labeled['shirts'] = [15,2, 9]
store_items_labeled

Unnamed: 0,bikes,pants,watches,glasses,shirts
store 1,20,30,35,,15
store 2,15,5,10,50.0,2
store 3,3,7,15,21.0,9


In [172]:
# Derive a suits column as the element-wise sum of the pants and shirts columns
store_items_labeled['suits'] = store_items_labeled['pants'].add(store_items_labeled['shirts'])
store_items_labeled

Unnamed: 0,bikes,pants,watches,glasses,shirts,suits
store 1,20,30,35,,15,45
store 2,15,5,10,50.0,2,7
store 3,3,7,15,21.0,9,16


### Adding Columns (Existing Data)
1. Slice a Series from the DataFrame.
2. Assign that Series to a new column label (dictionary-style assignment). Rows that are missing from the slice are filled with NaN.

In [173]:
# Every row of the watches column except the first one (positional slice)
store_items_labeled['watches'].iloc[1:]

store 2    10
store 3    15
Name: watches, dtype: int64

In [174]:
# Copy the watches counts of every row except the first into a new column.
# The first row is not part of the slice, so its 'new watches' value is NaN.
store_items_labeled['new watches'] = store_items_labeled['watches'].iloc[1:]
store_items_labeled

Unnamed: 0,bikes,pants,watches,glasses,shirts,suits,new watches
store 1,20,30,35,,15,45,
store 2,15,5,10,50.0,2,7,10.0
store 3,3,7,15,21.0,9,16,15.0


### Adding Columns at  a Specific Location (Insert)
* **DataFrame.insert(loc, column, value):** `loc` is the integer position at which the new column will be inserted, `column` is the label of the new column, and `value` is the data to put in the column (for example, a list of items).

**Note:** when the data is given as a list, its length must equal the number of rows in the DataFrame.

In [175]:
# Insert a column labeled shoes at position 4, i.e. right before the column
# that currently sits at numerical index 4
store_items_labeled.insert(loc=4, column='shoes', value=[8, 5, 0])
store_items_labeled

Unnamed: 0,bikes,pants,watches,glasses,shoes,shirts,suits,new watches
store 1,20,30,35,,8,15,45,
store 2,15,5,10,50.0,5,2,7,10.0
store 3,3,7,15,21.0,0,9,16,15.0


### Adding Rows
**Note:** The method is list of dictionaries, where each dictionary represents a row of data.
1. Create a list of rows (dictionaries)
2. Create a DataFrame from the list
3. Append the new DataFrame to the original one.

In [176]:
# A list with one dictionary per new row: the stock of each item at the new store
new_items = [dict(bikes=20, pants=30, watches=35, glasses=4)]

# Turn the new row into a DataFrame of its own, with the row labeled 'store 4'
new_store = pd.DataFrame(data=new_items, index=['store 4'])
new_store

Unnamed: 0,bikes,pants,watches,glasses
store 4,20,30,35,4


**Notice that appending rows returns a new DataFrame and does not modify the original one (unlike the dictionary-like / `loc` assignment syntax), so the result must be assigned back to a variable.**

In [177]:
# Add the new store's row to the bottom of the table.
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported way to
# stack rows. It returns a new DataFrame (the inputs are left untouched), so the
# result is assigned back. Columns missing from new_store are filled with NaN.
store_items_labeled = pd.concat([store_items_labeled, new_store])
store_items_labeled

Unnamed: 0,bikes,pants,watches,glasses,shoes,shirts,suits,new watches
store 1,20,30,35,,8.0,15.0,45.0,
store 2,15,5,10,50.0,5.0,2.0,7.0,10.0
store 3,3,7,15,21.0,0.0,9.0,16.0,15.0
store 4,20,30,35,4.0,,,,


# Removing Data From DataFrames

Just as we can add rows and columns we can also delete them.
To delete rows and columns from our DataFrame we will use:
1. **DataFrame.pop(label):** only allows us to delete columns by specifying a column label to delete, this method works only with one column and returns it as a Series.
2. **DataFrame.drop(label, axis):** can be used to delete both rows and columns by use of the axis keyword.

### Remove a Column (Pop)

In [178]:
# pop removes the 'new watches' column from the DataFrame in place and
# hands the removed column back as a Series
poped_col = store_items_labeled.pop(item='new watches')
store_items_labeled

Unnamed: 0,bikes,pants,watches,glasses,shoes,shirts,suits
store 1,20,30,35,,8.0,15.0,45.0
store 2,15,5,10,50.0,5.0,2.0,7.0
store 3,3,7,15,21.0,0.0,9.0,16.0
store 4,20,30,35,4.0,,,


In [179]:
# The Series returned by pop: the values of the removed 'new watches' column
poped_col

store 1     NaN
store 2    10.0
store 3    15.0
store 4     NaN
Name: new watches, dtype: float64

### Remove Columns (Drop)

In [180]:
# Drop the watches and shoes columns; drop returns a new DataFrame,
# so the result is assigned back to the same name
store_items_labeled = store_items_labeled.drop(columns=['watches', 'shoes'])
store_items_labeled

Unnamed: 0,bikes,pants,glasses,shirts,suits
store 1,20,30,,15.0,45.0
store 2,15,5,50.0,2.0,7.0
store 3,3,7,21.0,9.0,16.0
store 4,20,30,4.0,,


### Remove Rows (Drop)

In [181]:
# Drop the rows labeled store 2 and store 1; as with columns, drop returns
# a new DataFrame that is assigned back to the same name
store_items_labeled = store_items_labeled.drop(index=['store 2', 'store 1'])
store_items_labeled

Unnamed: 0,bikes,pants,glasses,shirts,suits
store 3,3,7,21.0,9.0,16.0
store 4,20,30,4.0,,


# Renaming
The general way to do so is to supply a dictionary of keys(current label) and values (new labels).

This works for both rows and columns; you specify which to rename through the **columns** and **index** parameters.

### Rename columns

In [185]:
# Rename the bikes column to hats (mapping: current label -> new label)
store_items_labeled = store_items_labeled.rename(columns=dict(bikes='hats'))
store_items_labeled

Unnamed: 0,hats,pants,glasses,shirts,suits
store 3,3,7,21.0,9.0,16.0
store 4,20,30,4.0,,


In [186]:
# Several columns can be renamed in one call: hats -> bikes and pants -> trousers
store_items_labeled = store_items_labeled.rename(
    columns=dict(hats='bikes', pants='trousers'),
)
store_items_labeled

Unnamed: 0,bikes,trousers,glasses,shirts,suits
store 3,3,7,21.0,9.0,16.0
store 4,20,30,4.0,,


### Rename Rows

In [187]:
# We change the row label from store 4 to last store
store_items_labeled = store_items_labeled.rename(index = {'store 4': 'last store'})
store_items_labeled

Unnamed: 0,bikes,trousers,glasses,shirts,suits
store 3,3,7,21.0,9.0,16.0
last store,20,30,4.0,,


# Create Index Labels From an Existing Column

In [188]:
# We change the row index to be the data in the bikes column.
# The bikes column becomes the index and no longer appears among the regular columns.
store_items_labeled = store_items_labeled.set_index('bikes')
store_items_labeled

Unnamed: 0_level_0,trousers,glasses,shirts,suits
bikes,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
3,7,21.0,9.0,16.0
20,30,4.0,,
