# Pandas DataFrame

In [4]:
import pandas as pd

# Build a small purchases table: one Series per purchase (who bought what and
# what it cost), stacked into a DataFrame whose row labels are the stores.
# Two purchases happened at 'Store 1', so that index label is repeated.
purchase_1 = pd.Series({'Name': 'Luke', 'Item Purchased': 'Beer', 'Cost': 30.00})
purchase_2 = pd.Series({'Name': 'Bennett', 'Item Purchased': 'Rice', 'Cost': 20.00})
purchase_3 = pd.Series({'Name': 'Lexa', 'Item Purchased': 'Birds', 'Cost': 100.00})

# Rows and their store labels are listed in the same order.
purchases = [purchase_1, purchase_2, purchase_3]
store_labels = ['Store 1', 'Store 1', 'Store 2']

df = pd.DataFrame(purchases, index=store_labels)
df.head()

Unnamed: 0,Cost,Item Purchased,Name
Store 1,30.0,Beer,Luke
Store 1,20.0,Rice,Bennett
Store 2,100.0,Birds,Lexa


In [7]:
# 'Store 2' appears exactly once in the index, so .loc returns that single
# row as a Series whose labels are the DataFrame's column names.
df.loc['Store 2']

Cost                100
Item Purchased    Birds
Name               Lexa
Name: Store 2, dtype: object

In [8]:
# Confirm that a label matching a single row comes back as a Series.
type(df.loc['Store 2'])

pandas.core.series.Series

In [5]:
# 'Store 1' labels two rows, so .loc returns a new DataFrame containing
# every matching row.
df.loc['Store 1']

Unnamed: 0,Cost,Item Purchased,Name
Store 1,30.0,Beer,Luke
Store 1,20.0,Rice,Bennett


In [9]:
# Confirm that a label matching several rows comes back as a DataFrame.
type(df.loc['Store 1'])

pandas.core.frame.DataFrame

In [12]:
# You can locate data along both axes at once by giving .loc two arguments:
# .loc[row_label, column_label]. Note that .loc is indexed with square
# brackets; it is not called like a function.

# Get the cost of all Store 1 items.
df.loc['Store 1', 'Cost']

Store 1    30.0
Store 1    20.0
Name: Cost, dtype: float64

In [22]:
# Pass a list of column names after the row label to .loc[...] to return
# several columns at once; the result here is a DataFrame.
df.loc['Store 1', ['Cost', 'Name']]

Unnamed: 0,Cost,Name
Store 1,30.0,Luke
Store 1,20.0,Bennett


In [13]:
# The .T attribute returns the transpose of the DataFrame: the column names
# become the row labels and the store labels become the columns.
df.T

Unnamed: 0,Store 1,Store 1.1,Store 2
Cost,30,20,100
Item Purchased,Beer,Rice,Birds
Name,Luke,Bennett,Lexa


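#### The method below is called chained indexing. It returns the same result as the `.loc` lookup of the Store 1 costs above, but chaining comes with side effects: pandas may return a copy of the data instead of a view, so an assignment made through a chained expression may not change the original DataFrame. Keep this in mind if you are trying to change data, and prefer the single-step form `df.loc['Store 1', 'Cost']`.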

In [16]:
# Chained indexing: first select the 'Store 1' rows, then index the 'Cost'
# column of that intermediate result. Fine for reading; for assignment use
# the single-step form df.loc['Store 1', 'Cost'] instead.
df.loc['Store 1']['Cost']

Store 1    30.0
Store 1    20.0
Name: Cost, dtype: float64

In [26]:
# .loc[...] also supports slicing: a bare ':' selects every row.
# Return all rows and these 2 columns.
df.loc[:, ['Cost','Item Purchased']]

Unnamed: 0,Cost,Item Purchased
Store 1,30.0,Beer
Store 1,20.0,Rice
Store 2,100.0,Birds


### Dropping Data from DataFrames

In [28]:
# drop() is given an index (row) label here and returns a DataFrame without
# any of the rows carrying that label; both 'Store 1' rows are removed.
df.drop('Store 1')

Unnamed: 0,Cost,Item Purchased,Name
Store 2,100.0,Birds,Lexa


In [29]:
# Notice, however, that drop() did not change the DataFrame df itself: it
# returned a new DataFrame, so df still contains all three rows.
df

Unnamed: 0,Cost,Item Purchased,Name
Store 1,30.0,Beer,Luke
Store 1,20.0,Rice,Bennett
Store 2,100.0,Birds,Lexa


In [34]:
# Work on an independent copy so the original df stays intact, and keep
# what is left of that copy after removing the 'Store 1' rows.
df_copy = df.copy().drop('Store 1')
df_copy

Unnamed: 0,Cost,Item Purchased,Name
Store 2,100.0,Birds,Lexa


#### Two useful parameters for drop() are inplace and axis. inplace defaults to False, and axis defaults to 0 (drop rows; use axis=1 to drop columns). We can also delete a column by using the del keyword in combination with the indexing operator. This changes the DataFrame in place rather than returning a modified copy.

In [35]:
# del combined with the indexing operator removes the 'Name' column from
# df_copy in place; nothing is returned, so display df_copy to see the result.
del df_copy['Name']
df_copy

Unnamed: 0,Cost,Item Purchased
Store 2,100.0,Birds


## Adding columns 

In [37]:
# Assigning a scalar to a new column label creates the column and broadcasts
# the value to every row; here each row gets None under "Nickname".
# Note that this modifies df in place.
df['Nickname'] = None
df

Unnamed: 0,Cost,Item Purchased,Name,Nickname
Store 1,30.0,Beer,Luke,
Store 1,20.0,Rice,Bennett,
Store 2,100.0,Birds,Lexa,
