# Dataframes

In [1]:
import pandas as pd

#### Creating a DataFrame from a dictionary of series

In [2]:
# a dictionary of pandas Series: each Series is indexed by item name
bob_cart = pd.Series([245, 25, 55], index=['bike', 'pants', 'watch'])
alice_cart = pd.Series([40, 110, 500, 45], index=['book', 'glasses', 'bike', 'pants'])

# the dictionary keys are the shoppers' names
items = {'Bob': bob_cart, 'Alice': alice_cart}

In [5]:
# DataFrame from the dictionary: the keys become the column labels and the
# Series indexes are combined into the row index (missing entries show as NaN)
shopping_carts = pd.DataFrame(data=items)

shopping_carts

Unnamed: 0,Bob,Alice
bike,245.0,500.0
book,,40.0
glasses,,110.0
pants,25.0,45.0
watch,55.0,


#### Attributes of a DataFrame

In [23]:
# Print the basic attributes of the shopping_carts DataFrame.
# Every f-string ends in "\n", so each printed line is followed by a blank line.

# .shape: (number of rows, number of columns)
print(f"shopping_cart has shape: {shopping_carts.shape}\n")
# .ndim: number of dimensions
print(f"shopping_cart has dimnesions: {shopping_carts.ndim}\n")
# .size: total number of elements (NaN entries included)
print(f"shopping_cart has a total of: {shopping_carts.size} elements\n")
# .values: the underlying data as a 2-D array
print(f"the data in shopping_cart is: \n{shopping_carts.values}\n")
# .index: the row labels
print(f"the row index of shopping_cart is: {shopping_carts.index}\n")
# .columns: the column labels
print(f"the columns in shopping_cart is: {shopping_carts.columns}\n")

shopping_cart has shape: (5, 2)

shopping_cart has dimnesions: 2

shopping_cart has a total of: 10 elements

the data in shopping_cart is: 
[[245. 500.]
 [ nan  40.]
 [ nan 110.]
 [ 25.  45.]
 [ 55.  nan]]

the row index of shopping_cart is: Index(['bike', 'book', 'glasses', 'pants', 'watch'], dtype='object')

the columns in shopping_cart is: Index(['Bob', 'Alice'], dtype='object')



If we only want Bob's data in the DataFrame, we can select it with the `columns` keyword

In [25]:
# Keep only the 'Bob' entry of the dictionary by naming it in `columns`
bob_shopping_cart = pd.DataFrame(data=items, columns=['Bob'])

bob_shopping_cart

Unnamed: 0,Bob
bike,245
pants,25
watch,55


If we want a DataFrame that contains only selected row labels, we can use the `index` keyword

In [28]:
# Keep only the rows labelled 'pants' and 'book', in that order
selected_rows = ['pants', 'book']
pants_books_cart = pd.DataFrame(data=items, index=selected_rows)

pants_books_cart

Unnamed: 0,Bob,Alice
pants,25.0,45
book,,40


#### Creating a DataFrame from a dictionary of lists

In [29]:
# a dictionary of lists: each key becomes a column label and each list
# supplies that column's values (all lists have the same length)
data = dict(Integers=[1, 2, 3], Floats=[1.1, 2.2, 3.3])

# no index is given, so rows get the default integer labels 0, 1, 2
df = pd.DataFrame(data=data)

df

Unnamed: 0,Integers,Floats
0,1,1.1
1,2,2.2
2,3,3.3


Adding an index

In [31]:
# Replace the default integer row labels with explicit ones (one per row)
row_labels = ['label 1', 'label 2', 'label 3']
df_with_labels = pd.DataFrame(data=data, index=row_labels)

df_with_labels

Unnamed: 0,Integers,Floats
label 1,1,1.1
label 2,2,2.2
label 3,3,3.3


#### Creating a DataFrame from a list of dictionaries 

In [35]:
# Each dictionary describes one row; a key missing from a dictionary shows up
# as NaN in that row of the resulting DataFrame
first_row = {'bikes': 20, 'pants': 5, 'watches': 35}
second_row = {'watches': 5, 'glasses': 10}
items2 = [first_row, second_row]

items_df = pd.DataFrame(data=items2)

items_df

Unnamed: 0,bikes,pants,watches,glasses
0,20.0,5.0,35,
1,,,5,10.0


The first dictionary goes in the first row and so on.

In [41]:
# adding row labels: one label per dictionary in the list

store_labels = ['store 1', 'store 2']
items_df2 = pd.DataFrame(data=items2, index=store_labels)

items_df2

Unnamed: 0,bikes,pants,watches,glasses
store 1,20.0,5.0,35,
store 2,,,5,10.0


#### Accessing

Accessing a row

In [48]:
# Passing a list of labels to .loc returns a DataFrame (not a Series)
items_df2.loc[['store 1'], :]

Unnamed: 0,bikes,pants,watches,glasses
store 1,20.0,5.0,35,


Accessing a column 

In [49]:
# Selecting with a list of column labels returns a DataFrame (not a Series)
items_df2.loc[:, ['bikes']]

Unnamed: 0,bikes
store 1,20.0
store 2,


#### Adding to a DataFrame

Adding a column

In [54]:
# Assigning to a new column label appends the column on the right;
# the list needs one value per existing row
skirts_by_store = [15, 2]
items_df2['skirts'] = skirts_by_store

items_df2

Unnamed: 0,bikes,pants,watches,glasses,skirts
store 1,20.0,5.0,35,,15
store 2,,,5,10.0,2


Adding values using arithmetic of other columns

In [56]:
# Element-wise sum of two columns. NaN in either operand propagates, and every
# row here has NaN in 'bikes' or 'glasses', so the whole new column is NaN
items_df2['nerds'] = items_df2['bikes'].add(items_df2['glasses'])

items_df2

Unnamed: 0,bikes,pants,watches,glasses,skirts,nerds
store 1,20.0,5.0,35,,15,
store 2,,,5,10.0,2,


Adding a row

In [68]:
# first create a list of dictionaries, one dictionary per new row
store_3 = [{'bikes': 10, 'watches': 5, 'skirts': 20}]

# then create a dataframe, labelling the new row
df_store_3 = pd.DataFrame(store_3, index=['store 3'])

# then concatenate it onto the other dataframe.
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0;
# pd.concat is the supported replacement. Columns missing from the new row
# are filled with NaN.
items_df2 = pd.concat([items_df2, df_store_3])

items_df2

Unnamed: 0,bikes,pants,watches,glasses,skirts,nerds,fancy watches
store 1,20.0,5.0,35,,15,,
store 2,,,5,10.0,2,,5.0
store 3,10.0,,5,,20,,


In [69]:
# Take 'watches' from the second row onward (positional slice). Assignment
# aligns on the row index, so the first row, which is not in the slice, gets NaN
items_df2['fancy watches'] = items_df2['watches'].iloc[1:]

items_df2

Unnamed: 0,bikes,pants,watches,glasses,skirts,nerds,fancy watches
store 1,20.0,5.0,35,,15,,
store 2,,,5,10.0,2,,5.0
store 3,10.0,,5,,20,,5.0


Adding a column with insert

In [72]:
# insert(loc, column, value) places the new column at 0-based position `loc`,
# modifying the DataFrame in place
items_df2.insert(loc=4, column='shoes', value=[8, 5, 10])

items_df2

Unnamed: 0,bikes,pants,watches,glasses,shoes,skirts,nerds,fancy watches
store 1,20.0,5.0,35,,8,15,,
store 2,,,5,10.0,5,2,,5.0
store 3,10.0,,5,,10,20,,5.0


Deleting a column with pop

In [74]:
# pop removes the named column from the DataFrame in place and returns it;
# the returned column is not needed here, so it is discarded
items_df2.pop(item='fancy watches')

items_df2

Unnamed: 0,bikes,pants,watches,glasses,shoes,skirts,nerds
store 1,20.0,5.0,35,,8,15,
store 2,,,5,10.0,5,2,
store 3,10.0,,5,,10,20,


`drop` can be used to delete both rows and columns using the `axis` keyword

In [78]:
# axis='columns' (same as axis=1) drops a column; drop returns a new DataFrame
items_df2 = items_df2.drop(labels='nerds', axis='columns')

In [None]:
# axis='index' (same as axis=0) drops a row; drop returns a new DataFrame
items_df2 = items_df2.drop(labels='store 3', axis='index')

items_df2

Renaming columns or rows with .rename()

In [82]:
# Display the DataFrame as it stands before any renaming
items_df2

Unnamed: 0,bikes,pants,watches,glasses,shoes,skirts
store 1,20.0,5.0,35,,8,15
store 2,,,5,10.0,5,2


In [83]:
# Map old row labels to new ones. rename returns a new DataFrame, and the
# result is only displayed here, so items_df2 keeps its original labels
new_row_labels = {'store 1': 'Bobs', 'store 2': 'Johns'}
items_df2.rename(index=new_row_labels)

Unnamed: 0,bikes,pants,watches,glasses,shoes,skirts
Bobs,20.0,5.0,35,,8,15
Johns,,,5,10.0,5,2


In [84]:
# Map old column labels to new ones. rename returns a new DataFrame, and the
# result is only displayed here, so items_df2 itself is left unchanged
new_column_labels = {'pants': 'shorts', 'skirts': 'thongs'}
items_df2.rename(columns=new_column_labels)

Unnamed: 0,bikes,shorts,watches,glasses,shoes,thongs
store 1,20.0,5.0,35,,8,15
store 2,,,5,10.0,5,2
