# Pandas Series

In [3]:
import pandas as pd

groceries = pd.Series(data = [30, 6, "Yes", "No"], index = ["eggs", "apples", "milk", "bread"])
groceries

eggs       30
apples      6
milk      Yes
bread      No
dtype: object

In [4]:
# We print some information about Groceries
print('Groceries has shape:', groceries.shape)
print('Groceries has dimension:', groceries.ndim)
print('Groceries has a total of', groceries.size, 'elements')

Groceries has shape: (4,)
Groceries has dimension: 1
Groceries has a total of 4 elements


In [5]:
groceries.values

array([30, 6, 'Yes', 'No'], dtype=object)

In [6]:
groceries.index

Index(['eggs', 'apples', 'milk', 'bread'], dtype='object')

In [7]:
x = "bananas" in groceries
y = "bread" in groceries

print(x)
print(y)

False
True


In [8]:
# Access elements using index labels
print(groceries["eggs"])

30


In [10]:
groceries[["milk", "bread"]]

milk     Yes
bread     No
dtype: object

In [11]:
groceries[["eggs", "apples"]]

eggs      30
apples     6
dtype: object

In [12]:
groceries.loc[["eggs", "apples"]]

eggs      30
apples     6
dtype: object

In [13]:
groceries[[0,1]]

eggs      30
apples     6
dtype: object

In [15]:
groceries[[-1]]

bread    No
dtype: object

In [16]:
groceries[0]

30

In [18]:
groceries.iloc[[2, 3]]

milk     Yes
bread     No
dtype: object

In [19]:
# Change the number
groceries["eggs"] = 2
groceries

eggs        2
apples      6
milk      Yes
bread      No
dtype: object

In [20]:
# Delete elements out-of-place using drop()
groceries.drop("apples")

eggs       2
milk     Yes
bread     No
dtype: object

In [21]:
groceries.drop("apples", inplace=True)

In [22]:
groceries

eggs       2
milk     Yes
bread     No
dtype: object

Arithmetic Operations on Pandas Series

In [23]:
fruits = pd.Series(data = [10, 6, 3], index = ["apples", "oranges", "bananas"])
fruits

apples     10
oranges     6
bananas     3
dtype: int64

In [24]:
# Element-wise basic arithmetic operations
fruits + 2

apples     12
oranges     8
bananas     5
dtype: int64

In [25]:
fruits - 2

apples     8
oranges    4
bananas    1
dtype: int64

In [26]:
# Use mathematical functions from NumPy to operate on Series
import numpy as np

np.exp(fruits)

apples     22026.465795
oranges      403.428793
bananas       20.085537
dtype: float64

In [27]:
np.sqrt(fruits)

apples     3.162278
oranges    2.449490
bananas    1.732051
dtype: float64

In [28]:
np.power(fruits, 2)

apples     100
oranges     36
bananas      9
dtype: int64

In [29]:
# Perform arithmetic operations on selected elements
fruits["bananas"] + 2

5

In [30]:
fruits.iloc[0] + 2

12

In [31]:
fruits[["apples", "oranges"]] + 2

apples     12
oranges     8
dtype: int64

In [33]:
fruits.loc[["apples", "oranges"]] / 2

apples     5.0
oranges    3.0
dtype: float64

In [34]:
# Perform multiplication on a Series having integer and string elements
groceries * 2

eggs          4
milk     YesYes
bread      NoNo
dtype: object

# Pandas DataFrame

In [37]:
# We create a dictionary of Pandas Series 
items = {'Bob' : pd.Series(data = [245, 25, 55], index = ['bike', 'pants', 'watch']),
         'Alice' : pd.Series(data = [40, 110, 500, 45], index = ['book', 'glasses', 'bike', 'pants'])}

items

{'Bob': bike     245
 pants     25
 watch     55
 dtype: int64,
 'Alice': book        40
 glasses    110
 bike       500
 pants       45
 dtype: int64}

In [38]:
shopping_carts = pd.DataFrame(items)
shopping_carts

Unnamed: 0,Bob,Alice
bike,245.0,500.0
book,,40.0
glasses,,110.0
pants,25.0,45.0
watch,55.0,


In [41]:
shopping_carts.loc["bike", "Alice"]

500.0

Example 2. DataFrame assigns the numerical row indexes by default.


In [45]:
data = {"Bob" : [245, 25, 55, 12],
        "Alice" :  [40, 110, 500, 45]}


df = pd.DataFrame(data)
df

Unnamed: 0,Bob,Alice
0,245,40
1,25,110
2,55,500
3,12,45


In [46]:
data = {"Bob" : pd.Series([245, 25, 55]),
        "Alice" :  pd.Series([40, 110, 500, 45])}


df = pd.DataFrame(data)
df

Unnamed: 0,Bob,Alice
0,245.0,40
1,25.0,110
2,55.0,500
3,,45


In [47]:
# Example 3. Demonstrate a few attributes of DataFrame
# We print some information about shopping_carts
print('shopping_carts has shape:', shopping_carts.shape)
print('shopping_carts has dimension:', shopping_carts.ndim)
print('shopping_carts has a total of:', shopping_carts.size, 'elements')
print()
print('The data in shopping_carts is:\n', shopping_carts.values)
print()
print('The row index in shopping_carts is:', shopping_carts.index)
print()
print('The column index in shopping_carts is:', shopping_carts.columns)

shopping_carts has shape: (5, 2)
shopping_carts has dimension: 2
shopping_carts has a total of: 10 elements

The data in shopping_carts is:
 [[245. 500.]
 [ nan  40.]
 [ nan 110.]
 [ 25.  45.]
 [ 55.  nan]]

The row index in shopping_carts is: Index(['bike', 'book', 'glasses', 'pants', 'watch'], dtype='object')

The column index in shopping_carts is: Index(['Bob', 'Alice'], dtype='object')


In [48]:
bob_Shopping_cart = pd.DataFrame(items, columns=["Bob"])
bob_Shopping_cart

Unnamed: 0,Bob
bike,245
pants,25
watch,55


In [49]:
# Example 4. Selecting specific rows of a DataFrame
sel_shopping_cart = pd.DataFrame(items, index = ["pants", "book"])
sel_shopping_cart

Unnamed: 0,Bob,Alice
pants,25.0,45
book,,40


In [50]:
# Example 5. Selecting specific columns of a DataFrame
# We Create a DataFrame that only has selected items for Alice
alice_sel_shopping_cart = pd.DataFrame(items, index = ['glasses', 'bike'], columns = ['Alice'])

# We display alice_sel_shopping_cart
alice_sel_shopping_cart

Unnamed: 0,Alice
glasses,110
bike,500


In [51]:
# Example 7. Create a DataFrame using a dictionary of lists, and custom row-indexes (labels)
# We create a dictionary of lists (arrays)
data = {'Integers' : [1,2,3],
        'Floats' : [4.5, 8.2, 9.6]}

# We create a DataFrame and provide the row index
df = pd.DataFrame(data, index = ['label 1', 'label 2', 'label 3'])

# We display the DataFrame
df

Unnamed: 0,Integers,Floats
label 1,1,4.5
label 2,2,8.2
label 3,3,9.6


In [52]:
# Example 8. Create a DataFrame using a of list of dictionaries
items2 = [{'bikes': 20, 'pants': 30, 'watches': 35}, 
          {'watches': 10, 'glasses': 50, 'bikes': 15, 'pants':5}]

# We create a DataFrame 
store_items = pd.DataFrame(items2)

# We display the DataFrame
store_items


Unnamed: 0,bikes,pants,watches,glasses
0,20,30,35,
1,15,5,10,50.0


In [53]:
# Example 9. Create a DataFrame using a of list of dictionaries, and custom row-indexes (labels)
# We create a list of Python dictionaries
items2 = [{'bikes': 20, 'pants': 30, 'watches': 35}, 
          {'watches': 10, 'glasses': 50, 'bikes': 15, 'pants':5}]

# We create a DataFrame  and provide the row index
store_items = pd.DataFrame(items2, index = ['store 1', 'store 2'])

# We display the DataFrame
store_items

Unnamed: 0,bikes,pants,watches,glasses
store 1,20,30,35,
store 2,15,5,10,50.0


Accessing Elements in Pandas DataFrames

In [54]:
items = [{"bikes" : 20, "pants" : 30, "watches" : 35}, {"watches": 10, "glasses" : 50, "bikes" : 15, "pants" : 5}]
store_items = pd.DataFrame(items, index = ["store 1", "store 2"])
store_items

Unnamed: 0,bikes,pants,watches,glasses
store 1,20,30,35,
store 2,15,5,10,50.0


In [55]:
# We print the store_items DataFrame
print(store_items)

# We access rows, columns and elements using labels
print()
print('How many bikes are in each store:\n', store_items[['bikes']])
print()
print('How many bikes and pants are in each store:\n', store_items[['bikes', 'pants']])
print()
print('What items are in Store 1:\n', store_items.loc[['store 1']])
print()
print('How many bikes are in Store 2:', store_items['bikes']['store 2'])

         bikes  pants  watches  glasses
store 1     20     30       35      NaN
store 2     15      5       10     50.0

How many bikes are in each store:
          bikes
store 1     20
store 2     15

How many bikes and pants are in each store:
          bikes  pants
store 1     20     30
store 2     15      5

What items are in Store 1:
          bikes  pants  watches  glasses
store 1     20     30       35      NaN

How many bikes are in Store 2: 15


Example 2. Add a column to an existing DataFrame

In [56]:
# We add a new column named shirts to our store_items DataFrame indicating the number of
# shirts in stock at each store. We will put 15 shirts in store 1 and 2 shirts in store 2
store_items['shirts'] = [15,2]

# We display the modified DataFrame
store_items

Unnamed: 0,bikes,pants,watches,glasses,shirts
store 1,20,30,35,,15
store 2,15,5,10,50.0,2


Example 3. Add a new column based on the arithmetic operation between existing columns of a DataFrame


In [57]:
# We make a new column called suits by adding the number of shirts and pants
store_items['suits'] = store_items['pants'] + store_items['shirts']

# We display the modified DataFrame
store_items


Unnamed: 0,bikes,pants,watches,glasses,shirts,suits
store 1,20,30,35,,15,45
store 2,15,5,10,50.0,2,7


In [58]:
# Example 4 a. Create a row to be added to the DataFrame
# We create a dictionary from a list of Python dictionaries that will contain the number of different items at the new store
new_items = [{'bikes': 20, 'pants': 30, 'watches': 35, 'glasses': 4}]

# We create new DataFrame with the new_items and provide and index labeled store 3
new_store = pd.DataFrame(new_items, index = ['store 3'])

# We display the items at the new store
new_store


Unnamed: 0,bikes,pants,watches,glasses
store 3,20,30,35,4


In [59]:
# We append store 3 to our store_items DataFrame
store_items = store_items.append(new_store)

# We display the modified DataFrame
store_items

  store_items = store_items.append(new_store)


Unnamed: 0,bikes,pants,watches,glasses,shirts,suits
store 1,20,30,35,,15.0,45.0
store 2,15,5,10,50.0,2.0,7.0
store 3,20,30,35,4.0,,


In [60]:
# Example 5. Add new column that has data from the existing columns
# We add a new column using data from particular rows in the watches column
store_items['new watches'] = store_items['watches'][1:]

# We display the modified DataFrame
store_items


Unnamed: 0,bikes,pants,watches,glasses,shirts,suits,new watches
store 1,20,30,35,,15.0,45.0,
store 2,15,5,10,50.0,2.0,7.0,10.0
store 3,20,30,35,4.0,,,35.0


In [61]:
# Example 6. Add new column at a specific location

# We insert a new column with label shoes right before the column with numerical index 4
store_items.insert(4, 'shoes', [8,5,0])

# we display the modified DataFrame
store_items


Unnamed: 0,bikes,pants,watches,glasses,shoes,shirts,suits,new watches
store 1,20,30,35,,8,15.0,45.0,
store 2,15,5,10,50.0,5,2.0,7.0,10.0
store 3,20,30,35,4.0,0,,,35.0


In [63]:
# Example 7. Delete one column from a DataFrame
# We remove the new watches column
store_items.pop('new watches')

# we display the modified DataFrame
store_items

Unnamed: 0,bikes,pants,watches,glasses,shoes,shirts,suits
store 1,20,30,35,,8,15.0,45.0
store 2,15,5,10,50.0,5,2.0,7.0
store 3,20,30,35,4.0,0,,


In [64]:
# Example 8. Delete multiple columns from a DataFrame
# We remove the watches and shoes columns
store_items = store_items.drop(['watches', 'shoes'], axis = 1)

# we display the modified DataFrame
store_items

Unnamed: 0,bikes,pants,glasses,shirts,suits
store 1,20,30,,15.0,45.0
store 2,15,5,50.0,2.0,7.0
store 3,20,30,4.0,,


In [65]:
# Example 9. Delete rows from a DataFrame
# We remove the store 2 and store 1 rows
store_items = store_items.drop(['store 2', 'store 1'], axis = 0)

# we display the modified DataFrame
store_items

Unnamed: 0,bikes,pants,glasses,shirts,suits
store 3,20,30,4.0,,


In [66]:
# We change the column label bikes to hats
store_items = store_items.rename(columns = {'bikes': 'hats'})

# we display the modified DataFrame
store_items

Unnamed: 0,hats,pants,glasses,shirts,suits
store 3,20,30,4.0,,


In [67]:
# We change the row label from store 3 to last store
store_items = store_items.rename(index = {'store 3': 'last store'})

# we display the modified DataFrame
store_items

Unnamed: 0,hats,pants,glasses,shirts,suits
last store,20,30,4.0,,


In [69]:
#  We change the row index to be the data in the pants column
store_items = store_items.set_index('pants')

# we display the modified DataFrame
store_items

Unnamed: 0_level_0,hats,glasses,shirts,suits
pants,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
30,20,4.0,,


# Dealing with Nan values

In [2]:
# We create a list of Python dictionaries
import pandas as pd
items2 = [{'bikes': 20, 'pants': 30, 'watches': 35, 'shirts': 15, 'shoes':8, 'suits':45},
{'watches': 10, 'glasses': 50, 'bikes': 15, 'pants':5, 'shirts': 2, 'shoes':5, 'suits':7},
{'bikes': 20, 'pants': 30, 'watches': 35, 'glasses': 4, 'shoes':10}]

# We create a DataFrame  and provide the row index
store_items = pd.DataFrame(items2, index = ['store 1', 'store 2', 'store 3'])

# We display the DataFrame
store_items

Unnamed: 0,bikes,pants,watches,shirts,shoes,suits,glasses
store 1,20,30,35,15.0,8,45.0,
store 2,15,5,10,2.0,5,7.0,50.0
store 3,20,30,35,,10,,4.0


In [3]:
# We count the number of NaN values in store_items
x =  store_items.isnull().sum().sum()

# We print x
print('Number of NaN values in our DataFrame:', x)

Number of NaN values in our DataFrame: 3


In [4]:
store_items.isnull().sum()

bikes      0
pants      0
watches    0
shirts     1
shoes      0
suits      1
glasses    1
dtype: int64

In [None]:
# We drop any rows with NaN values
# store_items.dropna(axis = 0)

# We drop any columns with NaN values
# store_items.dropna(axis = 1)

# We replace all NaN values with 0
# store_items.fillna(0)

# We replace NaN values with the previous value in the column
# store_items.fillna(method = 'ffill', axis = 0)

# We replace NaN values with the previous value in the row
# store_items.fillna(method = 'ffill', axis = 1)

# We replace NaN values with the next value in the column
# store_items.fillna(method = 'backfill', axis = 0)

# We replace NaN values with the next value in the row
# store_items.fillna(method = 'backfill', axis = 1)

# We replace NaN values by using linear interpolation using column values
# store_items.interpolate(method = 'linear', axis = 0)

# We replace NaN values by using linear interpolation using row values
# store_items.interpolate(method = 'linear', axis = 1)
