# Configuring pandas

In [1]:
# import numpy and pandas
import numpy as np
import pandas as pd

# used for dates
# NOTE(review): the second import rebinds the name `datetime` to the
# datetime *class*, so the module imported on the first line is no longer
# reachable under that name; write `datetime.now()`, not
# `datetime.datetime.now()`, in the cells that follow.
import datetime
from datetime import datetime, date

# Configure how pandas renders frames in this notebook: plain-text reprs
# (no HTML tables), at most 8 columns and 10 rows shown, 60-character width.
# set_option accepts several (pattern, value) pairs in a single call.
pd.set_option(
    'display.notebook_repr_html', False,
    'display.max_columns', 8,
    'display.max_rows', 10,
    'display.width', 60,
)

# bring in matplotlib for graphics
import matplotlib.pyplot as plt
%matplotlib inline

# Merging and joining data

In [2]:
# Customer master data: one row per customer, identified by CustomerID.
# The frame is built in one step so `customers` is only ever a DataFrame.
customers = pd.DataFrame({
    'CustomerID': [10, 11],
    'Name': ['Mike', 'Marcia'],
    'Address': ['Address for Mike', 'Address for Marcia'],
})
customers

              Address  CustomerID    Name
0    Address for Mike          10    Mike
1  Address for Marcia          11  Marcia

In [3]:
# Orders placed by the customers above; CustomerID is the column that
# links each order back to a row in `customers`.
# All three orders share the same date, so the list is built by repetition.
orders = pd.DataFrame({
    'CustomerID': [10, 11, 10],
    'OrderDate': [date(2014, 12, 1)] * 3,
})
orders

   CustomerID   OrderDate
0          10  2014-12-01
1          11  2014-12-01
2          10  2014-12-01

In [4]:
# Attach each order to its customer's name and address so the items can
# be shipped; with no `on` given, the shared CustomerID column is the key.
pd.merge(customers, orders)

              Address  CustomerID    Name   OrderDate
0    Address for Mike          10    Mike  2014-12-01
1    Address for Mike          10    Mike  2014-12-01
2  Address for Marcia          11  Marcia  2014-12-01

In [5]:
# Sample frames shared by the remaining examples in this section.
# They differ in key2 for key1 == 'b' ('y' vs 'a') and carry different
# index labels (0-2 vs 1-3).
left_data = {
    'key1': ['a', 'b', 'c'],
    'key2': ['x', 'y', 'z'],
    'lval1': [0, 1, 2],
}
right_data = {
    'key1': ['a', 'b', 'c'],
    'key2': ['x', 'a', 'z'],
    'rval1': [6, 7, 8],
}
left = pd.DataFrame(data=left_data, index=[0, 1, 2])
right = pd.DataFrame(data=right_data, index=[1, 2, 3])
left

  key1 key2  lval1
0    a    x      0
1    b    y      1
2    c    z      2

In [6]:
# show the right-hand frame so it can be compared with `left` above
right

  key1 key2  rval1
1    a    x      6
2    b    a      7
3    c    z      8

In [7]:
# With no `on` argument, merge matches on every column name the two
# frames have in common (here key1 and key2); only rows that agree on
# all of them appear in the result.
pd.merge(left, right)

  key1 key2  lval1  rval1
0    a    x      0      6
1    c    z      2      8

In [8]:
# Merge on a single, explicitly named column.
# The column passed to `on` must exist in both DataFrames; the other
# shared column (key2) is kept from each side with _x / _y suffixes.
pd.merge(left, right, on='key1')

  key1 key2_x  lval1 key2_y  rval1
0    a      x      0      x      6
1    b      y      1      a      7
2    c      z      2      z      8

In [9]:
# Name both key columns explicitly; a row pair matches only when key1
# and key2 are both equal.
pd.merge(left, right, on=['key1', 'key2'])

  key1 key2  lval1  rval1
0    a    x      0      6
1    c    z      2      8

In [10]:
# Join on the row index labels of the two DataFrames rather than on
# column values; only labels present in both indexes are kept.
left.merge(right, left_index=True, right_index=True)

  key1_x key2_x  lval1 key1_y key2_y  rval1
1      b      y      1      a      x      6
2      c      z      2      b      a      7