In [1]:
# Orders as (OrderNo, Date, Empno) tuples, kept in one list per order date

orders_2022_02_04 = [(9423517, '2022-02-04', 9001),
                     (4626232, '2022-02-04', 9003),
                     (9423534, '2022-02-04', 9001)]

orders_2022_02_05 = [(9423679, '2022-02-05', 9002),
                     (4626377, '2022-02-05', 9003),
                     (4626412, '2022-02-05', 9004)]

orders_2022_02_06 = [(9423783, '2022-02-06', 9002),
                     (4626490, '2022-02-06', 9004)]

In [2]:
# Lists concatenate with the `+` operator, giving a single list in date order
orders = (
    orders_2022_02_04
    + orders_2022_02_05
    + orders_2022_02_06
)

In [3]:
# Show the combined list of order tuples
display(orders)

[(9423517, '2022-02-04', 9001),
 (4626232, '2022-02-04', 9003),
 (9423534, '2022-02-04', 9001),
 (9423679, '2022-02-05', 9002),
 (4626377, '2022-02-05', 9003),
 (4626412, '2022-02-05', 9004),
 (9423783, '2022-02-06', 9002),
 (4626490, '2022-02-06', 9004)]

In [4]:
# Two dictionaries can be merged by unpacking both with `**` inside a new dict literal

extra_fields_9423517 = {
    'ShippingInstrustions': {
        'name': 'John Silver',
        'Phone': [
            {'type': 'Office', 'number': '809-123-9309'},
            {'type': 'Mobile', 'number': '417-123-4567'},
        ],
    },
}

order_9423517 = {'OrderNo': 9423517, 'Date': '2022-02-04', 'Empno': 9001}

# Keys from the later dict come after (and would override) keys from the earlier one
order_9423517 = {**order_9423517, **extra_fields_9423517}

In [5]:
# Show the merged order dictionary
display(order_9423517)

{'OrderNo': 9423517,
 'Date': '2022-02-04',
 'Empno': 9001,
 'ShippingInstrustions': {'name': 'John Silver',
  'Phone': [{'type': 'Office', 'number': '809-123-9309'},
   {'type': 'Mobile', 'number': '417-123-4567'}]}}

In [6]:
# Order line items as (OrderNo, Item, Brand, Price, Quantity) tuples
details = [
    (9423517, 'Jeans', 'Rip Curl', 87.0, 1),
    (9423517, 'Jacket', 'The North Face', 112.0, 1),
    (4626232, 'Socks', 'Vans', 15.0, 1),
    (4626232, 'Jeans', 'Quiksilver', 82.0, 1),
    (9423534, 'Socks', 'DC', 10.0, 2),
    (9423534, 'Socks', 'Quiksilver', 12.0, 2),
    (9423679, 'T-shirt', 'Patagonia', 35.0, 1),
    (4626377, 'Hoody', 'Animal', 44.0, 1),
    (4626377, 'Cargo Shorts', 'Animal', 38.0, 1),
    (4626412, 'Shirt', 'Volcom', 78.0, 1),
    (9423783, 'Boxer Shorts', 'Superdry', 30.0, 2),
    (9423783, 'Shorts', 'Globe', 26.0, 1),
    (4626490, 'Cargo Shorts', 'Billabong', 54.0, 1),
    (4626490, 'Sweater', 'Dickies', 56.0, 1),
]

# Join orders with their line items: each detail tuple whose order number
# matches an order is merged with that order into a single tuple, and all
# merged tuples are collected in a list
order_details = []

for order in orders:
    order_no = order[0]
    for detail in details:
        if detail[0] != order_no:
            continue
        # drop the detail's leading order number; `order` already carries it
        order_details.append(order + detail[1:])

display(order_details)

[(9423517, '2022-02-04', 9001, 'Jeans', 'Rip Curl', 87.0, 1),
 (9423517, '2022-02-04', 9001, 'Jacket', 'The North Face', 112.0, 1),
 (4626232, '2022-02-04', 9003, 'Socks', 'Vans', 15.0, 1),
 (4626232, '2022-02-04', 9003, 'Jeans', 'Quiksilver', 82.0, 1),
 (9423534, '2022-02-04', 9001, 'Socks', 'DC', 10.0, 2),
 (9423534, '2022-02-04', 9001, 'Socks', 'Quiksilver', 12.0, 2),
 (9423679, '2022-02-05', 9002, 'T-shirt', 'Patagonia', 35.0, 1),
 (4626377, '2022-02-05', 9003, 'Hoody', 'Animal', 44.0, 1),
 (4626377, '2022-02-05', 9003, 'Cargo Shorts', 'Animal', 38.0, 1),
 (4626412, '2022-02-05', 9004, 'Shirt', 'Volcom', 78.0, 1),
 (9423783, '2022-02-06', 9002, 'Boxer Shorts', 'Superdry', 30.0, 2),
 (9423783, '2022-02-06', 9002, 'Shorts', 'Globe', 26.0, 1),
 (4626490, '2022-02-06', 9004, 'Cargo Shorts', 'Billabong', 54.0, 1),
 (4626490, '2022-02-06', 9004, 'Sweater', 'Dickies', 56.0, 1)]

When part of the data has no counterpart in the other set (for example, an order detail whose order is missing), we may still want to join both sets. In that case a default value for the missing "columns" (fields) must be specified when performing the join.

In [7]:
# Add a line item whose order number has no matching entry in `orders`.
# The membership guard keeps this cell idempotent: re-running it does not
# append the same row a second time, which would inflate the totals below.
unmatched_detail = (4626592, 'Shorts', 'Protest', 48.0, 1)
if unmatched_detail not in details:
    details.append(unmatched_detail)

In [8]:
# Compact version of the loop above, written as a list comprehension.
# Each detail is matched against the order number `o[0]` (comparing `d[0]`
# with the whole tuple `o` would never be equal). For the unmatched detail the
# inner list is empty, so indexing it with `[0]` raises IndexError; the error
# is caught and printed so the notebook still runs from top to bottom.
try:
    orders_details = [[o for o in orders if d[0] == o[0]][0] + d[1:] for d in details]
except IndexError as exc:
    print(f'IndexError: {exc}')

IndexError: list index out of range

When there is an unmatched order number, the inner list is empty and indexing it with `[0]` fails.

In [9]:
# Avoids the IndexError by keeping only the details whose order number exists
# in `orders`; the new, unmatched detail is therefore still left out.
# Orders are looked up by order number only, so a detail can never match an
# order through one of its other fields (e.g. an Empno equal to the number).
# Iterating in reverse keeps the first order if an order number ever repeats.
orders_by_no = {o[0]: o for o in reversed(orders)}
orders_details = [orders_by_no[d[0]] + d[1:] for d in details if d[0] in orders_by_no]
display(orders_details)

[(9423517, '2022-02-04', 9001, 'Jeans', 'Rip Curl', 87.0, 1),
 (9423517, '2022-02-04', 9001, 'Jacket', 'The North Face', 112.0, 1),
 (4626232, '2022-02-04', 9003, 'Socks', 'Vans', 15.0, 1),
 (4626232, '2022-02-04', 9003, 'Jeans', 'Quiksilver', 82.0, 1),
 (9423534, '2022-02-04', 9001, 'Socks', 'DC', 10.0, 2),
 (9423534, '2022-02-04', 9001, 'Socks', 'Quiksilver', 12.0, 2),
 (9423679, '2022-02-05', 9002, 'T-shirt', 'Patagonia', 35.0, 1),
 (4626377, '2022-02-05', 9003, 'Hoody', 'Animal', 44.0, 1),
 (4626377, '2022-02-05', 9003, 'Cargo Shorts', 'Animal', 38.0, 1),
 (4626412, '2022-02-05', 9004, 'Shirt', 'Volcom', 78.0, 1),
 (9423783, '2022-02-06', 9002, 'Boxer Shorts', 'Superdry', 30.0, 2),
 (9423783, '2022-02-06', 9002, 'Shorts', 'Globe', 26.0, 1),
 (4626490, '2022-02-06', 9004, 'Cargo Shorts', 'Billabong', 54.0, 1),
 (4626490, '2022-02-06', 9004, 'Sweater', 'Dickies', 56.0, 1)]

In [10]:
# To include the unmatched detail as well, fall back to a default order tuple
# (the order number followed by None for the missing Date and Empno) whenever
# `orders` has no entry for a detail's order number.
# Orders are looked up by order number only; iterating in reverse keeps the
# first order if an order number ever repeats.
orders_by_no = {o[0]: o for o in reversed(orders)}
order_details_right = [
    orders_by_no.get(d[0], (d[0], None, None)) + d[1:]
    for d in details
]
display(order_details_right)

[(9423517, '2022-02-04', 9001, 'Jeans', 'Rip Curl', 87.0, 1),
 (9423517, '2022-02-04', 9001, 'Jacket', 'The North Face', 112.0, 1),
 (4626232, '2022-02-04', 9003, 'Socks', 'Vans', 15.0, 1),
 (4626232, '2022-02-04', 9003, 'Jeans', 'Quiksilver', 82.0, 1),
 (9423534, '2022-02-04', 9001, 'Socks', 'DC', 10.0, 2),
 (9423534, '2022-02-04', 9001, 'Socks', 'Quiksilver', 12.0, 2),
 (9423679, '2022-02-05', 9002, 'T-shirt', 'Patagonia', 35.0, 1),
 (4626377, '2022-02-05', 9003, 'Hoody', 'Animal', 44.0, 1),
 (4626377, '2022-02-05', 9003, 'Cargo Shorts', 'Animal', 38.0, 1),
 (4626412, '2022-02-05', 9004, 'Shirt', 'Volcom', 78.0, 1),
 (9423783, '2022-02-06', 9002, 'Boxer Shorts', 'Superdry', 30.0, 2),
 (9423783, '2022-02-06', 9002, 'Shorts', 'Globe', 26.0, 1),
 (4626490, '2022-02-06', 9004, 'Cargo Shorts', 'Billabong', 54.0, 1),
 (4626490, '2022-02-06', 9004, 'Sweater', 'Dickies', 56.0, 1),
 (4626592, None, None, 'Shorts', 'Protest', 48.0, 1)]

Get the total amount (price × quantity) summed over all items

In [11]:
# Price and quantity are the last two fields of every joined row
sum(price * quantity for *_, price, quantity in order_details_right)

779.0

In [12]:
# Leave out rows whose order is not present in the `orders` list: the join
# filled their date with None, so check for it with the identity test
# `is not None`
sum(
    price * quantity
    for _, date, _, _, _, price, quantity in order_details_right
    if date is not None
)

731.0

## Concatenating NumPy Arrays

In [13]:
import numpy as np

# Salaries of the first three employees, one list per employee
jeff_salary = [2700, 3000, 3000]
nick_salary = [2600, 2800, 2800]
tom_salary = [2300, 2500, 2500]

# Build a 2-D array with one row per employee
base_salary1 = np.array([
    jeff_salary,
    nick_salary,
    tom_salary,
])
display(base_salary1)

array([[2700, 3000, 3000],
       [2600, 2800, 2800],
       [2300, 2500, 2500]])

In [14]:
# Salaries of two more employees, in the same layout as above
maya_salary = [2200, 2400, 3000]
john_salary = [2500, 2700, 2700]

# Second 2-D array, again one row per employee
base_salary2 = np.array([
    maya_salary,
    john_salary,
])
display(base_salary2)

array([[2200, 2400, 3000],
       [2500, 2700, 2700]])

In [15]:
# axis=0 concatenates vertically: the rows of the second array are placed
# below the rows of the first
base_salary = np.concatenate([base_salary1, base_salary2], axis=0)
display(base_salary)

array([[2700, 3000, 3000],
       [2600, 2800, 2800],
       [2300, 2500, 2500],
       [2200, 2400, 3000],
       [2500, 2700, 2700]])

With both salary arrays merged into one, we can now consider the case where one additional month of salary is introduced for each employee.
In this case we want to concatenate an array of salaries that is in the same order as the employees.

In [16]:
# One new month of salary per employee: each single-element inner list
# (the second dimension) belongs to one employee
new_month_salary = [[3000], [2900], [2500], [2500], [2700]]

In [17]:
# axis=1 concatenates horizontally: `new_month_salary` is added to
# `base_salary` as one more column
base_salary = np.concatenate(
    [base_salary, new_month_salary],
    axis=1,
)
display(base_salary)

array([[2700, 3000, 3000, 3000],
       [2600, 2800, 2800, 2900],
       [2300, 2500, 2500, 2500],
       [2200, 2400, 3000, 2500],
       [2500, 2700, 2700, 2700]])

In [18]:
# Two more columns at once, one inner list per existing row of `base_salary`
upcoming_months = [[3000, 3200],
                   [2900, 2900],
                   [2500, 2900],
                   [3000, 3000],
                   [2900, 2900]]

# Add both columns to the right of the existing ones
base_salary = np.concatenate([base_salary, upcoming_months], axis=1)
display(base_salary)

array([[2700, 3000, 3000, 3000, 3000, 3200],
       [2600, 2800, 2800, 2900, 2900, 2900],
       [2300, 2500, 2500, 2500, 2500, 2900],
       [2200, 2400, 3000, 2500, 3000, 3000],
       [2500, 2700, 2700, 2700, 2900, 2900]])

In [19]:
# A single extra row: zeros in every column except the last one
new_employee_salary = [[0, 0, 0, 0, 0, 2900]]

# Add the row below the existing ones
base_salary = np.concatenate([base_salary, new_employee_salary], axis=0)
display(base_salary)

array([[2700, 3000, 3000, 3000, 3000, 3200],
       [2600, 2800, 2800, 2900, 2900, 2900],
       [2300, 2500, 2500, 2500, 2500, 2900],
       [2200, 2400, 3000, 2500, 3000, 3000],
       [2500, 2700, 2700, 2700, 2900, 2900],
       [   0,    0,    0,    0,    0, 2900]])

## Concatenating Pandas DataFrames

In [20]:
import pandas as pd

# One column per employee, built from the salary lists defined earlier
salary_df1 = pd.DataFrame(
    {'jeff': jeff_salary, 'nick': nick_salary, 'tom': tom_salary}
)

In [21]:
# Replace the row labels with month names (one label per salary entry)

salary_df1.index = ['June', 'July', 'August']

In [22]:
# Show the frame: months as rows, employees as columns
display(salary_df1)

Unnamed: 0,jeff,nick,tom
June,2700,2600,2300
July,3000,2800,2500
August,3000,2800,2500


In [23]:
# Swap rows and columns so that employees become rows and months become columns.
# NOTE: the result is assigned back to the same name, so running this cell a
# second time flips the frame back again.
salary_df1 = salary_df1.transpose()

In [27]:
# Built in a single expression: columns for Maya and John, month names as the
# index, then transposed so that rows and columns are swapped
salary_df2 = pd.DataFrame(
    {'maya': maya_salary, 'john': john_salary},
    index=['June', 'July', 'August'],
).transpose()

In [28]:
# Show the transposed frame: employees as rows, months as columns
display(salary_df2)

Unnamed: 0,June,July,August
maya,2200,2400,3000
john,2500,2700,2700


## Concatenate Rows (Axis 0) (Default)

In [31]:
# Row-wise concatenation (axis=0, the default): the rows of the second frame
# are placed after the rows of the first
salary_df = pd.concat([salary_df1, salary_df2], axis=0)
display(salary_df)

Unnamed: 0,June,July,August
jeff,2700,3000,3000
nick,2600,2800,2800
tom,2300,2500,2500
maya,2200,2400,3000
john,2500,2700,2700


## Concatenate Columns (Axis 1)

In [32]:
# Two further month columns, indexed by employee name, to be appended as
# columns to the first frame
salary_df3 = pd.DataFrame(
    {
        'September': [3000, 2800, 2500, 2400, 2700],
        'October': [3200, 3000, 2700, 2500, 2900],
    },
    index=['jeff', 'nick', 'tom', 'maya', 'john'],
)

In [33]:
# Column-wise concatenation: the columns of `salary_df3` are added to the
# right of the existing ones
salary_df = pd.concat([salary_df, salary_df3], axis='columns')
display(salary_df)

Unnamed: 0,June,July,August,September,October
jeff,2700,3000,3000,3000,3200
nick,2600,2800,2800,2800,3000
tom,2300,2500,2500,2500,2700
maya,2200,2400,3000,2400,2500
john,2500,2700,2700,2700,2900


## Removing Columns/Rows

In [34]:
# Remove columns by label

salary_df = salary_df.drop(columns=['September', 'October'])
display(salary_df)

Unnamed: 0,June,July,August
jeff,2700,3000,3000
nick,2600,2800,2800
tom,2300,2500,2500
maya,2200,2400,3000
john,2500,2700,2700


In [35]:
# Remove rows by index label

salary_df = salary_df.drop(index=['maya', 'john'])
display(salary_df)

Unnamed: 0,June,July,August
jeff,2700,3000,3000
nick,2600,2800,2800
tom,2300,2500,2500


## Hierarchical Index

In [38]:
# Totals per date and region; (Date, Region) together form a two-level index
df_date_region1 = (
    pd.DataFrame(
        [
            ('2022-02-04', 'East', 97.0),
            ('2022-02-04', 'West', 243.0),
            ('2022-02-05', 'East', 160.0),
            ('2022-02-05', 'West', 35.0),
            ('2022-02-06', 'East', 110.0),
            ('2022-02-06', 'West', 86.0),
        ],
        columns=['Date', 'Region', 'Total'],
    )
    .set_index(['Date', 'Region'])
)

# Same structure, holding the rows of one more region
df_date_region2 = (
    pd.DataFrame(
        [
            ('2022-02-04', 'South', 114.0),
            ('2022-02-05', 'South', 325.0),
            ('2022-02-06', 'South', 212.0),
        ],
        columns=['Date', 'Region', 'Total'],
    )
    .set_index(['Date', 'Region'])
)

In [39]:
# Stack both frames row-wise; the rows of the second frame end up after the
# rows of the first, so the index is not sorted yet
df_date_region = pd.concat([df_date_region1, df_date_region2], axis=0)
display(df_date_region)

Unnamed: 0_level_0,Unnamed: 1_level_0,Total
Date,Region,Unnamed: 2_level_1
2022-02-04,East,97.0
2022-02-04,West,243.0
2022-02-05,East,160.0
2022-02-05,West,35.0
2022-02-06,East,110.0
2022-02-06,West,86.0
2022-02-04,South,114.0
2022-02-05,South,325.0
2022-02-06,South,212.0


In [40]:
# Order the rows by the Date level first and by the Region level second

df_date_region = df_date_region.sort_index(
    level=['Date', 'Region'],
)
display(df_date_region)

Unnamed: 0_level_0,Unnamed: 1_level_0,Total
Date,Region,Unnamed: 2_level_1
2022-02-04,East,97.0
2022-02-04,South,114.0
2022-02-04,West,243.0
2022-02-05,East,160.0
2022-02-05,South,325.0
2022-02-05,West,35.0
2022-02-06,East,110.0
2022-02-06,South,212.0
2022-02-06,West,86.0


## Join Two DataFrames

In [53]:
# Load the plain-Python orders and line items into DataFrames
df_orders = pd.DataFrame(orders, columns=['OrderNo', 'Date', 'Empno'])
df_details = pd.DataFrame(
    details,
    columns=['OrderNo', 'Item', 'Brand', 'Price', 'Quantity'],
)

# A one-row frame holding one additional line item
df_details_2 = pd.DataFrame({
    'OrderNo': [4525481], 'Item': ['Jeans'], 'Brand': ['Levis'],
    'Price': [71.0], 'Quantity': [2],
})

# ignore_index=True renumbers the rows of the combined frame from 0
df_details = pd.concat([df_details, df_details_2], ignore_index=True)
display(df_details)

Unnamed: 0,OrderNo,Item,Brand,Price,Quantity
0,9423517,Jeans,Rip Curl,87.0,1
1,9423517,Jacket,The North Face,112.0,1
2,4626232,Socks,Vans,15.0,1
3,4626232,Jeans,Quiksilver,82.0,1
4,9423534,Socks,DC,10.0,2
5,9423534,Socks,Quiksilver,12.0,2
6,9423679,T-shirt,Patagonia,35.0,1
7,4626377,Hoody,Animal,44.0,1
8,4626377,Cargo Shorts,Animal,38.0,1
9,4626412,Shirt,Volcom,78.0,1


In [55]:
# Right join of orders and details on the shared `OrderNo` column: every row
# of `df_details` is kept, whether or not `df_orders` has a matching order

df_orders_details_right = df_orders.merge(
    df_details,
    how='right',
    on='OrderNo',
)
display(df_orders_details_right)

Unnamed: 0,OrderNo,Date,Empno,Item,Brand,Price,Quantity
0,9423517,2022-02-04,9001.0,Jeans,Rip Curl,87.0,1
1,9423517,2022-02-04,9001.0,Jacket,The North Face,112.0,1
2,4626232,2022-02-04,9003.0,Socks,Vans,15.0,1
3,4626232,2022-02-04,9003.0,Jeans,Quiksilver,82.0,1
4,9423534,2022-02-04,9001.0,Socks,DC,10.0,2
5,9423534,2022-02-04,9001.0,Socks,Quiksilver,12.0,2
6,9423679,2022-02-05,9002.0,T-shirt,Patagonia,35.0,1
7,4626377,2022-02-05,9003.0,Hoody,Animal,44.0,1
8,4626377,2022-02-05,9003.0,Cargo Shorts,Animal,38.0,1
9,4626412,2022-02-05,9004.0,Shirt,Volcom,78.0,1


In [59]:
# Fix the float64 dtype caused by the NaN insertion for rows without a
# matching order. Casting to pandas' nullable integer dtype 'Int64' restores
# integer values and keeps the missing entries as <NA>, instead of filling
# them with 0, which would read as a real employee number.

df_orders_details_right = df_orders_details_right.astype({'Empno': 'Int64'})
display(df_orders_details_right)

Unnamed: 0,OrderNo,Date,Empno,Item,Brand,Price,Quantity
0,9423517,2022-02-04,9001,Jeans,Rip Curl,87.0,1
1,9423517,2022-02-04,9001,Jacket,The North Face,112.0,1
2,4626232,2022-02-04,9003,Socks,Vans,15.0,1
3,4626232,2022-02-04,9003,Jeans,Quiksilver,82.0,1
4,9423534,2022-02-04,9001,Socks,DC,10.0,2
5,9423534,2022-02-04,9001,Socks,Quiksilver,12.0,2
6,9423679,2022-02-05,9002,T-shirt,Patagonia,35.0,1
7,4626377,2022-02-05,9003,Hoody,Animal,44.0,1
8,4626377,2022-02-05,9003,Cargo Shorts,Animal,38.0,1
9,4626412,2022-02-05,9004,Shirt,Volcom,78.0,1


## Many-To-Many

In [60]:
# Books and authors are kept in separate tables, each with its own id column
books = pd.DataFrame(
    {
        'book_id': ['b1', 'b2', 'b3'],
        'title': [
            'Beautiful Coding',
            'Python for Web Development',
            'Pythonic Thinking',
        ],
        'topic': ['programming', 'Python, Web', 'Python'],
    }
)

authors = pd.DataFrame(
    {
        'author_id': ['jsn', 'tri', 'wsn'],
        'author': ['Johnson', 'Treloni', 'Willson'],
    }
)

In [61]:
# Show both tables, one after the other
display(books, authors)

Unnamed: 0,book_id,title,topic
0,b1,Beautiful Coding,programming
1,b2,Python for Web Development,"Python, Web"
2,b3,Pythonic Thinking,Python


Unnamed: 0,author_id,author
0,jsn,Johnson
1,tri,Treloni
2,wsn,Willson


In [63]:
# Link table with one row per (author, book) pair: an author id can appear
# with several books and a book id with several authors
matching = pd.DataFrame(
    {
        'author_id': ['jsn', 'jsn', 'tri', 'wsn'],
        'book_id': ['b1', 'b2', 'b2', 'b3'],
    }
)

display(matching)

Unnamed: 0,author_id,book_id
0,jsn,b1
1,jsn,b2
2,tri,b2
3,wsn,b3


In [68]:
# Resolve the many-to-many relation in two joins. Each join key is named
# explicitly, so the result does not silently change if the frames ever gain
# another column name in common.
# Step 1: attach the author ids to every book
books_matching = books.merge(matching, on='book_id')
# Step 2: attach the author name to every (book, author id) pair
authorship = books_matching.merge(authors, on='author_id')
# Keep only the columns of interest for the final listing
books_authors = authorship[['author', 'title', 'topic']]

display(books_matching)
display(authorship)
display(books_authors)

Unnamed: 0,book_id,title,topic,author_id
0,b1,Beautiful Coding,programming,jsn
1,b2,Python for Web Development,"Python, Web",jsn
2,b2,Python for Web Development,"Python, Web",tri
3,b3,Pythonic Thinking,Python,wsn


Unnamed: 0,book_id,title,topic,author_id,author
0,b1,Beautiful Coding,programming,jsn,Johnson
1,b2,Python for Web Development,"Python, Web",jsn,Johnson
2,b2,Python for Web Development,"Python, Web",tri,Treloni
3,b3,Pythonic Thinking,Python,wsn,Willson


Unnamed: 0,author,title,topic
0,Johnson,Beautiful Coding,programming
1,Johnson,Python for Web Development,"Python, Web"
2,Treloni,Python for Web Development,"Python, Web"
3,Willson,Pythonic Thinking,Python
