In [1]:
import pandas as pd
from pathlib import Path

### Read in files

In [2]:
# Locate and load each CSV input. Every path is relative to the notebook's
# working directory, and each file is read into its own DataFrame.

# Per-country product files
france_data_path = Path('../Resources/france_products.csv')
france_data = pd.read_csv(france_data_path)

uk_data_path = Path('../Resources/uk_products.csv')
uk_data = pd.read_csv(uk_data_path)

netherlands_data_path = Path('../Resources/netherlands_products.csv')
netherlands_data = pd.read_csv(netherlands_data_path)

# Customer and product files used for the column-wise concatenation
customer_data_path = Path('../Resources/customer_info.csv')
customer_data = pd.read_csv(customer_data_path)

products_data_path = Path('../Resources/products.csv')
products_data = pd.read_csv(products_data_path)

### Output sample of data

In [3]:
# Preview the first five rows of the France data
france_data.head(n=5)

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536370,10002,INFLATABLE POLITICAL GLOBE,48,12/1/2010 8:45,0.85,12583,France
1,536370,21791,VINTAGE HEADS AND TAILS CARD GAME,24,12/1/2010 8:45,1.25,12583,France
2,536370,21035,SET/2 RED RETROSPOT TEA TOWELS,18,12/1/2010 8:45,2.95,12583,France
3,536370,22326,ROUND SNACK BOXES SET OF4 WOODLAND,24,12/1/2010 8:45,2.95,12583,France
4,536370,22629,SPACEBOY LUNCH BOX,24,12/1/2010 8:45,1.95,12583,France


In [4]:
# Preview the first five rows of the UK data
uk_data.head(n=5)

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,539490,22720,SET OF 3 CAKE TINS PANTRY DESIGN,6,12/20/2010 10:00,4.95,17069,United Kingdom
1,539490,22721,SET OF 3 CAKE TINS SKETCHBOOK,3,12/20/2010 10:00,4.95,17069,United Kingdom
2,539490,22722,SET OF 6 SPICE TINS PANTRY DESIGN,8,12/20/2010 10:00,3.95,17069,United Kingdom
3,539490,22723,SET OF 6 HERB TINS SKETCHBOOK,4,12/20/2010 10:00,3.95,17069,United Kingdom
4,539490,22961,JAM MAKING SET PRINTED,12,12/20/2010 10:00,1.45,17069,United Kingdom


In [5]:
# Preview the first five rows of the Netherlands data
netherlands_data.head(n=5)

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,539491,21981,PACK OF 12 WOODLAND TISSUES,12,12/20/2010 10:09,0.29,14646,Netherlands
1,539491,21986,PACK OF 12 PINK POLKADOT TISSUES,12,12/20/2010 10:09,0.29,14646,Netherlands
2,539491,21123,SET/10 IVORY POLKADOT PARTY CANDLES,2,12/20/2010 10:09,1.25,14646,Netherlands
3,539491,47599A,PINK PARTY BAGS,2,12/20/2010 10:09,2.1,14646,Netherlands
4,539491,22331,WOODLAND PARTY BAG + STICKER SET,2,12/20/2010 10:09,1.65,14646,Netherlands


### Concatenate data by rows using the `concat` function and an `inner` join

In [6]:
# Stack the France, UK, and Netherlands frames on top of one another.
# join="inner" keeps only the columns that all three frames share.
# Each frame keeps its own row labels, so index values repeat in the result
# (the labels restart at 0 where the next country's rows begin).
country_frames = [france_data, uk_data, netherlands_data]
joined_data_rows = pd.concat(
    country_frames,
    axis="rows",
    join="inner",
)
joined_data_rows

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536370,10002,INFLATABLE POLITICAL GLOBE,48,12/1/2010 8:45,0.85,12583,France
1,536370,21791,VINTAGE HEADS AND TAILS CARD GAME,24,12/1/2010 8:45,1.25,12583,France
2,536370,21035,SET/2 RED RETROSPOT TEA TOWELS,18,12/1/2010 8:45,2.95,12583,France
3,536370,22326,ROUND SNACK BOXES SET OF4 WOODLAND,24,12/1/2010 8:45,2.95,12583,France
4,536370,22629,SPACEBOY LUNCH BOX,24,12/1/2010 8:45,1.95,12583,France
5,536370,22659,LUNCH BOX I LOVE LONDON,24,12/1/2010 8:45,1.95,12583,France
6,536370,22544,MINI JIGSAW SPACEBOY,24,12/1/2010 8:45,0.42,12583,France
0,539490,22720,SET OF 3 CAKE TINS PANTRY DESIGN,6,12/20/2010 10:00,4.95,17069,United Kingdom
1,539490,22721,SET OF 3 CAKE TINS SKETCHBOOK,3,12/20/2010 10:00,4.95,17069,United Kingdom
2,539490,22722,SET OF 6 SPICE TINS PANTRY DESIGN,8,12/20/2010 10:00,3.95,17069,United Kingdom


### Concatenate data by columns using the `concat` function and an `inner` join

In [7]:
# Preview the first five rows of the customer data
customer_data.head(n=5)

Unnamed: 0,CustomerID,FirstName,LastName,Address,City,Postal,Country,JoinDate
0,12583,Christine,Lagarde,26 rue des Nations Unies,SAINT-BENOÎT,97470,France,2012-09-12
1,17069,John,Grayken,73 Red Lane,Evershot,DT2 5JB,UK,2010-04-24
2,14646,Frits,Goldschmeding,Nassaupark 84,Bussum,1405 HP,Holland,2017-01-19


In [8]:
# Preview the first five rows of the products data
products_data.head(n=5)

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,539491,21981,PACK OF 12 WOODLAND TISSUES,12,12/20/2010 10:09,0.29,14646,Netherlands
1,539490,22665,RECIPE BOX BLUE SKETCHBOOK DESIGN,12,12/20/2010 10:00,2.95,17069,United Kingdom
2,540455,22726,ALARM CLOCK BAKELIKE GREEN,16,1/7/2011 12:07,3.75,12583,France


In [9]:
# Place the customer and product frames side by side.
# join='inner' keeps only the row labels present in both frames.
# NOTE: concat lines rows up by index label, not by CustomerID, so a customer
# row can sit next to a product row that belongs to a different customer.
# Both frames carry CustomerID and Country, so those column names appear
# twice in the result.
side_by_side_frames = [customer_data, products_data]
joined_data_cols = pd.concat(
    side_by_side_frames,
    axis='columns',
    join='inner',
)
joined_data_cols.head(n=5)

Unnamed: 0,CustomerID,FirstName,LastName,Address,City,Postal,Country,JoinDate,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID.1,Country.1
0,12583,Christine,Lagarde,26 rue des Nations Unies,SAINT-BENOÎT,97470,France,2012-09-12,539491,21981,PACK OF 12 WOODLAND TISSUES,12,12/20/2010 10:09,0.29,14646,Netherlands
1,17069,John,Grayken,73 Red Lane,Evershot,DT2 5JB,UK,2010-04-24,539490,22665,RECIPE BOX BLUE SKETCHBOOK DESIGN,12,12/20/2010 10:00,2.95,17069,United Kingdom
2,14646,Frits,Goldschmeding,Nassaupark 84,Bussum,1405 HP,Holland,2017-01-19,540455,22726,ALARM CLOCK BAKELIKE GREEN,16,1/7/2011 12:07,3.75,12583,France
