# Examples illustrating data transformations
## Copyright (C) Princeton Consultants, 2017
### First import the pandas library

In [1]:
import pandas as pd

### We will first read in a tidy representation

In [2]:
# Load the tidy table; CSV columns 0 and 1 together become the row index.
tidy = pd.read_csv(
    '01.tidysteel.csv',
    index_col=[0, 1],
)
tidy

Unnamed: 0_level_0,Unnamed: 1_level_0,market,revenue
PROD,T,Unnamed: 2_level_1,Unnamed: 3_level_1
bands,1,6000,25
coils,1,4000,30
bands,2,6000,26
coils,2,2500,35
bands,3,4000,27
coils,3,3500,37
bands,4,6500,27
coils,4,4200,39


### Create two DataFrames that are untidy (wide) representations. These could have been the original source data.

In [3]:
# Widen the tidy table: one row per PROD, one column per T value,
# with the `market` figures as the cell values.
market = (
    tidy
    .reset_index()
    .pivot(index='PROD', columns='T', values='market')
)
market

T,1,2,3,4
PROD,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bands,6000,6000,4000,6500
coils,4000,2500,3500,4200


In [4]:
# Same reshaping as for `market`, this time spreading the `revenue`
# figures across one column per T value.
revenue = (
    tidy
    .reset_index()
    .pivot(index='PROD', columns='T', values='revenue')
)
revenue

T,1,2,3,4
PROD,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bands,25,26,27,27
coils,30,35,37,39


### Create a tidy representation of market and of revenue, using `pd.melt()`

In [5]:
# Back to long form: move PROD out of the index so melt can keep it as the
# identifier column, fold the per-T columns into rows, then re-key on (PROD, T).
tidymarket = (
    pd.melt(market.reset_index(), id_vars=['PROD'], value_name='market')
    .set_index(['PROD', 'T'])
)
tidymarket

Unnamed: 0_level_0,Unnamed: 1_level_0,market
PROD,T,Unnamed: 2_level_1
bands,1,6000
coils,1,4000
bands,2,6000
coils,2,2500
bands,3,4000
coils,3,3500
bands,4,6500
coils,4,4200


In [6]:
# Long form of the wide `revenue` table: PROD stays as the identifier,
# the per-T columns fold into rows, and (PROD, T) becomes the index.
tidyrevenue = (
    pd.melt(revenue.reset_index(), id_vars=['PROD'], value_name='revenue')
    .set_index(['PROD', 'T'])
)
tidyrevenue

Unnamed: 0_level_0,Unnamed: 1_level_0,revenue
PROD,T,Unnamed: 2_level_1
bands,1,25
coils,1,30
bands,2,26
coils,2,35
bands,3,27
coils,3,37
bands,4,27
coils,4,39


### Create the tidy representation as one table, using `pd.concat()`

In [7]:
# Place the two single-column tidy frames side by side; rows are aligned
# on their shared (PROD, T) index.
newtidy = pd.concat(
    [tidymarket, tidyrevenue],
    axis=1,
)
newtidy

Unnamed: 0_level_0,Unnamed: 1_level_0,market,revenue
PROD,T,Unnamed: 2_level_1,Unnamed: 3_level_1
bands,1,6000,25
coils,1,4000,30
bands,2,6000,26
coils,2,2500,35
bands,3,4000,27
coils,3,3500,37
bands,4,6500,27
coils,4,4200,39


### See if the original tidy representation and the new one are the same

In [8]:
# The built-in `all(df)` iterates over the DataFrame's column labels (non-empty
# strings, hence always truthy), so it would report True even if values differed.
# Reduce the element-wise comparison over both axes instead: the first `.all()`
# collapses each column, the second collapses the per-column results.
# `bool(...)` turns the NumPy boolean into a plain Python bool for display.
bool((tidy == newtidy).all().all())

True