### Prepping Data Challenge: Multi Sheets of Madness (Week 21)
There are 12 sheets from different shops reporting the Key Metrics that we are interested in. There are Additional Metrics in a table below that are not of interest to us for this challenge. 
 
### Requirements
- Connect to the data
- Bring together the Key Metrics tables from each Shop
- You'll notice that we have fields which report the quarter in addition to the monthly values. We only wish to keep the monthly values
- Reshape the data so that we have a Date field
- For Orders and Returns, we are only interested in reporting % values, whilst for Complaints we are only interested in the # Received
- We wish to update the Breakdown field to include the Department to make the Measure Name easier to interpret
- We wish to have a field for each of the measures rather than a row per measure
- We wish to have the targets for each measure as a field that we can compare each measure to
- Output the data

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Read every sheet of the workbook, tag its rows with the sheet name in a
# 'Shop' column, and stack the sheets into one frame.
# header=3 makes the fourth row of each sheet the column header row.
# The per-sheet frames are collected first and concatenated once, rather than
# re-concatenating the growing result on every iteration.
with pd.ExcelFile("WK21-Input.xlsx") as xl:
    shop_frames = [
        pd.read_excel(xl, sheet_name=s, header=3).assign(Shop=s)
        for s in xl.sheet_names
    ]

# Each sheet keeps its own row index, so index labels repeat across shops.
df = pd.concat(shop_frames)

In [3]:
# Bring together the Key Metrics tables from each Shop.
# Rows whose Department equals one of the labels below are discarded; rows with
# a missing Department are kept (isin() is False for NaN, so the negation keeps them).
# NOTE(review): 'Additonal Metrics' is reproduced exactly as the filter was written;
# confirm this spelling matches the label used in the workbook.
unwanted_departments = ['HR', 'Additonal Metrics', 'Department']
df = df[~df['Department'].isin(unwanted_departments)]

In [4]:
# Propagate the last seen Department and Target downwards into rows where
# they are missing (presumably left blank by merged cells in the sheets; verify).
for filled_column in ('Department', 'Target'):
    df[filled_column] = df[filled_column].ffill()

In [5]:
# We only wish to keep the monthly values: remove the four quarterly columns
# and the Comments column.
# NOTE(review): 'FY22 Q1 ' has a trailing space, unlike the other three labels;
# it is kept exactly as written, presumably mirroring the workbook header.
# Reassigning instead of inplace=True avoids mutating a filtered frame in place,
# and `columns=` alone is enough (the extra axis=1 was redundant).
df = df.drop(columns=['FY22 Q1 ','FY22 Q2','FY22 Q3','FY22 Q4','Comments'])

In [6]:
# For Orders and Returns only the % measures are reported; for Complaints only
# the # Received count. Keep the rows whose Breakdown is one of those five labels.
reported_breakdowns = [
    '% Shipped in 3 days',
    '% Shipped in 5 days',
    '% Processed in 3 days',
    '% Processed in 5 days',
    '# Received',
]
df = df.loc[df['Breakdown'].isin(reported_breakdowns)]

In [7]:
# Reshape the data so that we have a Date field: every column that is not an
# identifier is unpivoted into a (Date, value) pair per row.
df_pivot = df.melt(
    id_vars=['Department','Target','Breakdown','Shop'],
    var_name='Date',
)

In [8]:
# Preview the first 10 rows of the melted (long-format) frame.
df_pivot.head(10)

Unnamed: 0,Department,Target,Breakdown,Shop,Date,value
0,Orders,>95%,% Shipped in 3 days,Bath,2021-07-01,0.91
1,Orders,>99%,% Shipped in 5 days,Bath,2021-07-01,0.99
2,Returns,>80%,% Processed in 3 days,Bath,2021-07-01,0.88
3,Returns,>95%,% Processed in 5 days,Bath,2021-07-01,0.91
4,Complaints,0,# Received,Bath,2021-07-01,25.0
5,Orders,>95%,% Shipped in 3 days,Torquay,2021-07-01,0.84
6,Orders,>99%,% Shipped in 5 days,Torquay,2021-07-01,0.92
7,Returns,>80%,% Processed in 3 days,Torquay,2021-07-01,0.86
8,Returns,>95%,% Processed in 5 days,Torquay,2021-07-01,0.91
9,Complaints,0,# Received,Torquay,2021-07-01,15.0


In [9]:
# We wish to update the Breakdown field to include the Department to make the
# Measure Name easier to interpret, and to have a field for each measure
# rather than a row per measure.
# Select the Orders / '% Shipped in 3 days' rows with a boolean mask (instead of
# a row-by-row apply) and expose their value under the combined measure name.
is_orders_3_days = (
    (df_pivot['Department'] == 'Orders')
    & (df_pivot['Breakdown'] == '% Shipped in 3 days')
)
df_pivot2 = df_pivot[is_orders_3_days].copy()
df_pivot2['% Orders Shipped in 3 days'] = df_pivot2['value']
# Matching rows with a missing value are still removed, as before.
df_pivot2 = df_pivot2.dropna(subset=['% Orders Shipped in 3 days'])

In [10]:
# Orders / '% Shipped in 5 days' rows, with their value exposed under the
# combined measure name. A boolean mask replaces the row-by-row apply.
is_orders_5_days = (
    (df_pivot['Department'] == 'Orders')
    & (df_pivot['Breakdown'] == '% Shipped in 5 days')
)
df_pivot3 = df_pivot[is_orders_5_days].copy()
df_pivot3['% Orders Shipped in 5 days'] = df_pivot3['value']
# Matching rows with a missing value are still removed, as before.
df_pivot3 = df_pivot3.dropna(subset=['% Orders Shipped in 5 days'])

In [11]:
# Returns / '% Processed in 3 days' rows, with their value exposed under the
# combined measure name. A boolean mask replaces the row-by-row apply.
is_returns_3_days = (
    (df_pivot['Department'] == 'Returns')
    & (df_pivot['Breakdown'] == '% Processed in 3 days')
)
df_pivot4 = df_pivot[is_returns_3_days].copy()
df_pivot4['% Returns Processed in 3 days'] = df_pivot4['value']
# Matching rows with a missing value are still removed, as before.
df_pivot4 = df_pivot4.dropna(subset=['% Returns Processed in 3 days'])

In [12]:
# Returns / '% Processed in 5 days' rows, with their value exposed under the
# combined measure name. A boolean mask replaces the row-by-row apply.
is_returns_5_days = (
    (df_pivot['Department'] == 'Returns')
    & (df_pivot['Breakdown'] == '% Processed in 5 days')
)
df_pivot5 = df_pivot[is_returns_5_days].copy()
df_pivot5['% Returns Processed in 5 days'] = df_pivot5['value']
# Matching rows with a missing value are still removed, as before.
df_pivot5 = df_pivot5.dropna(subset=['% Returns Processed in 5 days'])

In [13]:
# Complaints / '# Received' rows, with their value exposed under the combined
# measure name. A boolean mask replaces the row-by-row apply.
is_complaints_received = (
    (df_pivot['Department'] == 'Complaints')
    & (df_pivot['Breakdown'] == '# Received')
)
df_pivot6 = df_pivot[is_complaints_received].copy()
df_pivot6['# Complaints Received'] = df_pivot6['value']
# Matching rows with a missing value are still removed, as before.
df_pivot6 = df_pivot6.dropna(subset=['# Complaints Received'])

In [14]:
# Orders: put the 3-day and 5-day shipping measures side by side,
# matched on Department, Shop and Date.
df2 = pd.merge(
    df_pivot2[['Department','Shop','Date','% Orders Shipped in 3 days']],
    df_pivot3[['Department','Shop','Date','% Orders Shipped in 5 days']],
    how='left',
    on=['Department','Shop','Date'],
)

# Returns: same pairing for the 3-day and 5-day processing measures.
df3 = pd.merge(
    df_pivot4[['Department','Shop','Date','% Returns Processed in 3 days']],
    df_pivot5[['Department','Shop','Date','% Returns Processed in 5 days']],
    how='left',
    on=['Department','Shop','Date'],
)

# Combine Orders and Returns per Shop/Date. Both sides carry a Department
# column, so the result holds Department_x and Department_y.
df4 = pd.merge(df2, df3, how='left', on=['Shop','Date'])

# Attach every column of the Complaints frame on the same Shop/Date keys.
df5 = pd.merge(df4, df_pivot6, how='left', on=['Shop','Date'])

In [15]:
# Preview the first rows of the merged, one-row-per-Shop/Date frame.
df5.head()

Unnamed: 0,Department_x,Shop,Date,% Orders Shipped in 3 days,% Orders Shipped in 5 days,Department_y,% Returns Processed in 3 days,% Returns Processed in 5 days,Department,Target,Breakdown,value,# Complaints Received
0,Orders,Bath,2021-07-01,0.91,0.99,Returns,0.88,0.91,Complaints,0,# Received,25,25
1,Orders,Torquay,2021-07-01,0.84,0.92,Returns,0.86,0.91,Complaints,0,# Received,15,15
2,Orders,Exmouth,2021-07-01,0.85,0.89,Returns,0.76,0.78,Complaints,0,# Received,63,63
3,Orders,Plymouth,2021-07-01,0.81,0.88,Returns,0.68,0.73,Complaints,0,# Received,22,22
4,Orders,Portsmouth,2021-07-01,0.92,1.0,Returns,0.9,0.98,Complaints,0,# Received,12,12


In [16]:
# We wish to have the targets for each measure as a field that we can compare
# each measure to. Each target is a fixed constant written to every row.
target_by_column = {
    'Target - % Orders Shipped in 3 days': 0.95,
    'Target - % Orders Shipped in 5 days': 0.99,
    'Target - % Returns Processed in 3 days': 0.80,
    'Target - % Returns Processed in 5 days': 0.95,
    'Target - # Complaints Received': 0,
}
for target_column, target_value in target_by_column.items():
    df5[target_column] = target_value

In [17]:
# Final column layout: Shop and Date first, then each measure immediately
# followed by its target column.
report_columns = [
    'Shop',
    'Date',
    '% Orders Shipped in 3 days',
    'Target - % Orders Shipped in 3 days',
    '% Orders Shipped in 5 days',
    'Target - % Orders Shipped in 5 days',
    '% Returns Processed in 3 days',
    'Target - % Returns Processed in 3 days',
    '% Returns Processed in 5 days',
    'Target - % Returns Processed in 5 days',
    '# Complaints Received',
    'Target - # Complaints Received',
]
output = df5[report_columns]

In [18]:
# Preview the first 10 rows of the final output frame.
output.head(10)

Unnamed: 0,Shop,Date,% Orders Shipped in 3 days,Target - % Orders Shipped in 3 days,% Orders Shipped in 5 days,Target - % Orders Shipped in 5 days,% Returns Processed in 3 days,Target - % Returns Processed in 3 days,% Returns Processed in 5 days,Target - % Returns Processed in 5 days,# Complaints Received,Target - # Complaints Received
0,Bath,2021-07-01,0.91,0.95,0.99,0.99,0.88,0.8,0.91,0.95,25,0
1,Torquay,2021-07-01,0.84,0.95,0.92,0.99,0.86,0.8,0.91,0.95,15,0
2,Exmouth,2021-07-01,0.85,0.95,0.89,0.99,0.76,0.8,0.78,0.95,63,0
3,Plymouth,2021-07-01,0.81,0.95,0.88,0.99,0.68,0.8,0.73,0.95,22,0
4,Portsmouth,2021-07-01,0.92,0.95,1.0,0.99,0.9,0.8,0.98,0.95,12,0
5,Reading,2021-07-01,0.83,0.95,0.88,0.99,0.75,0.8,0.81,0.95,11,0
6,Southampton,2021-07-01,0.86,0.95,0.98,0.99,0.91,0.8,0.82,0.95,11,0
7,Hastings,2021-07-01,0.83,0.95,0.85,0.99,0.89,0.8,0.93,0.95,40,0
8,Leicester,2021-07-01,0.8,0.95,0.82,0.99,0.99,0.8,1.0,0.95,6,0
9,Nottingham,2021-07-01,0.9,0.95,0.98,0.99,0.87,0.8,0.93,0.95,15,0


In [19]:
# Output the data: write the final frame to an Excel workbook without the
# row index.
with pd.ExcelWriter('wk21-output.xlsx') as workbook:
    output.to_excel(workbook, index=False)