### Prepping Data Challenge: Picture Perfect (Week 35)


### Requirements
- Input the data
- Split up the sizes of the pictures and the frames into lengths and widths
  - Remember an inch is 2.54cm
- Frames can always be rotated, so make sure you know which is the min/max side
- See which pictures fit into which frames
- Work out the area of the frame vs the area of the picture and choose the frame with the smallest excess
- Output the data

In [1]:
import pandas as pd
import numpy as np
import re

In [2]:
# Input the data: pull both worksheets from the workbook in a single read
with pd.ExcelFile('Wk35-Input.xlsx') as xl:
    sheets = pd.read_excel(xl, sheet_name=['Pictures', 'Frames'])
picture = sheets['Pictures']
frame = sheets['Frames']

In [3]:
# Preview the first five picture rows
picture.head(n=5)

Unnamed: 0,Picture,Size
0,A,26cm x 23cm
1,B,30cm x 26cm
2,C,24cm2
3,D,25cm x 23cm
4,E,22cm x 19cm


In [4]:
# Preview the first five frame rows
frame.head(n=5)

Unnamed: 0,Size
0,"8"" x 10"""
1,"6"" x 4"""
2,"8"" x 6"""
3,30cm x 21cm
4,31cm x 25cm


In [5]:
# List the distinct raw size strings to see which formats need parsing
pd.unique(picture['Size'])

array(['26cm x 23cm', '30cm x 26cm', '24cm2', '25cm x 23cm',
       '22cm x 19cm', '28cm x 20cm', '33cm x 23cm', '23cm x 21cm',
       '36cm x 25cm', '26cm x 20cm', '26cm x 28cm ', '23cm x 32cm',
       '33cm x 24cm'], dtype=object)

In [6]:
CM_PER_INCH = 2.54  # conversion factor given in the challenge requirements


def get_size(x):
    """Parse a size description into its two side lengths in centimetres.

    Accepted shapes (surrounding whitespace is ignored):
      * two sides, e.g. ``'26cm x 23cm'`` or ``'8" x 10"'``
      * a square written with a trailing exponent, e.g. ``'24cm2'``
      * a single number, treated as a square

    Sizes whose text contains ``cm`` are taken as centimetres; anything
    else is treated as inches and converted with ``CM_PER_INCH``.

    Parameters
    ----------
    x : str
        Raw size text.

    Returns
    -------
    list of float
        ``[shorter_side, longer_side]`` in centimetres, so a rotated frame
        or picture compares the same as an unrotated one.

    Raises
    ------
    ValueError
        If no number can be found in ``x``.
    """
    text = str(x).strip()

    # A "2" (or superscript two) glued to the end of the unit marks a square
    # ("24cm2"); drop it so it is never read as a second side.
    dims_text = re.sub(r'(?<=[A-Za-z])[2\u00b2]$', '', text)

    # Raw-string pattern; the optional fractional part keeps "8.5" whole.
    numbers = [float(n) for n in re.findall(r'\d+(?:\.\d+)?', dims_text)]
    if not numbers:
        raise ValueError(f'No dimension found in size {x!r}')

    side_1 = numbers[0]
    side_2 = numbers[1] if len(numbers) > 1 else side_1  # one number => square

    factor = 1.0 if 'cm' in text.lower() else CM_PER_INCH
    return sorted([side_1 * factor, side_2 * factor])

In [7]:
# Split up the sizes of the pictures into sides: parse once, then pull the
# shorter and longer side out of each parsed pair
picture['Side_Size'] = picture['Size'].apply(get_size)
picture['Min'] = [sides[0] for sides in picture['Side_Size']]
picture['Max'] = [sides[1] for sides in picture['Side_Size']]

In [8]:
# Frames can always be rotated, so store each frame as [shorter, longer] side
frame['Side_Size2'] = frame['Size'].apply(get_size)

In [9]:
# Pair every picture with every frame (cartesian product)
df = picture.merge(frame, how='cross')

In [10]:
# Preview the first five picture/frame pairings
df.head(n=5)

Unnamed: 0,Picture,Size_x,Side_Size,Min,Max,Size_y,Side_Size2
0,A,26cm x 23cm,"[23.0, 26.0]",23.0,26.0,"8"" x 10""","[20.32, 25.4]"
1,A,26cm x 23cm,"[23.0, 26.0]",23.0,26.0,"6"" x 4""","[10.16, 15.24]"
2,A,26cm x 23cm,"[23.0, 26.0]",23.0,26.0,"8"" x 6""","[15.24, 20.32]"
3,A,26cm x 23cm,"[23.0, 26.0]",23.0,26.0,30cm x 21cm,"[21.0, 30.0]"
4,A,26cm x 23cm,"[23.0, 26.0]",23.0,26.0,31cm x 25cm,"[25.0, 31.0]"


In [11]:
#See which pictures fit into which frames
def _frame_holds_picture(row):
    """True when each frame side is at least the matching picture side."""
    frame_short, frame_long = row['Side_Size2'][0], row['Side_Size2'][1]
    picture_short, picture_long = row['Side_Size'][0], row['Side_Size'][1]
    return frame_short >= picture_short and frame_long >= picture_long


def _excess_area(row):
    """Frame area minus picture area for one picture/frame pairing."""
    frame_area = np.prod(row['Side_Size2'])
    picture_area = np.prod(row['Side_Size'])
    return frame_area - picture_area


df['Fit'] = df.apply(_frame_holds_picture, axis=1)
df = df.loc[df['Fit']].copy()  # copy so the new columns land on an independent frame

#Work out the area of the frame vs the area of the picture and choose the frame with the smallest excess
df['Excess Area'] = df.apply(_excess_area, axis=1)
df['Min Excess Area'] = df.groupby('Picture')['Excess Area'].transform('min')

In [12]:
# Keep, for each picture, only the pairing(s) whose excess area is the minimum
is_best_fit = df['Excess Area'].eq(df['Min Excess Area'])
df = df.loc[is_best_fit, ['Picture', 'Size_y', 'Max', 'Min']]

In [13]:
# Preview the first five chosen frames
df.head(n=5)

Unnamed: 0,Picture,Size_y,Max,Min
4,A,31cm x 25cm,26.0,23.0
14,B,30cm2,30.0,26.0
24,C,25cm2,24.0,24.0
33,D,25cm2,25.0,23.0
43,E,20cm x 25cm,22.0,19.0


In [14]:
# Output the data (the row index is left out of the file)
output_path = 'wk35-output.csv'
df.to_csv(output_path, index=False)