In [1]:
import pandas as pd
import re

In [2]:
# Load only column A of the workbook; skiprows=1 drops the first sheet row,
# so the header is taken from the row that follows it.
df = pd.read_excel(
    "E-741 Pivot.xlsx",
    usecols='A',
    skiprows=1,
)
df.head()

Unnamed: 0,Data
0,"Item1, 44, Group A"
1,"59, 12, Group A, Item2"
2,"Item1, Group A, 89"
3,"35, Item2, Group B, 18, 81"
4,"Group B, 90, Item2, 54"


In [3]:
def split_transform(data: str):
    """Collapse one comma-separated record into ``[total, *labels]``.

    The record is split on ``', '``. Every token that parses as a float is
    added to a running total; every other token is kept as a text label.

    Tokens are classified by attempting the numeric conversion instead of by
    their position after a string sort, so the result does not depend on
    labels sorting after digits (e.g. ``"3M"`` or ``"#Item"``) or on there
    being exactly two labels in the record.

    Args:
        data: A record such as ``"59, 12, Group A, Item2"``. Tokens may
            appear in any order.

    Returns:
        A list whose first element is the sum of the numeric tokens (``0``
        when there are none), followed by the non-numeric tokens in sorted
        order.
    """
    numbers = []
    labels = []

    for token in data.split(', '):
        try:
            # NOTE: float() also accepts spellings like "nan" and "inf";
            # such tokens are treated as numbers, not labels.
            numbers.append(float(token))
        except ValueError:
            labels.append(token)

    return [sum(numbers)] + sorted(labels)
        

In [4]:
# Parse every raw record with split_transform and store the resulting list
# for each row in a new 'Split' column.
df['Split'] = (
    df['Data']
    .apply(split_transform)
)
df.head()

Unnamed: 0,Data,Split
0,"Item1, 44, Group A","[44.0, Group A, Item1]"
1,"59, 12, Group A, Item2","[71.0, Group A, Item2]"
2,"Item1, Group A, 89","[89.0, Group A, Item1]"
3,"35, Item2, Group B, 18, 81","[134.0, Group B, Item2]"
4,"Group B, 90, Item2, 54","[144.0, Group B, Item2]"


In [5]:
# Expand each per-row list in 'Split' into three named columns.
# The frame is built once from a list of lists instead of via
# `.apply(pd.Series)`, which constructs a separate Series for every row and
# produces a column-less frame (breaking the rename) when df is empty.
split = pd.DataFrame(
    df['Split'].tolist(),
    index=df.index,
    columns=['Value', 'Group', 'Item'],
)

split.head()

Unnamed: 0,Value,Group,Item
0,44.0,Group A,Item1
1,71.0,Group A,Item2
2,89.0,Group A,Item1
3,134.0,Group B,Item2
4,144.0,Group B,Item2


In [6]:
# Sum 'Value' for every Group/Item pair: groups become rows, items become columns.
pivot = split.pivot_table(index='Group', columns='Item', values='Value', aggfunc='sum').reset_index()
pivot.columns.name = None  # drop the leftover 'Item' label on the column axis
# fillna returns a new frame; assign it so `pivot` itself holds 0 (not NaN)
# for Group/Item pairs that have no rows, instead of only displaying the fill.
pivot = pivot.fillna(0)
pivot

Unnamed: 0,Group,Item1,Item2,Item3
0,Group A,133.0,71.0,0.0
1,Group B,0.0,278.0,0.0
2,Group C,46.0,0.0,120.0


##### v2: classify tokens with a regex instead of relying on sort order

In [7]:
# Reload column A from the workbook so this section starts from the raw
# records again (skiprows=1 drops the first sheet row before the header).
df = pd.read_excel(
    "E-741 Pivot.xlsx",
    usecols='A',
    skiprows=1,
)
df

Unnamed: 0,Data
0,"Item1, 44, Group A"
1,"59, 12, Group A, Item2"
2,"Item1, Group A, 89"
3,"35, Item2, Group B, 18, 81"
4,"Group B, 90, Item2, 54"
5,"39, Group C, 18, Item3, 12, 14"
6,"37, Item3, Group C"
7,"Group C, Item1, 46"


In [8]:
def split_transform_regex(data: list):
    """Reduce a list of string tokens to ``[total, *labels]``.

    A token made up entirely of digits counts as a number; anything else is
    treated as a label.

    Args:
        data: String tokens from one record, in any order.

    Returns:
        A list starting with the sum of the all-digit tokens (as floats; ``0``
        when there are none), followed by the remaining tokens sorted.
    """
    all_digits = re.compile(r'\d+')

    # Single pass: bucket each token by whether it is entirely digits.
    buckets = {True: [], False: []}
    for token in data:
        buckets[all_digits.fullmatch(token) is not None].append(token)

    total = sum(float(token) for token in buckets[True])
    labels = sorted(buckets[False])
    return [total] + labels

In [9]:
# Tokenise each record on ', ' and hand the token list to the regex-based
# parser; the resulting list for each row is stored in 'Split'.
df['Split'] = (
    df['Data']
    .str.split(', ')
    .apply(split_transform_regex)
)
df

Unnamed: 0,Data,Split
0,"Item1, 44, Group A","[44.0, Group A, Item1]"
1,"59, 12, Group A, Item2","[71.0, Group A, Item2]"
2,"Item1, Group A, 89","[89.0, Group A, Item1]"
3,"35, Item2, Group B, 18, 81","[134.0, Group B, Item2]"
4,"Group B, 90, Item2, 54","[144.0, Group B, Item2]"
5,"39, Group C, 18, Item3, 12, 14","[83.0, Group C, Item3]"
6,"37, Item3, Group C","[37.0, Group C, Item3]"
7,"Group C, Item1, 46","[46.0, Group C, Item1]"


In [10]:
# Expand each per-row list in 'Split' into three named columns.
# The frame is built once from a list of lists instead of via
# `.apply(pd.Series)`, which constructs a separate Series for every row and
# produces a column-less frame (breaking the rename) when df is empty.
split = pd.DataFrame(
    df['Split'].tolist(),
    index=df.index,
    columns=['Value', 'Group', 'Item'],
)
split.head()

Unnamed: 0,Value,Group,Item
0,44.0,Group A,Item1
1,71.0,Group A,Item2
2,89.0,Group A,Item1
3,134.0,Group B,Item2
4,144.0,Group B,Item2


In [11]:
# Sum 'Value' for every Group/Item pair: groups become rows, items become columns.
pivot = split.pivot_table(index='Group', columns='Item', values='Value', aggfunc='sum').reset_index()
pivot.columns.name = None  # drop the leftover 'Item' label on the column axis
# fillna returns a new frame; assign it so `pivot` itself holds 0 (not NaN)
# for Group/Item pairs that have no rows, instead of only displaying the fill.
pivot = pivot.fillna(0)
pivot

Unnamed: 0,Group,Item1,Item2,Item3
0,Group A,133.0,71.0,0.0
1,Group B,0.0,278.0,0.0
2,Group C,46.0,0.0,120.0
