In [1]:
from diskos.reader import build_reader
from diskos.spec import ParsedDiskosFile, BlockType
import diskos.spec as spec
from diskos.sender_specific.merge import AptAnalId

In [2]:
# Location of the DISKOS export to parse. Kept in one named constant so the
# machine-specific path is easy to spot and change.
DISKOS_FILE = r'c:\data\diskos\6406_12-3_S__ROCK_AND_CORE__GCH_RAW_1.ASC'

reader = build_reader(DISKOS_FILE, quiet_mode=True)
spec.SUPPRESS_WARNINGS = True  # some rows have missing cells - we know
parsed_file = reader.parse_file()
# Work on a copy: later cells add columns to `df`, and those additions should
# not leak back into the frame held by the parsed file.
# NOTE(review): assumes `as_dataframe` may hand back a shared frame - confirm.
df = parsed_file.get_identity_block(BlockType.Analysis).as_dataframe.copy()

In [3]:
# Preview the Analysis identity block as loaded (before any columns are added).
df

Unnamed: 0,FractionID,AnalID,AnalType,ADescription,ALaboratory,ADate,AInstrument,AComments
0,133092ARE,133092AREA,RE,A,APT,26.05.2014,Rock-Eval 6,
1,133093ARE,133093AREA,RE,A,APT,26.05.2014,Rock-Eval 6,
2,133094BRE,133094BREA,RE,A,APT,26.05.2014,Rock-Eval 6,
3,133095ARE,133095AREA,RE,A,APT,26.05.2014,Rock-Eval 6,
4,133096ARE,133096AREA,RE,A,APT,26.05.2014,Rock-Eval 6,
...,...,...,...,...,...,...,...,...
276,133113WVR,133113WVRA,VR,A,APT,14.10.2014,Zeiss Universal MPM03,"As above, but terrestrial OM more common."
277,133118WVR,133118WVRA,VR,A,APT,14.10.2014,Zeiss Universal MPM03,As above.
278,133122WVR,133122WVRA,VR,A,APT,14.10.2014,Zeiss Universal MPM03,Increased quantity of terrestrial organic matt...
279,133126WVR,133126WVRA,VR,A,APT,14.10.2014,Zeiss Universal MPM03,Vitrinite fairly common.


In [4]:
# Tag every row with the analysis code that AptAnalId extracts from its AnalID,
# then collect the distinct codes for the column expansion further down.
df['AnalCode'] = df['AnalID'].apply(
    lambda anal_id: AptAnalId(anal_id).anal_code
)
unique_anal_codes = df['AnalCode'].unique()

In [5]:
# Metadata columns that get one copy per analysis code.
cols_to_expand = [
    'ALaboratory',
    'ADate',
    'AInstrument',
    'AComments',
]

In [6]:
# Give every analysis code its own copy of each metadata column, named
# "<column>_<code>". A row keeps its value only in the columns that belong to
# its own code and gets an empty string in all the others.
for code in unique_anal_codes:
    # Vectorised row mask, computed once per code rather than re-evaluating a
    # Python-level comparison for every row of every new column.
    is_code = df['AnalCode'] == code
    for col in cols_to_expand:
        # Series.where keeps the value where the mask is True, else ''.
        df[f"{col}_{code}"] = df[col].where(is_code, '')

In [7]:
# Preview the frame again, now with AnalCode and the per-code metadata columns.
df

Unnamed: 0,FractionID,AnalID,AnalType,ADescription,ALaboratory,ADate,AInstrument,AComments,AnalCode,ALaboratory_RE,...,AInstrument_GD,AComments_GD,ALaboratory_VK,ADate_VK,AInstrument_VK,AComments_VK,ALaboratory_VR,ADate_VR,AInstrument_VR,AComments_VR
0,133092ARE,133092AREA,RE,A,APT,26.05.2014,Rock-Eval 6,,RE,APT,...,,,,,,,,,,
1,133093ARE,133093AREA,RE,A,APT,26.05.2014,Rock-Eval 6,,RE,APT,...,,,,,,,,,,
2,133094BRE,133094BREA,RE,A,APT,26.05.2014,Rock-Eval 6,,RE,APT,...,,,,,,,,,,
3,133095ARE,133095AREA,RE,A,APT,26.05.2014,Rock-Eval 6,,RE,APT,...,,,,,,,,,,
4,133096ARE,133096AREA,RE,A,APT,26.05.2014,Rock-Eval 6,,RE,APT,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
276,133113WVR,133113WVRA,VR,A,APT,14.10.2014,Zeiss Universal MPM03,"As above, but terrestrial OM more common.",VR,,...,,,,,,,APT,14.10.2014,Zeiss Universal MPM03,"As above, but terrestrial OM more common."
277,133118WVR,133118WVRA,VR,A,APT,14.10.2014,Zeiss Universal MPM03,As above.,VR,,...,,,,,,,APT,14.10.2014,Zeiss Universal MPM03,As above.
278,133122WVR,133122WVRA,VR,A,APT,14.10.2014,Zeiss Universal MPM03,Increased quantity of terrestrial organic matt...,VR,,...,,,,,,,APT,14.10.2014,Zeiss Universal MPM03,Increased quantity of terrestrial organic matt...
279,133126WVR,133126WVRA,VR,A,APT,14.10.2014,Zeiss Universal MPM03,Vitrinite fairly common.,VR,,...,,,,,,,APT,14.10.2014,Zeiss Universal MPM03,Vitrinite fairly common.


In [8]:
# Pick the AnalID key plus every column whose name ends with one of the
# selected analysis codes, then preview that slice.
analysis_codes = ['VR']
code_suffixes = tuple(analysis_codes)  # str.endswith accepts a tuple of suffixes
cols_to_merge = ['AnalID']
cols_to_merge += [c for c in df.columns if c.endswith(code_suffixes)]
df[cols_to_merge]

Unnamed: 0,AnalID,ALaboratory_VR,ADate_VR,AInstrument_VR,AComments_VR
0,133092AREA,,,,
1,133093AREA,,,,
2,133094BREA,,,,
3,133095AREA,,,,
4,133096AREA,,,,
...,...,...,...,...,...
276,133113WVRA,APT,14.10.2014,Zeiss Universal MPM03,"As above, but terrestrial OM more common."
277,133118WVRA,APT,14.10.2014,Zeiss Universal MPM03,As above.
278,133122WVRA,APT,14.10.2014,Zeiss Universal MPM03,Increased quantity of terrestrial organic matt...
279,133126WVRA,APT,14.10.2014,Zeiss Universal MPM03,Vitrinite fairly common.


In [9]:
from typing import Optional

import pandas as pd

# Name of the key column shared by the data frames and the metadata frame.
ANAL_ID_COL = 'AnalID'


def _merge_in_metadata(_df: pd.DataFrame,
                       metadata: Optional[pd.DataFrame] = None) -> pd.DataFrame:
    """Left-join the per-analysis-code metadata columns onto ``_df``.

    The analysis codes present in ``_df`` are derived from its AnalID column
    via ``AptAnalId``. Every column of the metadata frame whose name ends in
    ``_<code>`` for one of those codes is then joined onto ``_df`` by AnalID.

    Args:
        _df: Frame to enrich; must contain the ``ANAL_ID_COL`` column.
        metadata: Frame holding ``ANAL_ID_COL`` plus the ``<column>_<code>``
            metadata columns. Defaults to ``_df`` itself, which keeps the
            original one-argument call working; in that case there is nothing
            new to add and an unchanged copy of ``_df`` is returned.

    Returns:
        A new DataFrame with the rows of ``_df`` plus the matching metadata
        columns. ``_df`` itself is not modified.

    Raises:
        KeyError: If ``ANAL_ID_COL`` is missing from ``_df`` or, when there is
            something to merge, from ``metadata``.
    """
    source = _df if metadata is None else metadata

    analysis_codes = _df[ANAL_ID_COL].apply(
        lambda anal_id: AptAnalId(anal_id).anal_code).unique()

    # Match on "_<code>", the way the expanded columns are named, so a column
    # that merely happens to end in the same letters as a code is not picked up.
    suffixes = tuple(f"_{code}" for code in analysis_codes)

    # Skip columns _df already has; merging them again would only produce
    # duplicated "_x"/"_y" columns.
    cols_to_merge = [c for c in source.columns
                     if c.endswith(suffixes) and c not in _df.columns]
    if not cols_to_merge:
        return _df.copy()

    relevant_metadata = source[[ANAL_ID_COL] + cols_to_merge]
    # DataFrame.merge returns a new frame rather than modifying _df in place,
    # so the result has to be returned to the caller.
    return _df.merge(relevant_metadata, on=ANAL_ID_COL, how='left')

In [10]:
# Position of the data block to enrich within parsed_file.blocks.
# NOTE(review): presumably the Rock-Eval table (S1/S2/S3/TMAX/TOC columns in
# the output below) - the index is positional, confirm for other files.
RE_BLOCK_INDEX = 6

re_df = parsed_file.blocks[RE_BLOCK_INDEX].as_dataframe
re_df_merged = _merge_in_metadata(re_df)

In [11]:
# Show the frame returned by _merge_in_metadata.
# NOTE(review): the recorded output lists only the data columns (AnalID, S1,
# S2, S3, TMAX, TOC, PComments) - no metadata columns were merged in.
re_df_merged

Unnamed: 0,AnalID,S1,S2,S3,TMAX,TOC,PComments
0,133092AREA,0.05,0.32,0.31,374,1.47,TOC from Leco
1,133093AREA,0.04,0.52,0.48,384,1.33,TOC from Leco
2,133094BREA,0.08,0.62,1.07,392,1.30,TOC from Leco
3,133095AREA,0.04,0.56,0.36,431,1.48,TOC from Leco
4,133096AREA,0.05,0.56,0.68,433,1.62,TOC from Leco
...,...,...,...,...,...,...,...
56,10639WREH,6.97,76.66,1.15,430,12.28,TOC from Leco
57,10639WREI,5.75,74.81,0.98,429,12.28,TOC from Leco
58,10639WREJ,4.54,67.85,0.71,432,12.35,TOC from Leco
59,10639WREK,5.25,70.97,2.11,431,12.35,TOC from Leco


In [12]:
# Distinct analysis codes found in the Analysis identity block's AnalCode column.
unique_anal_codes

array(['RE', 'BU', 'GW', 'GT', 'GS', 'GP', 'MS', 'MA', 'CSAT', 'CARO',
       'CWHO', 'GG', 'GC', 'GD', 'VK', 'VR'], dtype=object)