# Purpose: Convert SST-1 Sample Sheet Metadata to MediaWiki Table

## Imports and File Selection 

In [2]:
import numpy as np
import pandas as pd
from pathlib import Path
from datetime import date

In [12]:
# Provide path to excel sheet
# Ex: r'/nsls2/users/bpatel/Sample_Bar_template_v2022_11_beta_2_BPSuggestion.xlsx'
excelSheetInputPath = Path(r'./Sample_Bar_template_v2022_11_beta_2_BPSuggestion.xlsx')

## Load and Process Data

In [13]:
def convertSampleSheetExcelMediaWiki(excelSheet: Path = None, rulesSheetName: str = 'SheetRulesAndMetaData', versionCell: str = 'B4', startRow_Params: int = 7 , endRow_Params: int = None, startColumn_Params: str = 'A', endColumn_Params:str = 'F') -> str:
    """Converts Sample Sheet Parameter Metadata into a MediaWiki-compatible format string.
    
    Parameters
    ----------
    excelSheet: Path
        Path (or string) to the excel sheet to be loaded.
    rulesSheetName: str
        Name of the excel sheet which should be parsed for metadata
    versionCell: str
        Location (e.g., 'B4') of the cell that contains the sheet version number
    startRow_Params: int
        Excel row number which contains the header for the metadata table (excel starts at row 1)
    endRow_Params: int
        Excel row number which contains the last row of metadata (leave as -1 if scanning to end of file)
    startColumn_Params: str
        First excel column (by letter) that contains the metadata table
    endColumn_Params:str
        Last excel column (by letter) that contains the metadata table

    Returns
    -------
    str
        A string containing the formatted table ready to copy-paste into MediaWiki
    """
    # Split versionCell in (row, column)
    versRow, versColumn = [int(''.join(filter(str.isdigit, versionCell))), ''.join(filter(str.isalpha, versionCell))]

    # Extract Version Code as a string
    versionStr = pd.read_excel(excelSheet, sheet_name=rulesSheetName, index_col=None, usecols = versColumn, nrows=0, header = versRow-1)
    versionStr = versionStr.columns.values[0]
    #print(versionStr)
    
    # Get Current Date
    date.today()
    
    # Add Wiki Page Header to Output string
    outStr = f"== SST-1 Sample Sheet Syntax Version: {versionStr} Last Updated: {date.today()} ==\n"
    

    # Extract Metadata Table

    # If endRow_Params is provided, limit the number of rows parsed
    if endRow_Params is None:
        numRows = None
    else:    
        numRows = endRow_Params - startRow_Params    

    # Convert column bounds to string
    colString = startColumn_Params + ":" + endColumn_Params

    excelMetadataIn = pd.read_excel(excelSheet, sheet_name=rulesSheetName, 
                                    header=startRow_Params-1, nrows = numRows, 
                                    usecols=colString)

    # Replace NaNs and 'nan's with blank
    excelMetadataIn = excelMetadataIn.replace('nan', '')
    excelMetadataIn = excelMetadataIn.fillna('')
    #display(excelMetadataIn)

    # Construct MediaWiki Table
    outStr += "\n" + r'{| class="wikitable sortable"' + "\n" + '|-\n'

    #Add header row elements
    outStr += '! '
    for colHeader in excelMetadataIn.columns:
        filteredColHeader = str(colHeader).replace('\r', ' ').replace('\n', ' ')
        outStr += f"{filteredColHeader} !! "

    #trim extra "!! "
    outStr = outStr[:-4]

    # Add Metadata Row Elements

    ## Loop through metadata rows
    for mdRow in excelMetadataIn.index-1:
        ###Loop through columns
        outStr += '\n|-\n| '
        for mdVal in excelMetadataIn.iloc[mdRow].to_list():
            filteredmdVal = str(mdVal).replace('\r', ' ').replace('\n', ' ')
            outStr += f"{filteredmdVal} || "
            pass
            #print(str(mdVal) + "\n")

    # Add MediaWiki Table End
    outStr += "\n|}"
    #print(outStr)

    return outStr

## Print MediaWiki Text

In [11]:
# Example Output see: https://wiki-nsls2.bnl.gov/beamline7ID1/index.php?title=User:Bijal
print(convertSampleSheetExcelMediaWiki(excelSheetInputPath))

== SST-1 Sample Sheet Syntax Version: 2023-1.1 Last Updated: 2023-01-05 ==

{| class="wikitable sortable"
|-
! Sheet !! Parameter !! Description !! Rules !! Example !! Notes
|-
|  ||  ||  ||  ||  ||   || 
|-
| Bar || bar_name || unique name for this bar ||   || testbar ||  || 
|-
| Bar || sample_id || REQUIRED unique identifier for the sample.  ||  Must match any physical sample labels || EG01 ||  || 
|-
| Bar || sample_name || REQUIRED will be in file name (readable by whoever is loading and measuring samples) can  || Underscores are discouraged || P3HT-AN-120C ||  || 
|-
| Bar || project_name || REQUIRED folder name for data The largest degree of sample classification ||   || NEXAFS or RSoXS or OPVs ||  || 
|-
| Bar || institution || REQUIRED Short abbreviation of your institution that will be added to folder names.   || Be consistent across all beamtimes || NIST or NCSU or UPENN ||  || 
|-
| Bar || proposal_id || REQUIRED 6 digit proposal for this measurement.  This must match the a

# Example Output
https://wiki-nsls2.bnl.gov/beamline7ID1/index.php?title=User:Bijal

# Test Code

In [83]:
excelSheet = excelSheetInputPath
rulesSheetName = 'SheetRulesAndMetaData'
versionCell = 'B4'
startRow_Params = 7
endRow_Params = None
startColumn_Params = 'A'
endColumn_Params = 'F'

In [72]:
# Split versionCell in (row, column)
versRow, versColumn = [int(''.join(filter(str.isdigit, versionCell))), ''.join(filter(str.isalpha, versionCell))]

# Extract Version Code as a string
versionStr = pd.read_excel(excelSheet, sheet_name=rulesSheetName, index_col=None, usecols = versColumn, nrows=0, header = versRow-1)
versionStr = versionStr.columns.values[0]
#print(versionStr)

2023-1.1


In [159]:
# Extract Metadata Table

# If endRow_Params is provided, limit the number of rows parsed
if endRow_Params is None:
    numRows = None
else:    
    numRows = endRow_Params - startRow_Params    

# Convert column bounds to string
colString = startColumn_Params + ":" + endColumn_Params
    
excelMetadataIn = pd.read_excel(excelSheet, sheet_name=rulesSheetName, 
                                header=startRow_Params-1, nrows = numRows, 
                                usecols=colString)

# Replace NaNs and 'nan's with blank
excelMetadataIn = excelMetadataIn.replace('nan', '')
excelMetadataIn = excelMetadataIn.fillna('')
#display(excelMetadataIn)

# Construct MediaWiki Table
outStr = r'{| class="wikitable sortable"' + "\n" + '|-\n'

#Add header row elements
outStr += '! '
for colHeader in excelMetadataIn.columns:
    filteredColHeader = str(colHeader).replace('\r', ' ').replace('\n', ' ')
    outStr += f"{filteredColHeader} !! "

#trim extra "!! "
outStr = outStr[:-4]

# Add Metadata Row Elements

## Loop through metadata rows
for mdRow in excelMetadataIn.index-1:
    ###Loop through columns
    outStr += '\n|-\n| '
    for mdVal in excelMetadataIn.iloc[mdRow].to_list():
        filteredmdVal = str(mdVal).replace('\r', ' ').replace('\n', ' ')
        outStr += f"{filteredmdVal} || "
        pass
        #print(str(mdVal) + "\n")

# Add MediaWiki Table End
outStr += "\n|}"
#print(outStr)

return outStr


{| class="wikitable sortable"
|-
! Sheet !! Parameter !! Description !! Rules !! Example !! Notes
|-
|  ||  ||  ||  ||  ||   || 
|-
| Bar || bar_name || unique name for this bar ||   || testbar ||  || 
|-
| Bar || sample_id || REQUIRED unique identifier for the sample.  ||  Must match any physical sample labels || EG01 ||  || 
|-
| Bar || sample_name || REQUIRED will be in file name (readable by whoever is loading and measuring samples) can  || Underscores are discouraged || P3HT-AN-120C ||  || 
|-
| Bar || project_name || REQUIRED folder name for data The largest degree of sample classification ||   || NEXAFS or RSoXS or OPVs ||  || 
|-
| Bar || institution || REQUIRED Short abbreviation of your institution that will be added to folder names.   || Be consistent across all beamtimes || NIST or NCSU or UPENN ||  || 
|-
| Bar || proposal_id || REQUIRED 6 digit proposal for this measurement.  This must match the approved proposal for this beamtime. General user proposals generally begin w

In [24]:
excelDataIn

Unnamed: 0,0,1,2,3,4,5
0,Sheet Rules and Metadata,Users should not modify this sheet,,,,
1,,,,,,
2,,,Syntax,,,
3,Sheet Version Number,2023-1.1,Cycle Active.Version,,,
4,,,,,,
5,,,,,,
6,Sheet,Parameter,Description,Rules,Example,Notes
7,Bar,bar_name,unique name for this bar,,testbar,
8,Bar,sample_id,REQUIRED\nunique identifier for the sample.,Must match any physical sample labels,EG01,
9,Bar,sample_name,REQUIRED\nwill be in file name (readable by wh...,Underscores are discouraged,P3HT-AN-120C,
