### NOTES:
This module is intended for the following situation:
- when a network graph in Obsidian is already complete but several new records from WoS still need to be appended, use this module to read the records that are downloaded directly from WoS.

### REQUIREMENT:
- should fit the format of the tab-delimited file that is downloaded directly from WoS.
### ***it would be better to make this a more general reader for this kind of text***.

### PROBLEM:
* Records downloaded directly from WoS use different column names from HistCite records in several places:
    - NR (WoS) == NCR (HistCite)
    - WoS records have no LCS or LCR columns
* Note: it seems that if the source name of a record is already in abbreviated form in the 'SO' column, then the 'J9' column of that record is empty; so when using 'J9', it is better to check for an empty value (`None` or an empty string);
* To be solved: the way the template is used is currently somewhat complicated and inelegant;
- maybe use a separate function to handle the output formatting;
- or remove `usecols`, read all columns into the dataframe, and access each element directly by its column name.
### CODES:

In [1]:
def formating_from_TabDeliFile_to_md(csv_path: str, use_cols: list, templ:str):
    '''
    Usage: extract information from a tab-delimited file and write one markdown
    file per record, filled in according to the given template;

    :param csv_path:
        path of the tab-delimited file (first line = column names);
    :param use_cols:
        names of the columns to read; needs at least 12 names and must include
        'AU', 'PY', 'SO', 'UT' and 'CR', which are always read by name;
        the order matters: template fields {0}-{8}, {10} and {11} are filled
        with the columns found at those positions of this list;
    :param templ:
        `str.format` template using positional fields {0}..{12};
        {9} is always the 'UT' value with 'WOS:' removed;
        {12} is always the cited references ('CR'), one '[[...]]' link per line;

    Notes: Output path is assigned to: './Projects/' (must already exist);
        each file is named '<first author>, <PY>, <SO>.md';
        files are opened in 'x' mode, so an already existing file raises
        FileExistsError instead of being overwritten;
        nothing is written until every record has been formatted.
    '''

    import re
    import pandas as pd

    dtfm = pd.read_table(
        csv_path,
        header=0, index_col=False, usecols=use_cols, keep_default_na=False, encoding='UTF_8'
    )

    # compiled once, reused for every cited reference of every record
    re_brackets = re.compile(r'[\[\]]+')
    # volume ('V12'), page ('P345') and DOI parts are kept out of the link text
    re_dropped_part = re.compile(r'^[PV][\d]*$|^DOI ')

    lst_output = []
    for row in range(dtfm.shape[0]): # single for_loop for all process
        record = dtfm.loc[row]  # look the row up once instead of once per field

        first_author = record['AU'].split('; ')[0].replace(',', '')
        filename = first_author + ', ' + str(record['PY']) + ', ' + record['SO'] + '.md'

        cr_links = []
        for each_CR in record['CR'].split('; '):
            cleaned = re_brackets.sub('', each_CR)
            kept = [part for part in cleaned.split(', ') if not re_dropped_part.match(part)]
            # an empty 'CR' cell, or a reference made only of dropped parts,
            # must not produce an empty '[[]]' link
            if any(kept):
                cr_links.append('[[' + ', '.join(kept) + ']]')
        output_CR = '\n'.join(cr_links)

        output_UT = record['UT'].replace('WOS:', '')

        filecontent = templ.format(
            *(record[col] for col in use_cols[:9]),  # {0}-{8}
            output_UT,                               # {9}
            str(record[use_cols[10]]),               # {10}
            str(record[use_cols[11]]),               # {11}
            output_CR,                               # {12}
        )

        lst_output.append((filename, filecontent))

    # save files ('with' closes each file on exit):
    for filename, filecontent in lst_output:
        with open(f'./Projects/{filename}', 'xt', encoding='UTF_8') as f:
            f.write(filecontent)

def set_tag_to_Bi(authors:str):
    '''
    Usage: replace every author entry that names Bi Yanchao with the tag '#BiYC';

    :param authors:
        author names separated by '; ';

    :return:
        the same '; '-separated string, in which each entry that is exactly
        'Bi, Y', 'Bi, YC' or 'Bi, Yanchao' (surname also accepted as 'BI')
        is replaced with '#BiYC'; every other entry is kept unchanged;
    '''
    import re

    # fullmatch, not match: a start-anchored test would also tag any other
    # author whose name merely begins with 'Bi, Y' (e.g. 'Bi, Yong', 'Bi, YF')
    is_target = re.compile(r'B[iI], (?:YC?|Yanchao)').fullmatch

    return '; '.join(
        '#BiYC' if is_target(author) else author
        for author in authors.split('; ')
    )



'''
Initiation
'''

# Input: the tab-delimited file to convert.
path = 'Data/CiteAnalysisData/Della_2018_TDF.txt'
# Position of each column in `use`; the template fields below refer to these positions.
#       0    1    2    3    4    5    6    7    8    9    10   11   12
use = ['TI','AU','PY','DT','SO','AB','DE','ID','DI','UT','NR','TC','CR']
# Positional `str.format` template for one markdown note.
# {9} receives the 'UT' value without its 'WOS:' prefix and {12} receives the
# cited references as one '[[...]]' link per line (both built by the function called below).
template = '''---
Alias: NeuroBasis
TC: {11}
---
#show_on
**Title**: {0}
**Authors**: {1}
**PubYear**: #PY{2}
**DocType**: #{3}
**Journal**: {4}
> **Abstract**:
> {5}

**AuthKW**: {6}
**Keywords+**: {7}
**DOI**: {8}
**WoS**: {9}

#### CitedRefs: ({10})
{12}'''

# Writes one markdown file per record into './Projects/'.
formating_from_TabDeliFile_to_md(csv_path=path, use_cols=use, templ=template)

In [3]:
import pandas as pd

# Input file: the same tab-delimited export used above.
csv_path = 'Data/CiteAnalysisData/Della_2018_TDF.txt'

# Column names of interest. This list is NOT passed to `read_table` below,
# so every column of the file is loaded into the dataframe.
use_cols = [
    'TI', 'AU', 'PY', 'DT', 'SO', 'AB', 'DE', 'ID', 'DI', 'UT',
    'NCR', 'TC', 'LCS', 'LCR', 'J9', 'CR',
]

# The first line is the header; with keep_default_na=False empty cells are
# kept as empty strings instead of being converted to NaN.
dtfm = pd.read_table(
    csv_path,
    header=0,
    index_col=False,
    keep_default_na=False,
    encoding='UTF_8',
)

In [1]:
# Read only the first line of the file (its column-name header).
# The `with` block closes the file on exit, so no explicit close() is needed.
with open('Data/CiteAnalysisData/Della_2018_TDF.txt', 'rt', encoding='utf_8') as f:
    row_idx = f.readline()

In [6]:
# Display the 'SO' (source name) column of every loaded record.
dtfm.loc[:, 'SO']

0    NEUROIMAGE
Name: SO, dtype: object