# Download Chowlk as a submodule and install the package using pip

In [None]:
!git submodule update --init --recursive
!pip install ..

# Load packages

In [1]:
%load_ext autoreload
%autoreload 2

from data2rdf.cli.abox_conversion import run_abox_pipeline_for_folder 
from data2rdf.annotation_pipeline import AnnotationPipeline
import warnings
import os
warnings.filterwarnings('ignore')


# CSV Example

The following example runs the pipeline and demonstrates the required input files.
These include:
- The raw **data**
- a **method graph**, which defines the abox in the experiment's scope. It starts out as a .drawio file that defines the relationships among the entities in the data and their properties. Please refer to [this tutorial](https://data2rdf.readthedocs.io/en/latest/workflow.html#abox-skeleton) for the complete process of creating a method graph
- a [**mapping**](https://data2rdf.readthedocs.io/en/latest/workflow.html#data-method-mapping), which is required to associate entities in the data with their equivalent entities within the method graph

There are a number of intermediate files created within the pipeline:
- `run_abox_pipeline_for_folder` creates a .ttl file of the defined abox

Finally, the pipeline creates an rdf graph of the column data 

In [18]:
# --- Locations used by the CSV example (all relative to the test folder) ---
working_folder = os.path.join("../", "tests", "csv_pipeline_test")
output_folder = os.path.join(working_folder, "output")
abox_folder_path = os.path.join(working_folder, "input", "method-graph")
template = os.path.join(
    abox_folder_path,
    "tensile_test_method_v6",
    "tensile_test_method_v6.mod.ttl",
)
mapping_file = os.path.join(
    working_folder, "input", "mapping", "tensile_test_mapping.xlsx"
)
raw_data = os.path.join(
    working_folder, "input", "data", "DX56_D_FZ2_WR00_43.TXT"
)

# Run the abox pipeline on the method-graph folder before building the
# annotation pipeline; the template path above points inside that folder.
run_abox_pipeline_for_folder(abox_folder_path)

# Parser selection and its options (tab-separated header and columns).
parser = "csv"
parser_args = dict(
    header_sep="\t",
    column_sep="\t",
    header_length=20,
)

# Positional arguments: data, parser, parser options, template, mapping, output.
pipeline = AnnotationPipeline(
    raw_data,
    parser,
    parser_args,
    template,
    mapping_file,
    output_folder,
    data_download_iri="https://127.0.0.1/id",
)
pipeline.run_pipeline()

Of 26 data individuals, 0 were successfully mapped to the method. See the data.mapping-result.xlsx file for mapping results.


In [None]:
# Show the contents of the output folder.
# os.listdir returns entries in arbitrary order, so sort them to make the
# printed listing reproducible across runs and platforms.
for file_name in sorted(os.listdir(output_folder)):
    print(file_name)
    

# The final complete graph including data graph, mapping graph and process graph can be used as rdflib object or exported as ttl.
## The ttl export can be used as input for the DSMS or any triple store.

In [None]:
# Build a fresh pipeline object from the variables defined in the CSV example.
# NOTE(review): unlike the CSV example cell, no data_download_iri is passed
# here — confirm this difference is intended.
pipeline = AnnotationPipeline(
    raw_data, parser, parser_args, template, mapping_file, output_folder
)

# Set all paths without running the pipeline again (it was already run in
# the block before).
pipeline.create_output()

# Export the graph object and print its length.
g = pipeline.export_graph()
print(len(g))

# Write the same graph out as a ttl file next to the other outputs.
merged_ttl_path = os.path.join(output_folder, 'merged_graph.ttl')
pipeline.export_ttl(merged_ttl_path)


# Excel parser example

In [None]:
# --- Locations used by the Excel example (all relative to the test folder) ---
working_folder = os.path.join("../", "tests", "xls_pipeline_test")
output_folder = os.path.join(working_folder, "output")
abox_folder_path = os.path.join(working_folder, "input", "method-graph")
template = os.path.join(
    abox_folder_path,
    "tensile_test_method_v6",
    "tensile_test_method_v6.mod.ttl",
)
mapping_file = os.path.join(working_folder, "input", "mapping", "mapping.xlsx")
raw_data = os.path.join(working_folder, "input", "data", "AFZ1-Fz-S1Q.xlsm")
location_mapping = os.path.join(
    working_folder, "input", "mapping", "location_mapping.xlsx"
)

# Run the abox pipeline on the method-graph folder before building the
# annotation pipeline; the template path above points inside that folder.
run_abox_pipeline_for_folder(abox_folder_path)

# The excel parser is given the path of the location-mapping workbook.
parser = "excel"
parser_args = dict(location_mapping_f_path=location_mapping)

# Positional arguments: data, parser, parser options, template, mapping, output.
pipeline = AnnotationPipeline(
    raw_data,
    parser,
    parser_args,
    template,
    mapping_file,
    output_folder,
    base_iri="http://www.test4.de",
)
pipeline.run_pipeline()