In [None]:
import pandas as pd
from rdflib import Graph, Namespace, URIRef, Literal
from rdflib.namespace import RDF, XSD

# File paths for all datasets
# Maps a dataset label to the CSV file holding its schema-element counts.
# The label doubles as the output name: the processing loop replaces spaces
# with underscores and appends ".ttl". The CSV paths are relative, so they are
# resolved against the current working directory.
# NOTE(review): the 'Wikidata organic int1 log2017KG2017' label says
# log2017/KG2017 while its file name says log2018_kg2018 -- confirm which one
# is correct.
file_paths = {
    'Bio2RDF all log2013_KG2024': 'Bio2RDF log2013kg2024_combined_schema_elements.csv',
    'Bio2RDF all log2019_KG2024': 'Bio2RDF log2019kg2024_combined_schema_elements.csv',
    'Bio2RDF robotic log2019_KG2024': 'Bio2RDF robotic log2019_kg2024_combined_schema_elements.csv',
    'Bio2RDF organic log2019KG2024': 'Bio2RDF organic log2019kg2024_combined_schema_elements.csv',
    'Wikidata organic log2017KG2017': 'Wikidata log2017kg2017_combined_schema_elements.csv',
    'Wikidata robotic log2017_KG2017': 'Wikidata robotic log2017_kg2017_combined_schema_elements.csv',
    'Wikidata organic log2017KG2018': 'Wikidata log2017kg2018_combined_schema_elements.csv',
    'Wikidata robotic log2018_KG2018': 'Wikidata robotic log2018_kg2018_combined_schema_elements.csv',
    'Wikidata organic int1 log2017KG2017': 'Wikidata organic-int1 log2018_kg2018_combined_schema_elements.csv',
    'Wikidata organic int7 log2018_KG2018': 'Wikidata organic-int7 log2018_kg2018_combined_schema_elements.csv'
}

# URIs for RDF properties and types
# These constants are used by csv_to_ttl for the two triples it emits per
# schema element (an rdf:type triple and a frequency triple).
SCH = Namespace("http://schema.org/")  # Schema.org namespace with 'sch:' prefix
# NOTE(review): this is the URL of the Wikidata wiki *page*; Wikidata's entity
# IRIs normally take the form http://www.wikidata.org/entity/Q... -- confirm
# which form downstream consumers expect before changing it.
TYPE_URI = URIRef("https://www.wikidata.org/wiki/Q379825")  # Wikidata class for schema elements
# NOTE(review): `frequency` may not be a term defined by schema.org -- verify
# against the vocabulary (Namespace attribute access builds any IRI unchecked).
FREQUENCY_URI = SCH.frequency  # Schema.org frequency property

# Function to convert CSV to TTL
def csv_to_ttl(input_csv_path, output_ttl_path):
    """Convert a schema-element frequency CSV into a Turtle (.ttl) file.

    Every usable CSV row contributes two triples about its schema element:
    an ``rdf:type`` triple pointing at ``TYPE_URI`` and a ``FREQUENCY_URI``
    triple whose object is the count as an ``xsd:integer`` literal.

    Rows with a missing "Schema Element" or "TotalCount" cell are skipped,
    because they cannot form a valid subject IRI or integer literal.

    Args:
        input_csv_path: Path of a CSV file that has "Schema Element" and
            "TotalCount" columns.
        output_ttl_path: Path of the Turtle file to write; an existing file
            is overwritten.

    Raises:
        KeyError: If one of the required columns is absent.
        ValueError: If a "TotalCount" value cannot be parsed as a number.
    """
    # Load CSV data, dropping rows that lack either required field
    df = pd.read_csv(input_csv_path)
    df = df.dropna(subset=["Schema Element", "TotalCount"])

    # A column that contained blanks is read as float; coerce to integers so
    # the literal's lexical form is "5" rather than "5.0" (invalid xsd:integer).
    counts = pd.to_numeric(df["TotalCount"]).astype("int64")

    # Create an RDF graph
    g = Graph()
    g.bind("sch", SCH)  # Bind schema.org namespace to 'sch'

    # Process each row; iterating the columns avoids the per-row Series
    # that DataFrame.iterrows() would build.
    for schema_element, total_count in zip(df["Schema Element"], counts):
        # Use the schema element directly as a URI
        schema_uri = URIRef(schema_element)

        # Add triples
        g.add((schema_uri, RDF.type, TYPE_URI))  # Type triple
        g.add((schema_uri, FREQUENCY_URI, Literal(int(total_count), datatype=XSD.integer)))  # Frequency triple

    # Serialize graph
    turtle = g.serialize(format="turtle")
    if isinstance(turtle, bytes):
        # rdflib < 6 returns bytes from serialize(); newer versions return str
        turtle = turtle.decode("utf-8")
    # Turtle documents are UTF-8; do not rely on the platform default encoding
    with open(output_ttl_path, "w", encoding="utf-8") as f:
        f.write(turtle)
    print(f"Converted {input_csv_path} to {output_ttl_path}")

# Process all files
# Each dataset label becomes the output name (spaces -> underscores, ".ttl").
for dataset_name, csv_file in file_paths.items():
    output_file = f"{dataset_name.replace(' ', '_')}.ttl"  # Generate output file name
    try:
        csv_to_ttl(csv_file, output_file)
    except FileNotFoundError as exc:
        # A missing input must not abort the batch: report it and carry on so
        # the remaining datasets are still converted.
        print(f"Skipped {dataset_name}: {exc}")


Converted Bio2RDF log2013kg2024_combined_schema_elements.csv to Bio2RDF_all_log2013_KG2024.ttl
Converted Bio2RDF log2019kg2024_combined_schema_elements.csv to Bio2RDF_all_log2019_KG2024.ttl
Converted Bio2RDF robotic log2019_kg2024_combined_schema_elements.csv to Bio2RDF_robotic_log2019_KG2024.ttl
Converted Bio2RDF organic log2019kg2024_combined_schema_elements.csv to Bio2RDF_organic_log2019KG2024.ttl
Converted Wikidata log2017kg2017_combined_schema_elements.csv to Wikidata_organic_log2017KG2017.ttl
Converted Wikidata robotic log2017_kg2017_combined_schema_elements.csv to Wikidata_robotic_log2017_KG2017.ttl
Converted Wikidata log2017kg2018_combined_schema_elements.csv to Wikidata_organic_log2017KG2018.ttl
Converted Wikidata robotic log2018_kg2018_combined_schema_elements.csv to Wikidata_robotic_log2018_KG2018.ttl
