In [None]:
import json         # jsonl to json
import os

def convert_jsonl_to_json(input_jsonl_file, output_json_folder):
    """Convert a JSON Lines file into a single JSON file holding one array.

    Each non-empty line of ``input_jsonl_file`` is parsed as one JSON value.
    All successfully parsed values are written, in input order, as a JSON
    array to ``<output_json_folder>/<input base name>.json``.

    Args:
        input_jsonl_file: Path of the JSONL file to read.
        output_json_folder: Folder that receives the output file; it is
            created (including parents) when it does not exist yet.

    Returns:
        None. Progress and per-line decode errors are reported with print().

    Raises:
        OSError: If the input cannot be opened or the output cannot be written.
        UnicodeDecodeError: If the input is not valid UTF-8.
    """
    # Ensure the output folder exists
    os.makedirs(output_json_folder, exist_ok=True)

    # Output file keeps the input's base name, with a .json extension
    base_name = os.path.splitext(os.path.basename(input_jsonl_file))[0]
    output_json_file = os.path.join(output_json_folder, base_name + '.json')

    # Read the JSONL file and aggregate the data.
    # 'utf-8-sig' decodes as UTF-8 independently of the platform locale and
    # drops a leading byte-order mark; without that, a BOM makes json.loads
    # reject the first line and the first record is silently lost.
    data = []
    with open(input_jsonl_file, 'r', encoding='utf-8-sig') as jsonl_file:
        for line_number, line in enumerate(jsonl_file, start=1):
            line = line.strip()
            if not line:  # Skip empty lines
                continue
            try:
                data.append(json.loads(line))
            except json.JSONDecodeError as e:
                # Best effort: report the malformed line and keep going
                print(f"Error decoding JSON on line {line_number}: {e}")

    # Write to the JSON file (explicit encoding so the result does not depend
    # on the platform default)
    with open(output_json_file, 'w', encoding='utf-8') as json_file:
        json.dump(data, json_file, indent=4)

    print(f"Converted {input_jsonl_file} to {output_json_file}")

# Example usage: convert one JSONL file into a .json file inside the output folder.
# NOTE: both values below are placeholders. convert_jsonl_to_json opens
# input_jsonl_file for reading, so it must name an existing file before this runs.

input_jsonl_file = "inputfold"  # Placeholder: change this to the path of your actual input JSONL file
output_json_folder = "outputfold"  # Placeholder: change this to your desired output folder path (created if missing)

convert_jsonl_to_json(input_jsonl_file, output_json_folder)

In [None]:
import json            #json to csl-json
import datetime
def _csl_authors(creators):
    """Turn a 'creator' value into a list of {'family', 'given'} name dicts."""
    if creators is None:
        return []
    # A lone string or dict is one creator. Iterating it directly would yield
    # one "author" per character (or per dict key) instead.
    if not isinstance(creators, list):
        creators = [creators]

    authors = []
    for author in creators:
        if isinstance(author, str):
            # Split the full name: last word is the family name, the rest is given
            name_parts = author.split()
            if not name_parts:
                # Empty / whitespace-only name: nothing to record
                continue
            authors.append({
                'family': name_parts[-1],
                'given': " ".join(name_parts[:-1]),
            })
        elif isinstance(author, dict):
            # Already structured: copy the two name parts, defaulting to ''
            authors.append({
                'family': author.get('family', ''),
                'given': author.get('given', ''),
            })
        # Any other value type is ignored
    return authors


def _csl_issued(date_value):
    """Turn a 'datePublished' value into a CSL 'issued' dict ({'date-parts': [[...]]})."""
    default_issued = {'date-parts': [[2024]]}  # Hard-coded fallback year
    if date_value is None:
        return default_issued

    # Accept e.g. an integer year as well as a string; strptime needs a str
    date_str = date_value if isinstance(date_value, str) else str(date_value)

    # Try the most specific format first; keep only as many parts as it carries
    for date_format, part_count in (("%Y-%m-%d", 3), ("%Y-%m", 2), ("%Y", 1)):
        try:
            parsed_date = datetime.datetime.strptime(date_str, date_format)
        except ValueError:
            continue
        parts = [parsed_date.year, parsed_date.month, parsed_date.day]
        return {'date-parts': [parts[:part_count]]}

    print(f"Date format in '{date_str}' not recognized, using default year.")
    return default_issued


def convert_to_csljson(input_file, output_file):
    """Convert a JSON file of citation records into a CSL-JSON style file.

    The input must contain a JSON array of objects. Each object is mapped to
    one output item as follows:

    * ``id``            -> ``id`` (None when missing)
    * ``docType``       -> ``type`` (default ``'article-journal'``)
    * ``title``         -> ``title`` (default ``'Untitled'``)
    * ``creator``       -> ``author``: list of ``{'family', 'given'}`` dicts.
      Full-name strings are split on whitespace (last word = family name);
      blank names are skipped; a single string/dict is treated as one creator.
    * ``datePublished`` -> ``issued``: ``{'date-parts': [[...]]}`` parsed from
      ``YYYY-MM-DD``, ``YYYY-MM`` or ``YYYY``; falls back to ``[[2024]]`` when
      missing, null, or unrecognized.
    * ``tdmCategory``   -> ``genre``
    * ``isPartOf``      -> ``series``
    * ``language``      -> ``language``

    Args:
        input_file: Path of the UTF-8 JSON file to read.
        output_file: Path of the file to write (UTF-8, indented,
            non-ASCII characters kept as-is).

    Returns:
        None. A confirmation message is printed on success.
    """
    # Step 1: Load the JSON file
    with open(input_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Step 2: Map each input record onto the output fields.
    # This is the tricky bit: the input's field names have to be matched up
    # with the field names the citation manager expects on import.
    csl_data = []
    for item in data:
        csl_data.append({
            'id': item.get('id'),
            'type': item.get('docType', 'article-journal'),  # Default type if not present
            'title': item.get('title', 'Untitled'),
            'author': _csl_authors(item.get('creator')),
            'issued': _csl_issued(item.get('datePublished')),  # Falls back to the year 2024
            'genre': item.get('tdmCategory'),
            'series': item.get('isPartOf'),
            'language': item.get('language'),
        })

    # Step 3: Save the data to the output file
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(csl_data, f, ensure_ascii=False, indent=4)

    print(f"File successfully converted to {output_file}")

# Example usage:
# Both arguments are placeholders: replace the first with the path of the input
# JSON file and the second with the path of the CSL-JSON file to write
# (for example 'input.json' and 'output.csljson').
convert_to_csljson("path in", "path out")