In [1]:
pip install xmltodict

Note: you may need to restart the kernel to use updated packages.


## XML 2 JSON
You can call the function xml_to_json(xml_file, json_file) where xml_file is the path of the xml file and json_file is the path of the json file you want to create.

This code uses the xmltodict library to convert the XML data to a dictionary, and then the json module to write the dictionary to the JSON file.

Please note that this code will only convert the first level of the xml tags into json object, if you would like to convert all the nested tags you may need to use a different library like 'lxml'

Also the conversion may not be exact as json and xml have different ways of representing data, but this code will give you a good starting point.

In [2]:
import xmltodict
import json

def xml_to_json(xml_file, json_file):
    # Open the XML file
    with open('sample_file.xml', 'r') as f:
        xml_data = f.read()

    # Convert the XML data to a dictionary
    json_data = json.dumps(xmltodict.parse(xml_data))

    # Write the JSON data to the JSON file
    with open(json_file, 'w') as f:
        f.write(json_data)

xml_to_json('data.xml', 'data1.json')

## indentation 
In this example, the json.dump(json_data, f, indent=4) line will write the data in json_data to the json file f and indent the data by 4 spaces.

You can adjust the indentation by changing the value passed to the indent parameter. If you want to use '\n' instead of spaces for indentation you can use json.dump(json_data, f, indent='\t')

This will create a human-readable JSON file, where the data is indented, making it easier to read and understand.


In [4]:
import xmltodict
import json

def xml_to_json(xml_file, json_file):
    # Open the XML file
    with open('sample_file.xml', 'r') as f:
        xml_data = f.read()

    # Convert the XML data to a dictionary
    json_data = xmltodict.parse(xml_data)

    # Write the JSON data to the JSON file
    with open(json_file, 'w') as f:
        json.dump(json_data, f, indent=4)

xml_to_json('data.xml', 'data_indent_1.json')


## json.dump(json_data, f, indent='\t')

This will create a human-readable JSON file, where the data is indented, making it easier to read and understand.


In [5]:
import xmltodict
import json

def xml_to_json(xml_file, json_file):
    # Open the XML file
    with open('sample_file.xml', 'r') as f:
        xml_data = f.read()

    # Convert the XML data to a dictionary
    json_data = xmltodict.parse(xml_data)

    # Write the JSON data to the JSON file
    with open(json_file, 'w') as f:
        json.dump(json_data, f, indent='\t')


xml_to_json('data.xml', 'data_indent_2.json')


In [8]:
import json
import csv

def json_to_csv(json_file, csv_file):
    # Open the JSON file
    with open('data_indent_2.json', 'r') as f:
        json_data = json.load(f)

    # Write the JSON data to the CSV file
    with open(csv_file, 'w') as f:
        writer = csv.DictWriter(f, json_data[0].keys())
        writer.writeheader()
        for row in json_data:
            writer.writerow(row)

json_to_csv('data.json', 'data.csv')


KeyError: 0

In [10]:
import json
import csv
import os

def json_to_csv(json_file, csv_file):
    # Open the JSON file
    with open('data_indent_2.json', 'r') as f:
        json_data = json.load(f)

    # Create the CSV file if it does not exist
    if not os.path.isfile(csv_file):
        with open(csv_file, 'w') as f:
            writer = csv.DictWriter(f, json_data[0].keys())
            writer.writeheader()
    # Write the JSON data to the CSV file
    with open(csv_file, 'a') as f:
        writer = csv.DictWriter(f, json_data[0].keys())
        for row in json_data:
            writer.writerow(row)

json_to_csv('data_indent_2.json', 'data.csv')

KeyError: 0

In [18]:
import json
import csv
import os

def json_to_csv(json_file, csv_file):
    # Open the JSON file
    with open('data_indent_2.json', 'r') as f:
        json_data = json.load(f)

    # print ("json_data =", json_data)
    print ("json_data =", json_data[0].keys())
           
    # check if json_data is not empty
    if json_data:
        # Create the CSV file if it does not exist
        if not os.path.isfile(csv_file):
            with open(csv_file, 'w') as f:
                writer = csv.DictWriter(f, json_data[0].keys())
                writer.writeheader()
        # Write the JSON data to the CSV file
        with open(csv_file, 'a') as f:
            writer = csv.DictWriter(f, json_data[0].keys())
            for row in json_data:
                writer.writerow(row)
    else:
        print("json_data list is empty, no data to write.")

json_to_csv('data_indent_2.json', 'data.csv')


KeyError: 0

In [19]:
import json
import csv
import os

def json_to_csv(json_file, csv_file):
    # Open the JSON file
    with open('data.json', 'r') as f:
        json_data = json.load(f)

    # check if json_data is not empty
    if json_data:
        fieldnames = list(json_data[0].keys())
        # Create the CSV file if it does not exist
        if not os.path.isfile(csv_file):
            with open(csv_file, 'w') as f:
                writer = csv.DictWriter(f, fieldnames)
                writer.writeheader()
        # Write the JSON data to the CSV file
        with open(csv_file, 'a') as f:
            writer = csv.DictWriter(f, fieldnames)
            for row in json_data:
                writer.writerow(row)
    else:
        print("json_data list is empty, no data to write.")

json_to_csv('data.json', 'data.csv')


KeyError: 0

In [20]:
import json
import csv
import os

def json_to_csv(json_file, csv_file):
    # Open the JSON file
    with open('data.json', 'r') as f:
        json_data = json.load(f)

    # check if json_data is not empty
    if json_data:
        fieldnames = list(json_data[0].keys())
        # Create the CSV file if it does not exist
        if not os.path.isfile(csv_file):
            with open(csv_file, 'w') as f:
                writer = csv.DictWriter(f, fieldnames)
                writer.writeheader()
        # Write the JSON data to the CSV file
        with open(csv_file, 'a') as f:
            writer = csv.DictWriter(f, fieldnames)
            for row in json_data:
                writer.writerow(row)
    else:
        print("json_data list is empty, no data to write.")

json_to_csv('data.json', 'data.csv')


KeyError: 0

In [22]:
import json
import csv
import os

def json_to_csv(json_file, csv_file):
    # Open the JSON file
    with open('data.json', 'r') as f:
        json_data = json.load(f)

    print("json_data=", json_data) 
    if isinstance(json_data, list) and json_data and all(isinstance(i, dict) for i in json_data):
        fieldnames = list(json_data[0].keys())
        # Create the CSV file if it does not exist
        if not os.path.isfile(csv_file):
            with open(csv_file, 'w') as f:
                writer = csv.DictWriter(f, fieldnames)
                writer.writeheader()
        # Write the JSON data to the CSV file
        with open(csv_file, 'a') as f:
            writer = csv.DictWriter(f, fieldnames)
            for row in json_data:
                writer.writerow(row)
    else:
        print("json_data is empty or not a list of dictionaries, no data to write.")

json_to_csv('data.json', 'data.csv')


json_data= {'sigmacollect': {'@gendatetime': '2018-12-31T23:59:59', '@filename': 'SIGMA_REF362-001-001-0_OF123456_OP460010_SNAB654321_20181231235959_0002.xml', '@version': '1.0.3', '@xsi:noNamespaceSchemaLocation': 'SigmaCollect_v1.0.3.05.xsd', '@xmlns:xsi': 'http://www.w3.org/2001/XMLSchema-instance', 'partner': {'@id': '27D321E8-16B0-44A9-A1CF-603F1DE6F87B', '@type': 'internal', '@factory': 'CBL'}, 'workshop': {'@name': 'MMT-IBLE'}, 'joborder': [{'@xsi:type': 'baan', '@id': '123456', '@launchingdate': '2018-12-25', '@partidproduct': '362-001-001-0S1', '@mod': 'MODA12345', '@taskcode': '362-001-001-0S1_01_015', '@mgmtope': '460010', '@descope': 'Contrôle MMT final'}, {'@xsi:type': 'syteline', '@id': '123456', '@launchingdate': '2018-12-25', '@partidproduct': '362-001-001-0S1', '@mod': 'MODA12345', '@taskcode': '362-001-001-0S1_01_015', '@mgmtope': '460010', '@descope': 'Contrôle MMT final', 'serials': {'serial': {'@value': 'AB123456'}}}], 'machine': {'@idcmms': 'CMM004', '@name': 'FAN

In [23]:
import pandas as pd
import json

# Load the JSON file
with open("data.json", "r") as json_file:
    json_data = json.load(json_file)

# Convert the JSON data to a DataFrame
df = pd.json_normalize(json_data)

# Print the DataFrame
print(df)


  sigmacollect.@gendatetime  \
0       2018-12-31T23:59:59   

                              sigmacollect.@filename sigmacollect.@version  \
0  SIGMA_REF362-001-001-0_OF123456_OP460010_SNAB6...                 1.0.3   

  sigmacollect.@xsi:noNamespaceSchemaLocation  \
0                  SigmaCollect_v1.0.3.05.xsd   

                     sigmacollect.@xmlns:xsi  \
0  http://www.w3.org/2001/XMLSchema-instance   

               sigmacollect.partner.@id sigmacollect.partner.@type  \
0  27D321E8-16B0-44A9-A1CF-603F1DE6F87B                   internal   

  sigmacollect.partner.@factory sigmacollect.workshop.@name  \
0                           CBL                    MMT-IBLE   

                               sigmacollect.joborder  \
0  [{'@xsi:type': 'baan', '@id': '123456', '@laun...   

  sigmacollect.machine.@idcmms sigmacollect.machine.@name  \
0                       CMM004      FAN BLADE - MMT FINAL   

  sigmacollect.recipe.@name sigmacollect.cycle.@starttime  \
0          C-02-1-2