In [4]:
import requests
import xml.etree.ElementTree as ET
import json
import sys
from datetime import datetime

def fetch_xml_from_url(url, timeout=10):
    """Fetch XML data from the specified URL.

    Args:
        url: Address to send the HTTP GET request to.
        timeout: Seconds to wait for the server before giving up; forwarded
            to ``requests.get``. Defaults to 10.

    Returns:
        The response body as text, or ``None`` when the request fails
        (connection error, timeout, or an HTTP error status). The failure
        is reported on stdout rather than raised.
    """
    try:
        # Without a timeout requests waits indefinitely, which would hang the
        # kernel if the host is unreachable.
        response = requests.get(url, timeout=timeout)
        # Turn 4xx/5xx responses into an exception so they take the same
        # error path as connection failures.
        response.raise_for_status()
        return response.text
    except requests.RequestException as e:
        print(f"Error fetching XML from {url}: {e}")
        return None

def strip_ns(tag):
    """Return ``tag`` without a leading ``{namespace}`` prefix.

    Everything up to and including the first ``}`` is dropped; a tag that
    contains no ``}`` is returned unchanged.
    """
    _, brace, local_name = tag.partition('}')
    if brace:
        return local_name
    return tag

def parse_iso(ts):
    """Parse an ISO 8601 timestamp string into a ``datetime``.

    The string is normalised before being handed to
    ``datetime.fromisoformat``:

    * a trailing ``Z`` (UTC designator) becomes ``+00:00``;
    * fractional seconds are padded or truncated to exactly six digits,
      whether they are followed by a positive offset, a negative offset,
      or no offset at all.

    Args:
        ts: Timestamp text, e.g. ``"2025-07-07T14:30:10.5Z"``.

    Returns:
        A ``datetime``; timezone-aware when the input carried an offset.

    Raises:
        ValueError: If the text is not a parseable timestamp.
    """
    # Only a trailing 'Z' is the UTC designator.
    if ts.endswith('Z'):
        ts = ts[:-1] + '+00:00'
    if '.' in ts:
        date_part, rest = ts.split('.', 1)
        # The fraction ends at the first sign character, which starts the
        # UTC offset. Offsets may be negative, so look for '-' as well as '+'.
        offset_at = next(
            (i for i, ch in enumerate(rest) if ch in '+-'),
            len(rest),
        )
        fraction, offset = rest[:offset_at], rest[offset_at:]
        # Reject junk explicitly so truncation below cannot hide it.
        if fraction and not (fraction.isascii() and fraction.isdigit()):
            raise ValueError(f"Invalid fractional seconds in timestamp: {ts!r}")
        # Exactly six digits parses on every Python version that has
        # fromisoformat; older versions reject other lengths.
        fraction = fraction[:6].ljust(6, '0')
        ts = f"{date_part}.{fraction}{offset}"
    return datetime.fromisoformat(ts)

def xml_to_dict(element):
    """Recursively turn an XML element into a plain dictionary.

    Layout of the returned dict:
      * ``'@attributes'`` - the element's attribute mapping (only if non-empty)
      * ``'#text'``       - the stripped text content (only if non-blank);
                            text that parses as an ISO timestamp is stored
                            in ``isoformat()`` form
      * one key per child tag (namespace removed); a tag that occurs more
        than once maps to a list of child dicts in document order
    """
    node = {}

    # Attributes come first so they lead the key order.
    attrs = element.attrib
    if attrs:
        node['@attributes'] = attrs

    # Text content, ignoring whitespace-only text.
    raw_text = element.text
    content = raw_text.strip() if raw_text else ''
    if content:
        value = content
        # Cheap pre-filter: a 'T' separator plus something that could be a
        # UTC marker or offset/date dash. Anything that then fails to parse
        # is kept verbatim.
        maybe_timestamp = 'T' in content and any(
            marker in content for marker in ('Z', '+', '-')
        )
        if maybe_timestamp:
            try:
                value = parse_iso(content).isoformat()
            except ValueError:
                pass
        node['#text'] = value

    # Children: first occurrence is stored directly, repeats become a list.
    for sub in element:
        key = strip_ns(sub.tag)
        converted = xml_to_dict(sub)
        if key not in node:
            node[key] = converted
            continue
        existing = node[key]
        if isinstance(existing, list):
            existing.append(converted)
        else:
            node[key] = [existing, converted]

    return node

def convert_xml_to_json(xml_string):
    """Convert XML string to JSON"""
    try:
        root = ET.fromstring(xml_string)
        xml_dict = xml_to_dict(root)
        return json.dumps(xml_dict, indent=2, ensure_ascii=False)
    except ET.ParseError as e:
        print(f"Error parsing XML: {e}")
        return None

def main(url="http://192.168.100.241:5000/sample", output_path='output.json'):
    """Fetch an XML document, print it as JSON, and save the JSON to disk.

    Args:
        url: Endpoint to fetch the XML from. Defaults to the address this
            notebook was written against.
        output_path: File the JSON text is written to. Defaults to
            ``'output.json'``.

    Raises:
        SystemExit: With status 1 when the fetch or the conversion fails.
            A failure to write the output file is only reported as a
            warning.
    """
    print(f"Fetching XML data from {url}...")
    xml_data = fetch_xml_from_url(url)

    if xml_data is None:
        print("Failed to fetch XML data")
        sys.exit(1)

    print("Converting XML to JSON...")
    json_data = convert_xml_to_json(xml_data)

    if json_data is None:
        print("Failed to convert XML to JSON")
        sys.exit(1)

    # Output to stdout
    print(json_data)

    # Saving is best-effort: the JSON has already been printed, so a write
    # failure is downgraded to a warning.
    try:
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(json_data)
        print(f"\nJSON data also saved to {output_path}")
    except OSError as e:
        print(f"Warning: Could not save to file: {e}")

In [5]:
# Run the pipeline defined above: fetch the XML, print it as JSON and write
# the same JSON to output.json (which the pandas cell reads back).
main()

Fetching XML data from http://192.168.100.241:5000/sample...


Converting XML to JSON...
{
  "@attributes": {
    "{http://www.w3.org/2001/XMLSchema-instance}schemaLocation": "urn:mtconnect.org:MTConnectStreams:1.5 http://schemas.mtconnect.org/schemas/MTConnectStreams_1.5.xsd"
  },
  "Header": {
    "@attributes": {
      "creationTime": "2025-07-07T14:30:10Z",
      "sender": "apm-86xxx-ie4000",
      "instanceId": "1720551353",
      "version": "1.5.0.14",
      "bufferSize": "1024",
      "nextSequence": "6628181",
      "firstSequence": "6628081",
      "lastSequence": "6629104"
    }
  },
  "Streams": {
    "DeviceStream": {
      "@attributes": {
        "name": "Mazak",
        "uuid": "Mazak"
      },
      "ComponentStream": [
        {
          "@attributes": {
            "component": "Controller",
            "name": "controller",
            "componentId": "cont"
          },
          "Samples": {
            "AccumulatedTime": [
              {
                "@attributes": {
                  "dataItemId": "auto_time",
          

In [3]:
import pandas as pd
import json

# Read back the JSON document that main() wrote to output.json.
# NOTE(review): this cell fails with FileNotFoundError unless output.json
# already exists, i.e. main() has been run beforehand.
with open('output.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# The document is deeply nested, so inspect its top-level keys before
# deciding how to flatten it into a DataFrame.
print("JSON structure keys:", list(data.keys()))

# Everything below only runs when the document has a 'Streams' section.
if 'Streams' in data:
    streams_data = data['Streams']
    print("Streams data keys:", list(streams_data.keys()))
    
    # First, naive flattening of the WHOLE document (not just streams_data):
    # nested dicts become dotted column names, while a list value such as
    # ComponentStream stays packed inside a single column.
    df = pd.json_normalize(data)
    print("\nDataFrame shape:", df.shape)
    print("\nDataFrame columns:", df.columns.tolist())
    print("\nFirst few rows:")
    print(df.head())

# Alternative: If you want to extract specific nested data
# For example, to extract all ComponentStream data:
def extract_component_streams(json_data):
    """Flatten the ``Samples`` of every ComponentStream into a DataFrame.

    Expects the dictionary layout produced by the XML-to-dict conversion in
    this notebook: attributes under ``'@attributes'``, text under
    ``'#text'``, and a child tag mapping either to one dict (single
    occurrence) or to a list of dicts (repeated occurrence). Both shapes are
    accepted at every level (DeviceStream, ComponentStream, Samples and the
    individual sample entries).

    Args:
        json_data: The parsed document (top-level dict).

    Returns:
        A DataFrame with one row per sample and the columns ``component``,
        ``component_name``, ``component_id``, ``sample_type``,
        ``data_item_id``, ``timestamp``, ``sequence``, ``sub_type`` and
        ``value``. The columns are present even when no sample is found.
        Rows from several DeviceStreams are concatenated; the device itself
        is not recorded in the output.
    """
    columns = [
        'component', 'component_name', 'component_id', 'sample_type',
        'data_item_id', 'timestamp', 'sequence', 'sub_type', 'value',
    ]
    # Keys that describe the element itself rather than a child element.
    reserved_keys = ('@attributes', '#text')

    def _as_list(node):
        """Wrap a single node in a list; map a missing node to an empty list."""
        if node is None:
            return []
        return node if isinstance(node, list) else [node]

    rows = []
    streams = json_data.get('Streams')
    devices = _as_list(streams.get('DeviceStream')) if isinstance(streams, dict) else []

    for device in devices:
        if not isinstance(device, dict):
            continue
        # A lone ComponentStream arrives as a dict; iterating it directly
        # would walk its keys, hence the normalisation to a list.
        for component in _as_list(device.get('ComponentStream')):
            comp_attrs = component.get('@attributes', {})
            for samples in _as_list(component.get('Samples')):
                for sample_type, sample_data in samples.items():
                    # Skip the Samples element's own attributes/text.
                    if sample_type in reserved_keys:
                        continue
                    for sample in _as_list(sample_data):
                        sample_attrs = sample.get('@attributes', {})
                        rows.append({
                            'component': comp_attrs.get('component'),
                            'component_name': comp_attrs.get('name'),
                            'component_id': comp_attrs.get('componentId'),
                            'sample_type': sample_type,
                            'data_item_id': sample_attrs.get('dataItemId'),
                            'timestamp': sample_attrs.get('timestamp'),
                            'sequence': sample_attrs.get('sequence'),
                            'sub_type': sample_attrs.get('subType'),
                            'value': sample.get('#text'),
                        })

    # Passing the columns explicitly keeps the schema stable for empty input.
    return pd.DataFrame(rows, columns=columns)

# Build the flat per-sample table from the document loaded above and show
# its size, column names and first rows.
component_df = extract_component_streams(data)
print("\nComponent Streams DataFrame:")
print(f"Shape: {component_df.shape}")
print(f"Columns: {component_df.columns.tolist()}")
print("\nFirst few rows:")
print(component_df.head())

# The timestamps are still strings at this point; convert the column to a
# datetime dtype in place (skipped when the column is absent).
# NOTE(review): if the source mixes timestamps with and without fractional
# seconds, newer pandas versions may need an explicit format here - confirm.
if 'timestamp' in component_df.columns:
    component_df['timestamp'] = pd.to_datetime(component_df['timestamp'])
    print("\nTimestamp column converted to datetime")
    print(component_df['timestamp'].dtype)


JSON structure keys: ['@attributes', 'Header', 'Streams']
Streams data keys: ['DeviceStream']

DataFrame shape: (1, 12)

DataFrame columns: ['@attributes.{http://www.w3.org/2001/XMLSchema-instance}schemaLocation', 'Header.@attributes.creationTime', 'Header.@attributes.sender', 'Header.@attributes.instanceId', 'Header.@attributes.version', 'Header.@attributes.bufferSize', 'Header.@attributes.nextSequence', 'Header.@attributes.firstSequence', 'Header.@attributes.lastSequence', 'Streams.DeviceStream.@attributes.name', 'Streams.DeviceStream.@attributes.uuid', 'Streams.DeviceStream.ComponentStream']

First few rows:
  @attributes.{http://www.w3.org/2001/XMLSchema-instance}schemaLocation  \
0  urn:mtconnect.org:MTConnectStreams:1.5 http://...                      

  Header.@attributes.creationTime Header.@attributes.sender  \
0            2025-07-07T14:26:55Z          apm-86xxx-ie4000   

  Header.@attributes.instanceId Header.@attributes.version  \
0                    1720551353          