# XML to JSON to CSV

In [1]:
import json
import xmltodict
import pandas as pd


def flatten_json_to_rows(data, parent_key='', sep='_'):
    """Flatten nested JSON-like data into a list of flat row dicts.

    Nested dict keys are joined with ``sep`` to form column names. Every list
    multiplies the rows: each accumulated row is combined with each flattened
    list item, so a record with N list entries yields N rows that repeat the
    record's scalar fields.

    Args:
        data: A dict, list, or scalar value (e.g. the result of parsing JSON).
        parent_key: Column-name prefix accumulated from the enclosing keys.
        sep: Separator placed between nested key names.

    Returns:
        A list of flat dicts, suitable for ``pandas.DataFrame(...)``.
        A top-level empty list returns ``[]``; a scalar returns
        ``[{parent_key: data}]``.

    Note:
        A key whose value flattens to nothing (an empty list, or a list
        containing only empty lists) is recorded as ``None`` under its own
        column name instead of wiping out every row collected so far.
    """
    if isinstance(data, list):
        rows = []
        for item in data:
            rows.extend(flatten_json_to_rows(item, parent_key, sep=sep))
        return rows

    if not isinstance(data, dict):
        return [{parent_key: data}]

    rows = [{}]
    for k, v in data.items():
        new_key = f"{parent_key}{sep}{k}" if parent_key else k
        if isinstance(v, (dict, list)):
            # Flatten the subtree once; it does not depend on the current rows.
            fragments = flatten_json_to_rows(v, new_key, sep=sep)
            if not fragments:
                # Crossing with zero fragments would drop all existing rows,
                # so keep them and mark the empty field as missing.
                fragments = [{new_key: None}]
            # Cross product: every existing row paired with every fragment,
            # preserving row-major order.
            rows = [{**row, **fragment} for row in rows for fragment in fragments]
        else:
            for row in rows:
                row[new_key] = v
    return rows


def main():
    """Convert ``nested_data.xml`` to ``data1.json`` and a flattened ``nested.csv``.

    Reads the XML file, parses it with ``xmltodict``, dumps the parsed dict as
    indented JSON, flattens it with ``flatten_json_to_rows`` and writes the
    resulting rows to CSV without the index column. Prints a confirmation
    message when done.
    """
    # Step 1: Convert XML to JSON
    with open("nested_data.xml") as xml_file:
        data_dict = xmltodict.parse(xml_file.read())
    # The XML handle is closed before the JSON file is opened.
    with open("data1.json", "w") as json_file:
        json.dump(data_dict, json_file, indent=4)

    # Step 2: Flatten JSON and Convert to CSV
    flattened_data = flatten_json_to_rows(data_dict)

    dataframe = pd.DataFrame(flattened_data)
    # Strip only the leading "root_" prefix; str.replace would also remove
    # "root_" occurring in the middle of a column name.
    root_prefix = "root_"
    dataframe.columns = [
        col[len(root_prefix):] if col.startswith(root_prefix) else col
        for col in dataframe.columns
    ]

    dataframe.to_csv("nested.csv", index=False)

    print("XML converted to JSON and CSV successfully!")


# Run the conversion when this code executes as the main module (as it does in
# a notebook cell or script), but not when it is imported.
if __name__ == "__main__":
    main()


XML converted to JSON and CSV successfully!


In [2]:
# Print the raw input XML so its nesting can be compared with the outputs below.
with open("nested_data.xml", "r") as xml_file:
    print(xml_file.read())


<root>
    <person>
        <name>
            <first>John</first>
            <last>Doe</last>
        </name>
        <address>
            <street>Main Street</street>
            <city>Springfield</city>
        </address>
        <contacts>
            <contact>
                <type>email</type>
                <value>john.doe@example.com</value>
            </contact>
            <contact>
                <type>phone</type>
                <value>123456789</value>
            </contact>
        </contacts>
    </person>
    <person>
        <name>
            <first>Jane</first>
            <last>Smith</last>
        </name>
        <address>
            <street>Second Street</street>
            <city>Metropolis</city>
        </address>
        <contacts>
            <contact>
                <type>email</type>
                <value>jane.smith@example.com</value>
            </contact>
            <contact>
                <type>phone</type>
                <value>987654321</

In [3]:
# Print the intermediate JSON file that main() wrote from the parsed XML.
with open("data1.json", "r") as json_file:
    print(json_file.read())


{
    "root": {
        "person": [
            {
                "name": {
                    "first": "John",
                    "last": "Doe"
                },
                "address": {
                    "street": "Main Street",
                    "city": "Springfield"
                },
                "contacts": {
                    "contact": [
                        {
                            "type": "email",
                            "value": "john.doe@example.com"
                        },
                        {
                            "type": "phone",
                            "value": "123456789"
                        }
                    ]
                }
            },
            {
                "name": {
                    "first": "Jane",
                    "last": "Smith"
                },
                "address": {
                    "street": "Second Street",
                    "city": "Metropolis"
                },
         

In [4]:
# Load the CSV written by main(); as the cell's last expression, the resulting
# DataFrame is displayed.
pd.read_csv("nested.csv")

Unnamed: 0,person_name_first,person_name_last,person_address_street,person_address_city,person_contacts_contact_type,person_contacts_contact_value
0,John,Doe,Main Street,Springfield,email,john.doe@example.com
1,John,Doe,Main Street,Springfield,phone,123456789
2,Jane,Smith,Second Street,Metropolis,email,jane.smith@example.com
3,Jane,Smith,Second Street,Metropolis,phone,987654321


# Shallow Copy Vs Deep Copy

| **Aspect**       | **Shallow Copy**                                    | **Deep Copy**                                   |
|-------------------|----------------------------------------------------|------------------------------------------------|
| **Definition**    | Creates a new container object whose elements are references to the same nested objects as the original. | Creates a new object and recursively copies all objects inside the original object. |
| **Copy Level**    | Copies only the top-level structure.               | Copies both the top-level structure and nested structures. |
| **Effect of Changes** | Changes to the nested (mutable) objects of the original are reflected in the copy. | Changes to the original's nested objects do not affect the deep copy. |
| **Performance**   | Faster, as it does not create new objects for nested elements. | Slower, as it creates new objects recursively. |
| **Function Used** | `copy.copy()`                                      | `copy.deepcopy()`                              |

### Examples of Shallow Copy and Deep Copy
1. Shallow Copy Example

In [5]:
import copy

# A list of lists: the outer list holds references to two inner lists.
original_list = [[1, 2, 3], [4, 5, 6]]

# copy.copy() builds a new outer list that still points at the same inner lists.
shallow_copy = copy.copy(original_list)

# Mutate an inner list through the original...
original_list[0][0] = 99

# ...and the change is visible through the shallow copy as well.
print("Original List:", original_list)  
print("Shallow Copy: ", shallow_copy)  


Original List: [[99, 2, 3], [4, 5, 6]]
Shallow Copy:  [[99, 2, 3], [4, 5, 6]]


### Explanation:

- The shallow_copy references the same nested lists as the original_list.
- Modifying original_list[0][0] affects shallow_copy because the inner lists are not copied.

2. Deep Copy Example

In [6]:
import copy

# Start again from a fresh list of lists.
original_list = [[1, 2, 3], [4, 5, 6]]

# copy.deepcopy() recursively copies the inner lists too.
deep_copy = copy.deepcopy(original_list)

# Mutate an inner list through the original...
original_list[0][0] = 99

# ...the deep copy keeps its own, unchanged inner lists.
print("Original List:", original_list)
print("Deep Copy:    ", deep_copy)      


Original List: [[99, 2, 3], [4, 5, 6]]
Deep Copy:     [[1, 2, 3], [4, 5, 6]]


### Explanation:

- The deep_copy creates a completely new copy of the original_list and its nested elements.
- Modifying original_list[0][0] does not affect the deep_copy because the nested lists are independent.

### Illustration: Shallow vs Deep Copy

| **Original List** | **Shallow Copy Behavior**       | **Deep Copy Behavior**          |
|-------------------|----------------------------------|----------------------------------|
| `[[1, 2, 3], [4, 5, 6]]` | References the same nested lists inside. | Recursively creates new copies of all nested lists. |
| Modification: `original_list[0][0] = 99` | Affects both `original_list` and `shallow_copy`. | Affects only `original_list`. |


### Key Takeaway

- Use shallow copy when you don’t need independent copies of nested elements.
- Use deep copy when you need a completely independent duplicate of the entire object, including all nested structures.