<a href="https://colab.research.google.com/github/MehrdadJalali-AI/Data_Management/blob/main/Metadata.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import pandas as pd
import json

# Function to convert a row to a Schema.org Book JSON-LD format
def row_to_schema(row):
    """Build a Schema.org ``Book`` JSON-LD record from a single table row.

    Args:
        row: Any object indexable by the keys 'Title', 'Author' and 'ISBN'
            (for example a pandas Series or a plain dict).

    Returns:
        dict: The JSON-LD record; the author is nested as a ``Person`` object.

    Raises:
        KeyError: If one of the three expected keys is missing from ``row``.
    """
    # Assemble the record key by key so the output order stays
    # @context, @type, name, author, isbn.
    book = {"@context": "http://schema.org", "@type": "Book"}
    book["name"] = row['Title']
    book["author"] = {"@type": "Person", "name": row['Author']}
    book["isbn"] = row['ISBN']
    return book

# Read the CSV file (replace 'Book.csv' with the path to your own CSV file).
# ISBN is read as text: it is an identifier, and numeric inference would
# drop leading zeros (e.g. ISBN-10 "0451524934").
df = pd.read_csv('Book.csv', dtype={'ISBN': str})

# Convert each row to Schema.org JSON-LD and store in a list.
# Iterating over plain record dicts also handles a header-only CSV, where
# df.apply(..., axis=1) returns a DataFrame (which has no .tolist()).
schemas = [row_to_schema(record) for record in df.to_dict(orient='records')]

# Save the JSON-LD output to a file
with open('books_metadata.json', 'w') as f:
    json.dump(schemas, f, indent=4)

print("Metadata has been saved to 'books_metadata.json'")


Metadata has been saved to 'books_metadata.json'


In [10]:
import xml.etree.ElementTree as ET

# Load the XML document and take its top-level element
tree = ET.parse("metadata.xml")
root = tree.getroot()

# Print each direct child of the root as "<tag>: <text>"
# (tags come out namespace-qualified, i.e. "{namespace-uri}name")
for child in root:
    print("{}: {}".format(child.tag, child.text))

{http://purl.org/dc/elements/1.1/}title: A Study on Data Management Practices
{http://purl.org/dc/elements/1.1/}creator: Jane Doe
{http://purl.org/dc/elements/1.1/}subject: Data Management; Best Practices
{http://purl.org/dc/elements/1.1/}description: This document discusses current trends and best practices in data management.
{http://purl.org/dc/elements/1.1/}date: 2023-01-01
{http://purl.org/dc/elements/1.1/}type: Research Report
{http://purl.org/dc/elements/1.1/}format: PDF
{http://purl.org/dc/elements/1.1/}identifier: https://example.com/resources/12345
{http://purl.org/dc/elements/1.1/}source: https://example.com
{http://purl.org/dc/elements/1.1/}language: en


In [4]:
import json

# Function to recursively print elements of the JSON data
def print_elements(element, indent=0):
    """Print a nested dict/list structure as an indented outline.

    Dict keys are printed as ``key:`` and list positions as ``Index i:``;
    the value belonging to each label is printed four spaces deeper.
    Anything that is neither a dict nor a list is printed as a leaf.

    Args:
        element: The value to print (dict, list, or any printable leaf).
        indent: Number of leading spaces for this nesting level.
    """
    pad = ' ' * indent

    # Leaves are printed directly; containers are first turned into
    # (label, child) pairs so both kinds share one printing loop.
    if isinstance(element, dict):
        labelled = [(f"{key}:", value) for key, value in element.items()]
    elif isinstance(element, list):
        labelled = [(f"Index {i}:", item) for i, item in enumerate(element)]
    else:
        print(f"{pad}{element}")
        return

    for label, child in labelled:
        print(f"{pad}{label}")
        print_elements(child, indent + 4)

# Read the saved metadata back and print it as an outline.
# A missing file or malformed JSON is reported with a message instead of a traceback.
try:
    with open('books_metadata.json', 'r') as fh:
        data = json.loads(fh.read())

    print_elements(data)

except (FileNotFoundError, json.JSONDecodeError) as problem:
    if isinstance(problem, FileNotFoundError):
        print("File not found. Please make sure the file path is correct.")
    else:
        print("Error decoding JSON. Please ensure the file contains valid JSON.")


Index 0:
    @context:
        http://schema.org
    @type:
        Book
    name:
        1984
    author:
        @type:
            Person
        name:
            George Orwell
    isbn:
        9780451524935
Index 1:
    @context:
        http://schema.org
    @type:
        Book
    name:
        To Kill a Mockingbird
    author:
        @type:
            Person
        name:
            Harper Lee
    isbn:
        9780060935467
Index 2:
    @context:
        http://schema.org
    @type:
        Book
    name:
        Pride and Prejudice
    author:
        @type:
            Person
        name:
            Jane Austen
    isbn:
        9781503290563


In [7]:
import json
from anytree import Node, RenderTree, PreOrderIter

# Sample JSON data (as a string for demonstration).
# Top-level keys: "store" (an object), "books" (an array of book objects with a
# nested "author" object and "reviews" array) and "metadata" (an object).
# NOTE(review): "metadata.numBooks" is 2 but the "books" array below holds a
# single entry — confirm whether this mismatch is intentional.
json_data = """
{
  "store": {
    "name": "Readers' Paradise",
    "location": "123 Book St, Fictionville, TX",
    "online": true
  },
  "books": [
    {
      "title": "The Great Escape",
      "author": {
        "firstName": "Jane",
        "lastName": "Doe",
        "age": 45,
        "nationality": "Canadian"
      },
      "isbn": "978-1-23456-789-0",
      "price": 15.99,
      "available": true,
      "categories": ["Adventure", "History"],
      "publicationDate": "2020-07-01",
      "reviews": [
        {
          "username": "booklover91",
          "rating": 4,
          "comment": "Exciting read with a few slow parts, but overall great!",
          "date": "2021-01-15"
        }
      ]
    }
  ],
  "metadata": {
    "lastUpdated": "2025-01-08",
    "numBooks": 2,
    "bestSeller": "The Great Escape"
  }
}
"""

# Function to recursively add nodes to the tree
def add_nodes(parent, obj):
    """Attach ``obj`` beneath ``parent`` as a subtree of ``Node`` objects.

    Each dict key becomes a child node named after the key, each list item
    becomes a child node named ``Index i``, and every other value becomes a
    single child node named with the value's string form.

    Args:
        parent: The node that the new child nodes are attached to.
        obj: The decoded JSON value (dict, list, or scalar) to add.
    """
    # Scalars terminate the recursion with one leaf node.
    if isinstance(obj, dict):
        labelled = obj.items()
    elif isinstance(obj, list):
        labelled = ((f'Index {i}', item) for i, item in enumerate(obj))
    else:
        Node(f"{obj}", parent=parent)
        return

    # Containers: create one labelled child per entry, then descend into it.
    for label, value in labelled:
        add_nodes(Node(label, parent=parent), value)

# Decode the sample JSON string into Python objects
data = json.loads(json_data)

# Start the tree with a single root node and hang the decoded data below it
root = Node('Root')
add_nodes(root, data)

# Draw the tree: each rendered row supplies the branch prefix and its node
for pre, fill, node in RenderTree(root):
    print(pre, node.name, sep="")


Root
├── store
│   ├── name
│   │   └── Readers' Paradise
│   ├── location
│   │   └── 123 Book St, Fictionville, TX
│   └── online
│       └── True
├── books
│   └── Index 0
│       ├── title
│       │   └── The Great Escape
│       ├── author
│       │   ├── firstName
│       │   │   └── Jane
│       │   ├── lastName
│       │   │   └── Doe
│       │   ├── age
│       │   │   └── 45
│       │   └── nationality
│       │       └── Canadian
│       ├── isbn
│       │   └── 978-1-23456-789-0
│       ├── price
│       │   └── 15.99
│       ├── available
│       │   └── True
│       ├── categories
│       │   ├── Index 0
│       │   │   └── Adventure
│       │   └── Index 1
│       │       └── History
│       ├── publicationDate
│       │   └── 2020-07-01
│       └── reviews
│           └── Index 0
│               ├── username
│               │   └── booklover91
│               ├── rating
│               │   └── 4
│               ├── comment
│               │   └── Exciting read with a fe

In [6]:
pip install anytree

Collecting anytree
  Downloading anytree-2.12.1-py3-none-any.whl.metadata (8.1 kB)
Downloading anytree-2.12.1-py3-none-any.whl (44 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/44.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.9/44.9 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: anytree
Successfully installed anytree-2.12.1


In [2]:
# Make Google Drive available inside the Colab runtime
from google.colab import drive
import os

MOUNT_POINT = '/content/drive'
PRACTICAL_DIR = '/content/drive/MyDrive/SRH/Modules/Data_Management_1/Practical'

drive.mount(MOUNT_POINT)
# Switch the working directory so relative file names resolve inside the course folder
os.chdir(PRACTICAL_DIR)

Mounted at /content/drive
