### Complex Schema Validation with Avro
**Description**: Implement a solution in Python to validate records against a complex nested Avro schema.

E.g., a complex schema (`nested_schema.avsc`):

**Steps**:
1. Load schema
2. Example data to validate
3. Validate against schema
4. Read back to check

In [2]:
# Ques_4.ipynb – Complex Schema Validation with Avro (Fixed Version)

import json
import avro.schema
import io
from avro.io import DatumWriter, BinaryEncoder
import pandas as pd

# 1. Define Avro schema
# Each pair is (field name, Avro primitive type); the order here is the order
# of the fields in the resulting "Customer" record schema.
field_specs = [
    ("customer_id", "int"),
    ("name", "string"),
    ("email", "string"),
    ("is_active", "boolean"),
    ("balance", "double"),
    ("joined_date", "string"),
]

# Plain-dict form of the record schema; serialised to JSON before parsing.
schema_json = {
    "namespace": "example.avro",
    "type": "record",
    "name": "Customer",
    "fields": [
        {"name": field_name, "type": field_type}
        for field_name, field_type in field_specs
    ],
}

# avro parses the schema from its JSON text, so serialise the dict first.
schema_text = json.dumps(schema_json)
schema = avro.schema.parse(schema_text)

# 2. Sample records
# Two well-formed customers followed by two deliberately malformed ones, so
# that both the valid and the invalid path are exercised.
records = [
    {
        "customer_id": 1,
        "name": "Alice",
        "email": "alice@example.com",
        "is_active": True,
        "balance": 100.0,
        "joined_date": "2023-01-01",
    },
    {
        "customer_id": 2,
        "name": "Bob",
        "email": "bob@example.com",
        "is_active": False,
        "balance": 150.5,
        "joined_date": "2023-02-10",
    },
    # Invalid: missing email
    {
        "customer_id": 3,
        "name": "Charlie",
        "is_active": True,
        "balance": 50.0,
        "joined_date": "2023-03-01",
    },
    # Invalid: string instead of double
    {
        "customer_id": 4,
        "name": "Dana",
        "email": "dana@example.com",
        "is_active": True,
        "balance": "NaN",
        "joined_date": "2023-04-01",
    },
]

# 3. Validate each record
# A record is treated as valid when the Avro writer can serialise it with the
# schema without raising. The encoded bytes are discarded; only the
# success/failure outcome is kept.
valid_records = []    # records that serialised cleanly
invalid_records = []  # (record, error message) pairs for rejected records

# The writer depends only on the schema, so build it once instead of once per
# record. Keeping it outside the try also means a failure to construct the
# writer surfaces as a real error rather than being recorded as a validation
# failure against every record.
writer = DatumWriter(schema)

for record in records:
    # Fresh buffer per record so bytes from one attempt never mix with the next.
    bytes_writer = io.BytesIO()
    encoder = BinaryEncoder(bytes_writer)
    try:
        writer.write(record, encoder)
    except Exception as e:
        # NOTE(review): broad catch kept from the original so every rejection
        # is collected with its message; consider narrowing to avro's
        # type-mismatch exception once the installed avro version is pinned.
        invalid_records.append((record, str(e)))
    else:
        valid_records.append(record)

# 4. Results
# Accepted records are printed one per line.
print("✅ Valid Records:")
for accepted in valid_records:
    print(accepted)

# Rejected records are printed together with the error text captured for them.
print("\n❌ Invalid Records:")
for rejected in invalid_records:
    r, e = rejected
    print(f"Record: {r}\nError: {e}\n")

# 5. Convert to DataFrame
# Only the accepted records are loaded into the tabular view.
df = pd.DataFrame(valid_records)
print("\n📘 Final DataFrame (valid records only):")
print(df)


ModuleNotFoundError: No module named 'avro'