# Kafka Admin Operations

This notebook demonstrates administrative operations for managing Kafka clusters and topics.

## Topics Covered:
- Creating topics with custom configurations
- Listing topics and partitions
- Deleting topics
- Describing topic configurations
- Inspecting cluster metadata and consumer groups

## 1. Setup and Configuration

In [None]:
import os

from kafka import KafkaConsumer, TopicPartition
from kafka.admin import KafkaAdminClient, NewTopic, ConfigResource, ConfigResourceType
from kafka.errors import TopicAlreadyExistsError, UnknownTopicOrPartitionError

# Resolve the bootstrap servers: an environment override wins, otherwise fall
# back to the built-in comma-separated host:port list.
KAFKA_SERVERS = os.environ.get(
    'KAFKA_BOOTSTRAP_SERVERS',
    'kafka1:29092,kafka2:29093,kafka3:29094',
)
print(f"Connecting to Kafka at: {KAFKA_SERVERS}")

## 2. Create Admin Client

In [None]:
# Build the admin client that the following cells use for every admin call.
# KAFKA_SERVERS is a comma-separated string, so split it into a list first.
bootstrap_list = KAFKA_SERVERS.split(',')
admin_client = KafkaAdminClient(client_id='kafka-admin', bootstrap_servers=bootstrap_list)

print("✓ Admin client created successfully!")

## 3. List All Topics

In [None]:
# Ask the admin client for the topic names it knows about
topics = admin_client.list_topics()

print(f"Total topics: {len(topics)}\n")
# Print one bullet per topic, in alphabetical order
for line in (f"  - {name}" for name in sorted(topics)):
    print(line)

## 4. Create New Topic

In [None]:
# Describe the topic to create: 3 partitions, replication factor 3
new_topic = NewTopic(name='demo-topic', num_partitions=3, replication_factor=3)

try:
    admin_client.create_topics(new_topics=[new_topic], validate_only=False)
except TopicAlreadyExistsError:
    # Report the duplicate instead of failing the cell, so it can be re-run
    print("⚠ Topic 'demo-topic' already exists")
else:
    # Only reached when the create call raised nothing
    for line in (
        "✓ Topic 'demo-topic' created successfully!",
        "  Partitions: 3",
        "  Replication Factor: 3",
    ):
        print(line)

## 5. Create Topic with Custom Configuration

In [None]:
# Topic-level overrides passed along with the create request.
# NOTE(review): with cleanup.policy set to 'compact' alone, retention.ms is
# presumably not enforced — confirm whether 'compact,delete' was intended.
custom_settings = {
    'retention.ms': '86400000',       # 86,400,000 ms = 1 day
    'cleanup.policy': 'compact',      # log compaction
    'compression.type': 'gzip',
    'max.message.bytes': '1048576',   # 1024 * 1024 bytes = 1 MB
}
custom_topic = NewTopic(
    name='custom-config-topic',
    num_partitions=2,
    replication_factor=2,
    topic_configs=custom_settings,
)

try:
    admin_client.create_topics(new_topics=[custom_topic], validate_only=False)
except TopicAlreadyExistsError:
    # Report the duplicate instead of failing the cell, so it can be re-run
    print("⚠ Topic 'custom-config-topic' already exists")
else:
    print("✓ Topic 'custom-config-topic' created with custom configuration!")

## 6. Describe Topic Configuration

In [None]:
# Fetch and print the configuration of a single topic
topic_name = 'demo-topic'
config_resource = ConfigResource(ConfigResourceType.TOPIC, topic_name)
configs = admin_client.describe_configs(config_resources=[config_resource])

print(f"Configuration for topic '{topic_name}':\n")
for config in configs:
    # Each resource in the response is a tuple laid out as
    # (error_code, error_message, resource_type, resource_name, config_entries).
    for resource in config.resources:
        error_code, error_message, resource_name = resource[0], resource[1], resource[3]
        if error_code != 0:
            # Surface per-resource failures instead of printing an empty section
            print(f"  ⚠ Could not describe '{resource_name}': {error_message} (error code {error_code})")
            continue

        # config_entries is a list of tuples (not a dict); the first two fields
        # of each entry are the config name and its value.
        # NOTE(review): the field that marks default values appears to differ
        # between response versions (boolean is_default vs. integer
        # config_source), so no "non-default only" filter is applied here —
        # confirm against the installed kafka-python before adding one.
        for entry in sorted(resource[4], key=lambda e: e[0]):
            name, value = entry[0], entry[1]
            print(f"  {name}: {value}")

## 7. Get Topic Details (Partitions and Offsets)

In [None]:
# Use a consumer to look up partition and offset information for one topic
topic_name = 'demo-topic'
consumer = KafkaConsumer(
    bootstrap_servers=KAFKA_SERVERS.split(','),
    group_id='admin-info-group'
)

try:
    partitions = consumer.partitions_for_topic(topic_name)

    if partitions:
        print(f"Topic '{topic_name}' details:\n")
        print(f"  Total partitions: {len(partitions)}")
        print(f"  Partition IDs: {sorted(partitions)}")

        # Query the beginning and end offsets of every partition, in id order
        partition_list = [TopicPartition(topic_name, p) for p in sorted(partitions)]
        beginning_offsets = consumer.beginning_offsets(partition_list)
        end_offsets = consumer.end_offsets(partition_list)

        print("\n  Partition Details:")
        for partition in partition_list:
            begin = beginning_offsets[partition]
            end = end_offsets[partition]
            # Offset span between the two ends.
            # NOTE(review): presumably only an upper bound on the record count
            # for compacted topics — confirm before relying on it.
            messages = end - begin
            print(f"    Partition {partition.partition}: {messages} messages (offset {begin} to {end})")
    else:
        print(f"Topic '{topic_name}' not found")
finally:
    # Close the consumer even when one of the lookups above raises, so a
    # failed run of this cell does not leave it open in the kernel.
    consumer.close()

## 8. Create Multiple Topics

In [None]:
# Batch creation: a single create_topics call carrying three topic definitions.
# Each spec is (name, partition count, replication factor).
topic_specs = [
    ('events-topic', 4, 2),
    ('logs-topic', 2, 2),
    ('metrics-topic', 6, 3),
]
topics_to_create = [
    NewTopic(name=spec_name, num_partitions=spec_partitions, replication_factor=spec_rf)
    for spec_name, spec_partitions, spec_rf in topic_specs
]

try:
    admin_client.create_topics(new_topics=topics_to_create, validate_only=False)
except TopicAlreadyExistsError as e:
    print(f"⚠ Some topics already exist: {e}")
else:
    # Only reached when the create call raised nothing
    print("✓ Created multiple topics:")
    for topic in topics_to_create:
        print(f"  - {topic.name} ({topic.num_partitions} partitions, RF={topic.replication_factor})")

## 9. Delete Topic

In [None]:
# Remove the topic created in the custom-configuration section
topic_to_delete = 'custom-config-topic'

try:
    admin_client.delete_topics(topics=[topic_to_delete])
except UnknownTopicOrPartitionError:
    # Report a missing topic instead of failing the cell, so it can be re-run
    print(f"⚠ Topic '{topic_to_delete}' does not exist")
else:
    print(f"✓ Topic '{topic_to_delete}' deleted successfully!")

## 10. Get Cluster Metadata

In [None]:
# Get cluster information through the admin client's public API instead of
# reaching into a consumer's private `_client.cluster` object.
cluster_info = admin_client.describe_cluster()

print("Kafka Cluster Metadata:\n")
# .get() keeps the cell working if the broker's metadata response omits a field
print(f"  Cluster ID: {cluster_info.get('cluster_id')}")
print(f"  Controller: Broker {cluster_info.get('controller_id')}")
print("\n  Brokers:")

# Each broker entry is a dict carrying node_id, host and port; list them by id
for broker in sorted(cluster_info.get('brokers', []), key=lambda b: b['node_id']):
    print(f"    Broker {broker['node_id']}: {broker['host']}:{broker['port']}")

## 11. List Consumer Groups

In [None]:
# Ask the admin client for the consumer groups it can see
consumer_groups = admin_client.list_consumer_groups()
group_count = len(consumer_groups)

print(f"Total consumer groups: {group_count}\n")
for group in consumer_groups:
    # Entries are read positionally: element 0 is shown as the group name,
    # element 1 as its protocol.
    group_id, protocol_type = group[0], group[1]
    print(f"  - {group_id} (Protocol: {protocol_type})")

## 12. Cleanup

In [None]:
# Shut down the admin client created earlier in the notebook
admin_client.close()
print("✓ Admin client closed")

## Key Takeaways

1. **Topic Creation**: Specify partitions and replication factor based on your needs
2. **Replication Factor**: Cannot exceed the number of available brokers (topic creation fails otherwise)
3. **Partitions**: More partitions = more parallelism, but more overhead
4. **Topic Configs**: Customize retention, cleanup policy, compression
5. **Monitoring**: Regularly check cluster metadata and topic details

## Best Practices

- Use replication factor of 3 for production topics
- Choose partition count based on expected throughput
- Set appropriate retention policies to manage disk usage
- Monitor consumer lag and cluster health

## Next Steps

Try the Stream Processing notebook (04_stream_processing.ipynb)!