In [7]:
# Example usage
input_data = """Timestamp 0: Dog (0.26)
Timestamp 1: Domestic animals, pets (0.20)
Timestamp 2: Domestic animals, pets (0.26)
Timestamp 3: Animal (0.59)
Timestamp 4: Bark (0.26)
Timestamp 5: Domestic animals, pets (0.33)
Timestamp 6: Domestic animals, pets (0.33)
Timestamp 7: Domestic animals, pets (0.26)
Timestamp 8: Animal (0.33)
Timestamp 9: Speech (0.41)
Timestamp 10: Domestic animals, pets (0.59)
Timestamp 11: Domestic animals, pets (0.59)
Timestamp 12: Domestic animals, pets (0.41)
Timestamp 13: Silence (0.08)"""

In [10]:
from collections import Counter

def get_top_categories(input_string, top_n=2, separator=", "):
    """
    Rank the category labels found in timestamped classification output.

    Each usable line has the form ``<prefix>: <category> (<score>)``, for
    example ``Timestamp 3: Animal (0.59)``. The category is the text between
    the first ``:`` and the last ``(`` on the line, with surrounding
    whitespace removed; the score itself is ignored.

    Blank lines are skipped silently. Non-blank lines that lack the ``:``
    or ``(`` delimiter, or whose category is empty, are reported on stdout
    and skipped, so they never contribute a bogus category to the counts.

    Args:
        input_string (str): Multiline string containing timestamp data.
        top_n (int): Number of most frequent categories to return.
            Defaults to 2.
        separator (str): String placed between the returned categories.
            Defaults to ", ". Category names may themselves contain commas
            (e.g. "Domestic animals, pets"), so pass a different separator
            when the result has to be split back into categories.

    Returns:
        str: The ``top_n`` most frequent categories joined by ``separator``,
        most frequent first. Categories with equal counts keep the order in
        which they first appear. Returns an empty string when no line
        yields a category.
    """
    categories = []
    # splitlines() copes with "\r\n" endings as well as "\n".
    for line in input_string.splitlines():
        if not line.strip():
            continue

        # Split on the delimiters themselves rather than on fixed offsets,
        # so a missing space after ":" or before "(" cannot clip the label.
        _prefix, colon, remainder = line.partition(":")
        label, paren, _score = remainder.rpartition("(")
        category = label.strip()

        # partition/rpartition return an empty delimiter when it is absent.
        if not (colon and paren and category):
            print(f"Skipping malformed line: {line!r}")
            continue

        categories.append(category)

    # Count occurrences of each category
    category_counts = Counter(categories)

    # Keep only the names of the most frequent categories
    top_categories = [
        category for category, _count in category_counts.most_common(top_n)
    ]

    return separator.join(top_categories)




In [11]:
# Rank the labels in the sample data and show the two most frequent ones.
# Labels such as "Domestic animals, pets" contain a comma, so the printed
# line holds two categories even though it reads like three.
top_categories = get_top_categories(input_string=input_data)
print("Top 2 Categories:", top_categories)


Top 2 Categories: Domestic animals, pets, Animal
