In [326]:
import html
import json

import pandas as pd

# Load the JSON file. The encoding is pinned to UTF-8 (the standard encoding
# for JSON text) so the read does not depend on the platform's locale default.
with open("chatgpt_plugins.json", "r", encoding="utf-8") as file:
    plugins = json.load(file)

# Convert to DataFrame
df = pd.DataFrame(plugins)

# Display the first plugin record to inspect its structure.
df['items'][0]

{'id': 'plugin-f1440a26-db54-4cae-a394-425c4042ecaa',
 'domain': 'baseball-stats.azurewebsites.net',
 'namespace': 'Major_League_Baseball_Statistics_Plugin',
 'status': 'approved',
 'manifest': {'schema_version': 'v1',
  'name_for_model': 'Major_League_Baseball_Statistics_Plugin',
  'name_for_human': 'MLB Stats',
  'description_for_model': "Plugin for retrieving up to date baseball statistics for teams and players across the MLB. This plugin should be used for retrieving any baseball-related information that is current beyond ChatGPT's cutoff date of September 2021.",
  'description_for_human': 'Access up-to-date baseball news and statistics for players and teams across the MLB.',
  'auth': {'type': 'none'},
  'api': {'type': 'openapi',
   'url': 'https://baseball-stats.azurewebsites.net/openapi.yaml'},
  'logo_url': 'https://baseball-stats.azurewebsites.net/logo.png',
  'contact_email': 'joe.fogelman9@gmail.com',
  'legal_info_url': 'https://github.com/fogel-j/baseball-stats-ai'},
 'o

In [327]:
# Flatten each plugin's manifest into one row holding its human-readable name,
# both descriptions, and its logo URL.
manifests = (item['manifest'] for item in df['items'])
plugins_df = pd.DataFrame(
    [
        {
            'plugin': manifest['name_for_human'],
            'description_for_model': manifest['description_for_model'],
            'description_for_human': manifest['description_for_human'],
            'logo_url': manifest['logo_url'],
        }
        for manifest in manifests
    ],
    # Passing the columns explicitly keeps their order (and their presence
    # when there are no rows at all).
    columns=['plugin', 'description_for_model', 'description_for_human', 'logo_url'],
)

plugins_df.head()

Unnamed: 0,plugin,description_for_model,description_for_human,logo_url
0,MLB Stats,Plugin for retrieving up to date baseball stat...,Access up-to-date baseball news and statistics...,https://baseball-stats.azurewebsites.net/logo.png
1,Love Discounts UK,Fetches store information and available coupon...,Ask for the best Love Discounts UK codes for 1...,https://cdn.lovesavingsgroup.com/assets/favico...
2,API Bot,"API Bot provides information about aws , googl...",This is a conversational bot that lets you ask...,https://firebasestorage.googleapis.com/v0/b/se...
3,AIQuest,"As AI, you generate a unique story based on pl...",An interactive text adventure. Your choices sh...,https://aiquest.waxime.dev/logo.png
4,Denison Yachting,Plugin for searching through our selection of ...,"Search yachts for sale, charters, new construc...",https://www.denisonyachtsales.com/.well-known/...


In [328]:
# Category name -> keywords that signal membership in that category.
# 'Uncategorized' has no keywords on purpose: it is only used as a fallback.
categories = {
    'Charts and Diagrams': ["graph", "graphing", "charts", "charting", "diagrams", "diagram", "plot", "plots", "plotting", "form", "forms"],
    'Coding': ['code', 'program', 'software', 'development', 'algorithm', 'debug', 'compile', 'script', 'library', 'API', 'framework', 'coding', 'github', 'repo', 'repository', 'stack overflow', 'stackoverflow', 'domain', 'domains', 'dns', 'programming'],
    'Documents': ['docs', 'documents', 'pdf', 'pdfs'],
    'Earth': ["earth", "weather", "map", "maps", "nasa"],
    'Education / Learning': ['learn', 'education', 'study', 'course', 'school', 'book', 'teach', 'lecture', 'tutor', 'academic', 'university', 'college', 'knowledge'],
    'Entertainment': ['movie', 'game', 'games', 'music', 'entertain', 'fun', 'play', 'show', 'theatre', 'concert', 'party', 'festival', 'trivia', 'entertainment', 'tv shows', 'movies', 'adventure', 'pokemon'],
    'Events': ['event', 'events', 'tickets', 'ticket', 'concert', 'concerts', 'show', 'shows', 'festival', 'festivals', 'party', 'parties'],
    # 'ethereum' is the correct spelling; the 'etherium' misspelling is kept so
    # text that contains the typo still matches.
    'Finance': ['finance', 'money', 'bank', 'stock', 'investment', 'crypto', 'budget', 'economy', 'transaction', 'insurance', 'loan', 'bitcoin', 'blockchain', 'currency', 'trading', 'stock market', 'financial', 'finances', 'earnings', 'tokens', 'ethereum', 'etherium', 'ETH', 'BTC'],
    'Food & Drink': ['food', 'drink', 'eat', 'cook', 'recipe', 'dining', 'restaurant', 'cuisine', 'meal', 'bake', 'taste', 'gourmet'],
    'Health & Fitness': ['health', 'fitness', 'gym', 'diet', 'exercise', 'wellness', 'nutrition', 'workout', 'training', 'yoga', 'meditation'],
    'Job and Career': ['job', 'career', 'work', 'employ', 'hire', 'profession', 'recruit', 'salary', 'interview', 'resume', 'position', 'occupation'],
    'Legal': ["law", "legal", "laws", "case laws", "caselaw", "lawyer", "lawyers", "attorney", "attorneys", 'bills', 'congress', 'politics', 'politicians', 'bylaws', 'legislation', 'legislature', 'legislative', 'legislatures'],
    'Marketing': ['market', 'advertise', 'brand', 'campaign', 'SEO', 'analytics', 'target', 'strategy', 'promotion', 'sales', 'customer', 'conversion', 'pitch', 'business', 'startup'],
    'News & Media': ['news', 'media', 'journal', 'report', 'press', 'publish', 'broadcast', 'headline', 'article', 'coverage', 'newspaper', 'magazine'],
    'Productivity': ['productivity', 'organize', 'manage', 'efficiency', 'work', 'task', 'goal', 'project', 'schedule', 'planning', 'optimize', 'reminders', 'reminder', 'todo', 'todos', 'calendar', 'timer', 'alarm', 'notes', 'planner', 'calculator', 'utility', 'utilities', 'track', 'assistant', 'diary', 'journal'],
    'Prompts': ['prompt', 'prompts', 'prompting'],
    'Search': ['link', 'links', 'url', 'web page', 'bing', 'google', 'wolfram', 'search', 'fetch'],
    'Shopping': ['shop', 'buy', 'purchase', 'sell', 'store', 'market', 'retail', 'sale', 'shopping', 'amazon', 'ebay', 'products', 'product', 'deal', 'deals', 'for sale', 'discount'],
    'Social Networking': ['social', 'network', 'chat', 'message', 'friend', 'community', 'share', 'connect', 'follow', 'post', 'like', 'comment', 'twitter', 'facebook', 'instagram', 'snapchat', 'tiktok', 'linkedin', 'pinterest', 'reddit', 'social media', 'social network', 'tweet', 'tweets'],
    'Sports': ['sport', 'game', 'player', 'team', 'score', 'league', 'match', 'coach', 'stadium', 'tournament', 'athlete', 'fitness'],
    'Summarization': ["summarize", "summarization", "paraphraser", "paraphrase", "summarizer", 'summary'],
    'Text': ['text', 'translate', 'language', 'poem', 'poems', 'quote', 'quotes'],
    'Travel': ['travel', 'flight', 'flights', 'hotel', 'trip', 'journey', 'tour', 'vacation', 'holiday', 'explore', 'destination', 'adventure', 'train', 'subway', 'ferry', 'ferries', 'theme park'],
    'Uncategorized': [],
    'Video': ['video', 'youtube', 'videos', 'vimeo', 'tiktok', 'twitch'],
}

# Lowercase all keywords for accurate matching, and drop repeats (keeping the
# first occurrence): categorize_plugin adds to the score once per list entry,
# so a keyword listed twice would silently count double.
for category, keywords in categories.items():
    categories[category] = list(dict.fromkeys(keyword.lower() for keyword in keywords))

categories.keys()

dict_keys(['Charts and Diagrams', 'Coding', 'Documents', 'Earth', 'Education / Learning', 'Entertainment', 'Events', 'Finance', 'Food & Drink', 'Health & Fitness', 'Job and Career', 'Legal', 'Marketing', 'News & Media', 'Productivity', 'Prompts', 'Search', 'Shopping', 'Social Networking', 'Sports', 'Summarization', 'Text', 'Travel', 'Uncategorized', 'Video'])

In [329]:
from collections import defaultdict

def categorize_plugin(plugin, categories):
    """Pick the single best-matching category for a plugin by keyword scoring.

    Every keyword that occurs as a substring of one of the plugin's
    (lowercased) text fields adds to the score of the keyword's category:

    * plugin name: +3
    * human-facing description: +2
    * model-facing description: +1

    Args:
        plugin: Mapping-like record (e.g. a dict or a pandas Series) with the
            keys 'plugin', 'description_for_model' and 'description_for_human'.
            A value that is not a string (None, NaN) is treated as empty text.
        categories: Mapping of category name -> list of keywords. Keywords are
            compared as-is against lowercased text, so they should already be
            lowercase.

    Returns:
        A one-element list with the highest-scoring category name, or
        ['Uncategorized'] when no keyword matched at all. On a tie, the
        category that comes first in ``categories`` wins.
    """
    def _lowered(value):
        # Missing fields must not crash the scoring; they simply match nothing.
        return value.lower() if isinstance(value, str) else ""

    name = _lowered(plugin['plugin'])
    description_human = _lowered(plugin['description_for_human'])
    description_model = _lowered(plugin['description_for_model'])

    # NOTE: matching is by plain substring, so short keywords also hit inside
    # longer words (e.g. 'eat' in 'weather'). Kept as-is to preserve results.
    scores = {}
    for category, keywords in categories.items():
        score = 0
        for keyword in keywords:
            # A hit in the plugin name is the strongest signal.
            if keyword in name:
                score += 3
            # The short (human) description gets a mid-level rank.
            if keyword in description_human:
                score += 2
            # The longer model description gets the lowest rank.
            if keyword in description_model:
                score += 1
        if score:
            scores[category] = score

    # If no categories were matched, fall back to 'Uncategorized'.
    if not scores:
        return ['Uncategorized']

    # max() returns the first key with the highest score, i.e. the earliest
    # category in iteration order among equally scored ones.
    return [max(scores, key=scores.get)]
        
        
# Bucket every plugin row under each category label chosen for it.
grouped = defaultdict(list)
for _, row in plugins_df.iterrows():
    for label in categorize_plugin(row, categories):
        grouped[label].append(row)

# Re-key into a plain dict whose categories are in alphabetical order.
categorized_plugins = {label: grouped[label] for label in sorted(grouped)}

categorized_plugins.keys()


dict_keys(['Charts and Diagrams', 'Coding', 'Documents', 'Earth', 'Education / Learning', 'Entertainment', 'Events', 'Finance', 'Food & Drink', 'Health & Fitness', 'Job and Career', 'Legal', 'Marketing', 'News & Media', 'Productivity', 'Prompts', 'Search', 'Shopping', 'Social Networking', 'Sports', 'Summarization', 'Text', 'Travel', 'Uncategorized', 'Video'])

In [330]:
# Report how many plugins ended up in each category.
for category in categorized_plugins:
    plugins = categorized_plugins[category]
    print(f"{category}: {len(plugins)} plugins")

Charts and Diagrams: 38 plugins
Coding: 70 plugins
Documents: 13 plugins
Earth: 20 plugins
Education / Learning: 30 plugins
Entertainment: 44 plugins
Events: 11 plugins
Finance: 52 plugins
Food & Drink: 29 plugins
Health & Fitness: 4 plugins
Job and Career: 25 plugins
Legal: 9 plugins
Marketing: 23 plugins
News & Media: 14 plugins
Productivity: 32 plugins
Prompts: 10 plugins
Search: 70 plugins
Shopping: 62 plugins
Social Networking: 32 plugins
Sports: 9 plugins
Summarization: 2 plugins
Text: 13 plugins
Travel: 28 plugins
Uncategorized: 15 plugins
Video: 16 plugins


In [331]:
def create_markdown_for_plugin(plugin):
    """Render one plugin as a row of a three-column Markdown table.

    Args:
        plugin: Mapping-like record (e.g. a dict or a pandas Series) with the
            keys 'plugin', 'description_for_human' and 'logo_url'. Values that
            are not strings (None, NaN) are treated as empty.

    Returns:
        A newline-terminated row ``| <logo> | **<name>** | <description> |``.
        The logo cell holds an ``<img>`` tag, or nothing when there is no
        logo URL.
    """
    def _cell_text(value):
        # The text comes from third-party manifests: keep it on a single line
        # and escape '|' so it cannot split the table cell it sits in.
        if not isinstance(value, str):
            return ""
        flattened = value.strip().replace("\r\n", " ").replace("\n", " ").replace("\r", " ")
        return flattened.replace("|", "\\|")

    name = _cell_text(plugin['plugin'])
    description = _cell_text(plugin['description_for_human'])

    # Logo URL if it exists, otherwise empty string
    raw_logo = plugin['logo_url']
    logo_url = raw_logo.strip() if isinstance(raw_logo, str) else ""
    if logo_url:
        # Escape quotes and ampersands so the URL stays inside the src attribute.
        safe_url = html.escape(logo_url, quote=True)
        logo = f"<img src=\"{safe_url}\" alt=\"logo\" width=\"64\" />"
    else:
        logo = ""
    return f"| {logo} | **{name}** | {description} |\n"

# Assemble the document: a title, then one section with a table per category.
# Pieces are collected in a list and joined once at the end. The loop variable
# is deliberately not called `plugins`, so the raw JSON loaded at the top of
# the notebook is not overwritten.
markdown_parts = ["# Categorized Plugins\n\n"]
for category, category_plugins in categorized_plugins.items():
    markdown_parts.append(f"\n## {category}\n\n| Logo | Plugin Name | Description |\n| --- | --- | --- |\n")
    markdown_parts.extend(create_markdown_for_plugin(plugin) for plugin in category_plugins)
markdown_text = "".join(markdown_parts)

# Write the markdown text to a file. The encoding is explicit because plugin
# descriptions may contain non-ASCII characters, which a non-UTF-8 locale
# default could fail to encode.
with open("categorized_plugins.md", "w", encoding="utf-8") as file:
    file.write(markdown_text)


In [332]:
# Create a Markdown TOC (Table of Contents) for the file
# %pip (rather than !pip) installs into the environment of the running kernel.
# NOTE(review): the package version is not pinned, so a fresh run may pull a
# different release — consider pinning it.
%pip install markdown-toc
# Run the markdown-toc command-line tool on the generated file.
# NOTE(review): presumably -toc sets the TOC heading and -t picks the
# GitHub-flavoured link style, and the tool edits categorized_plugins.md in
# place (the README cell re-reads that file afterwards) — confirm against the
# tool's documentation.
!markdown-toc -toc "## Plugin Categories" -t github categorized_plugins.md

Note: you may need to restart the kernel to use updated packages.
Creating a Table of Contents for 'categorized_plugins.md'


In [333]:
import re

# Splice categorized_plugins.md into README.md, replacing whatever currently
# sits between the tags <!-- BEGIN PLUGINS --> and <!-- END PLUGINS -->.
with open("../README.md", "r", encoding="utf-8") as file:
    readme_text = file.read()

with open("categorized_plugins.md", "r", encoding="utf-8") as file:
    markdown_text = file.read()

plugins_section = f"<!-- BEGIN PLUGINS -->\n\n{markdown_text}\n\n<!-- END PLUGINS -->"

# The replacement is passed as a function so it is inserted verbatim: a plain
# replacement string would have its backslashes interpreted as regex escapes,
# which can corrupt the text or raise re.error.
# re.DOTALL lets '.' cross newlines; the greedy '.*' spans from the first
# BEGIN tag to the last END tag.
readme_text, replaced = re.subn(
    r"<!-- BEGIN PLUGINS -->.*<!-- END PLUGINS -->",
    lambda _match: plugins_section,
    readme_text,
    flags=re.DOTALL,
)
if not replaced:
    raise ValueError(
        "../README.md has no <!-- BEGIN PLUGINS --> ... <!-- END PLUGINS --> block to update"
    )

# Open for writing only after the new content is ready: mode "w" truncates the
# file immediately, so an error raised while it is open would leave the README
# empty.
with open("../README.md", "w", encoding="utf-8") as file:
    file.write(readme_text)