In [75]:
import json
import pandas as pd

# Load the JSON file
with open("chatgpt_plugins.json", "r") as file:
    plugins = json.load(file)

# Convert to DataFrame
df = pd.DataFrame(plugins)

df['items'][0]

{'id': 'plugin-f1440a26-db54-4cae-a394-425c4042ecaa',
 'domain': 'baseball-stats.azurewebsites.net',
 'namespace': 'Major_League_Baseball_Statistics_Plugin',
 'status': 'approved',
 'manifest': {'schema_version': 'v1',
  'name_for_model': 'Major_League_Baseball_Statistics_Plugin',
  'name_for_human': 'MLB Stats',
  'description_for_model': "Plugin for retrieving up to date baseball statistics for teams and players across the MLB. This plugin should be used for retrieving any baseball-related information that is current beyond ChatGPT's cutoff date of September 2021.",
  'description_for_human': 'Access up-to-date baseball news and statistics for players and teams across the MLB.',
  'auth': {'type': 'none'},
  'api': {'type': 'openapi',
   'url': 'https://baseball-stats.azurewebsites.net/openapi.yaml'},
  'logo_url': 'https://baseball-stats.azurewebsites.net/logo.png',
  'contact_email': 'joe.fogelman9@gmail.com',
  'legal_info_url': 'https://github.com/fogel-j/baseball-stats-ai'},
 'o

In [76]:
# Create a new DataFrame with 'id' and 'description_for_model' for each plugin
plugins_df = pd.DataFrame(
    [(item['manifest']['name_for_human'], item['manifest']['description_for_model'], item['manifest']['description_for_human'], item['manifest']['logo_url']) for item in df['items']],
    columns=['plugin', 'description_for_model', 'description_for_human', 'logo_url']
)

plugins_df.head()

Unnamed: 0,plugin,description_for_model,description_for_human,logo_url
0,MLB Stats,Plugin for retrieving up to date baseball stat...,Access up-to-date baseball news and statistics...,https://baseball-stats.azurewebsites.net/logo.png
1,Love Discounts UK,Fetches store information and available coupon...,Ask for the best Love Discounts UK codes for 1...,https://cdn.lovesavingsgroup.com/assets/favico...
2,API Bot,"API Bot provides information about aws , googl...",This is a conversational bot that lets you ask...,https://firebasestorage.googleapis.com/v0/b/se...
3,AIQuest,"As AI, you generate a unique story based on pl...",An interactive text adventure. Your choices sh...,https://aiquest.waxime.dev/logo.png
4,Denison Yachting,Plugin for searching through our selection of ...,"Search yachts for sale, charters, new construc...",https://www.denisonyachtsales.com/.well-known/...


In [77]:
# Define categories and associated keywords
categories = {
    "Shopping": ["products", "shopping", "coupons", "amazon", "ebay", "aliexpress", "deals", "shop"],
    "Job and Career": ["job", "talent", "hiring", "freelance", "interview", "career", "recruit", "recruitment", "salary"],
    "Financial": ["market data", "stocks", "market analytics", "financial", "crypto", "finance", "bank", "interest rates", "currency", "bitcoin", "btc", "eth"],
    "AI assisted search": ["ai-assisted", "search", "google", "bing", "wolfram" ],
    "Content Search": ["pdf", "videos", "links", "url link", "video search", "pdfs"],
    "Japan": ["japan", "japanese"],
    "Learning": ["education", "learn", "lessons", "learning material", "curriculum", "class", "classes", "teacher", "teachers", "tutor", "tutors"],
    "Marketing": ["social network", "marketing", "posting", "seo", "search engine optimization", "social media", "social media marketing", "social media management"],
    "Code": ["github", "coding", "code generation", "code", "programming", "programmer", "programmers", "developer", "developers", "development", "develop", "develops", "developing", "software", "stack overflow", "stackoverflow", "domain", "domains"],
    "News": ["news", "newspaper", "newspapers", "article", "articles", "blog", "blogs", "blogging"],
    "Analytics": ["analytics", "insights", "data insights", "data analytics", "data analysis", "data scientist"],
    "Sports": ["sports", "nfl", "nhl", "nba", "nfl", "mlb", "baseball", "football", "basketball", "soccer", "hockey", "team", "teams", "player", "players", "score", "scores", "standings", "standings", "stats", "statistics"],
    "Entertainment": ["tv shows", "tv", "movies", "games", "minecraft", "gaming", "game", "video games", "video game", "anime", "manga", "comic", "comics", "cartoon", "cartoons", "entertainment", "fun", "funny", "joke", "jokes", "meme", "memes", "music", "songs", "song", "lyrics", "lyric", "lyric search", "lyrics search", "poem", "poems", "poetry", "poet", "poets", "poem search", "poems search", "poetry search", "poet search", "poets search", "story", "stories", "novel", "novels", "book", "books", "audiobook", "audiobooks", "pokemon"],
    "Machine Learning": ["ml", "machine learning", "dataset exploration", "datasets", "dataset", "data visualization"],
    "Summarization": ["summarize", "summarization", "paraphraser", "paraphrase", "summarizer"],
    "Legal": ["law", "legal", "laws", "case laws", "caselaw", "lawyer", "lawyers", "attorney", "attorneys", "legal advice", "legal advice"],
    "Charts and Diagrams": ["graph", "graphing", "charts", "charting", "diagrams", "diagram", "plot", "plots", "plotting", "form", "forms"],
    "Productivity": ["automation", "productivity", "translation", "utility", "utilities", "task", "tasks", "todos", "todo", "calendar", "calendars", "reminder", "reminders", "timer", "timers", "alarm", "alarms", "clock", "clocks", "time", "times", "date", "dates", "schedule", "schedules", "planner", "planners", "note", "notes", "notepad", "notepads", "calculator", "calculators"],
    "Earth": ["earth", "weather", "map", "maps", "travel"],
    "Uncategorized": []
}

# Lowercase all keywords for accurate matching
for category, keywords in categories.items():
    categories[category] = [keyword.lower() for keyword in keywords]

categories.keys()


dict_keys(['Shopping', 'Job and Career', 'Financial', 'AI assisted search', 'Content Search', 'Japan', 'Learning', 'Marketing', 'Code', 'News', 'Analytics', 'Sports', 'Entertainment', 'Machine Learning', 'Summarization', 'Legal', 'Charts and Diagrams', 'Productivity', 'Earth', 'Uncategorized'])

In [78]:
from collections import defaultdict

def categorize_plugin(plugin, categories):
    # Extract necessary fields
    name = plugin['plugin']
    description_model = plugin['description_for_model'].lower()
    description_human = plugin['description_for_human'].lower()

    # Initialize a dictionary to store matched categories
    matched_categories = defaultdict(int)

    for category, keywords in categories.items():
        # Check each category. Categories is a dict of "plugin": ["keyword1", "keyword2"...]
        # Check if any of the keywords match the plugin's description
        if any(keyword in description_model or keyword in description_human for keyword in keywords):
            matched_categories[category] += 1
    
    # if no categories were matched, assign the plugin to the 'Uncategorized' category
    if not matched_categories:
        matched_categories['Uncategorized'] += 1
    
    # Sort the categories by count and return the top 2
    sorted_categories = sorted(matched_categories.items(), key=lambda x: x[1], reverse=True)[:2]
    return [category for category, _ in sorted_categories]
        
        
# Initialize a dictionary to store plugins by category
categorized_plugins = defaultdict(list)

for _, plugin in plugins_df.iterrows():
    categorized = categorize_plugin(plugin, categories)
    for category in categorized:
        categorized_plugins[category].append(plugin)

# Sort the categories alphabetically
categorized_plugins = dict(sorted(categorized_plugins.items(), key=lambda item: item[0]))

categorized_plugins.keys()


dict_keys(['AI assisted search', 'Analytics', 'Charts and Diagrams', 'Code', 'Content Search', 'Earth', 'Entertainment', 'Financial', 'Japan', 'Job and Career', 'Learning', 'Legal', 'Machine Learning', 'Marketing', 'News', 'Productivity', 'Shopping', 'Sports', 'Summarization', 'Uncategorized'])

In [79]:
for category, plugins in categorized_plugins.items():
    print(f"{category}: {len(plugins)} plugins")

AI assisted search: 229 plugins
Analytics: 22 plugins
Charts and Diagrams: 97 plugins
Code: 81 plugins
Content Search: 61 plugins
Earth: 19 plugins
Entertainment: 71 plugins
Financial: 122 plugins
Japan: 7 plugins
Job and Career: 42 plugins
Learning: 37 plugins
Legal: 3 plugins
Machine Learning: 8 plugins
Marketing: 18 plugins
News: 35 plugins
Productivity: 57 plugins
Shopping: 73 plugins
Sports: 22 plugins
Summarization: 10 plugins
Uncategorized: 82 plugins


In [80]:
# Function to create markdown text for a plugin
def create_markdown_for_plugin(plugin):
    name = plugin['plugin'].strip()
    description = plugin['description_for_human'].strip().replace("\n", " ")
    # Logo URL if it exists, otherwise empty string
    logo_url = plugin['logo_url'].strip() 
    logo = f"<img src=\"{logo_url}\" alt=\"logo\" width=\"64\" />" if logo_url else ""
    return f"| {logo} | **{name}** | {description} |\n"

# Initialize the markdown text
markdown_text = "# Categorized Plugins\n\n"

# Create markdown text for each category and its plugins
for category, plugins in categorized_plugins.items():
    markdown_text += f"\n## {category}\n\n| Logo | Plugin Name | Description |\n| --- | --- | --- |\n"
    for plugin in plugins:
        markdown_text += create_markdown_for_plugin(plugin)

# Write the markdown text to a file
with open("categorized_plugins.md", "w") as file:
    file.write(markdown_text)


In [81]:
# Create a Markdown TOC (Table of Contents) for the file
%pip install markdown-toc
!markdown-toc -toc "## Plugin Categories" -t github categorized_plugins.md

Note: you may need to restart the kernel to use updated packages.
Creating a Table of Contents for 'categorized_plugins.md'


In [82]:
import re

# Append plugins_by_topic.md into README.md, inside the tags: <!-- BEGIN PLUGINS --> and <!-- END PLUGINS -->
with open("../README.md", "r") as file:
    readme_text = file.read()

with open("categorized_plugins.md", "r") as file:
    markdown_text = file.read()

with open("../README.md", "w") as file:
    # Replace the text between the tags <!-- BEGIN PLUGINS --> and <!-- END PLUGINS --> with the markdown text
    readme_text = re.sub(r"<!-- BEGIN PLUGINS -->(.|\n)*<!-- END PLUGINS -->", f"<!-- BEGIN PLUGINS -->\n\n{markdown_text}\n\n<!-- END PLUGINS -->", readme_text)
    file.write(readme_text)