In [138]:
import os
import json
import pandas as pd

def load_chatgpt_plugin_descriptions(path='chatgpt_plugin_descriptions.json'):
    """Load the cached plugin descriptions from a JSON file.

    Args:
        path: Location of the JSON cache. Defaults to
            ``chatgpt_plugin_descriptions.json`` in the working directory.

    Returns:
        The decoded JSON content. An empty dict is returned when the file is
        missing, empty, or contains only whitespace.

    Side effects:
        When ``path`` does not exist, it is created containing ``{}``.

    Raises:
        json.JSONDecodeError: If the file has content that is not valid JSON.
    """
    if not os.path.isfile(path):
        # First run: seed the cache so later writers find a valid JSON file.
        with open(path, 'w', encoding='utf-8') as f:
            json.dump({}, f)
        return {}

    # JSON is read as UTF-8 rather than the platform default encoding.
    with open(path, 'r', encoding='utf-8') as f:
        raw = f.read()

    # A blank (or whitespace-only) file is treated as "no data" instead of
    # being handed to the JSON parser, which would reject it.
    if not raw.strip():
        return {}
    return json.loads(raw)

# Read the cached plugin payload and wrap it in a DataFrame.
plugins = load_chatgpt_plugin_descriptions()
df = pd.DataFrame(plugins)

# Show the entry stored in the 'items' column at index label 0.
df.loc[0, 'items']

{'id': 'plugin-c0bde20b-ea30-4550-bae1-3ed0105f865c',
 'domain': 'telegraph-export.illia56.repl.co',
 'namespace': 'telegraph_export',
 'status': 'approved',
 'manifest': {'schema_version': 'v1',
  'name_for_model': 'telegraph_export',
  'name_for_human': 'Telegraph Export',
  'description_for_model': 'Plugin for exporting text to Telegraph.',
  'description_for_human': 'Export text to Telegraph.',
  'auth': {'type': 'none'},
  'api': {'type': 'openapi',
   'url': 'https://telegraph-export.illia56.repl.co/openapi.yaml'},
  'logo_url': 'https://telegraph-export.illia56.repl.co/logo.jpg',
  'contact_email': 'i.ludogovskyi@gmail.com',
  'legal_info_url': 'https://butter-tangerine-f7b.notion.site/Legal-Telegraph-Export-7c68f6b828004243aef558c8e81b33ae?pvs=4',
  'generated_description': 'The Telegraph Export plugin allows users to easily export text to the Telegraph platform. With this plugin, users can conveniently transfer their text content to Telegraph for publishing or sharing purposes

In [139]:
# Flatten each item's manifest into one row holding only the fields used by
# the rest of the notebook.
plugins_df = pd.DataFrame(
    [
        {
            'name': item['manifest']['name_for_human'],
            'category': item['manifest']['category'],
            'description': item['manifest']['generated_description'],
            'description_for_human': item['manifest']['description_for_human'],
            'logo_url': item['manifest']['logo_url'],
        }
        for item in df['items']
    ],
    columns=['name', 'category', 'description', 'description_for_human', 'logo_url'],
)

plugins_df.head()

Unnamed: 0,name,category,description,description_for_human,logo_url
0,Telegraph Export,Documents,The Telegraph Export plugin allows users to ea...,Export text to Telegraph.,https://telegraph-export.illia56.repl.co/logo.jpg
1,Love Coupons,Coding,Love Coupons is a plugin that allows users to ...,Find money saving coupon & discount codes for ...,https://cdn.lovesavingsgroup.com/assets/favico...
2,PayPay gourmet,Food and Drink,The PayPay Gourmet plugin allows users to sear...,You can find restaurants in Japan from PayPay ...,https://s.yimg.jp/images/paypaygourmet/common/...
3,Check Website Down,Productivity,The Check Website Down plugin allows users to ...,Insert your website URL to check the availabil...,https://api.pulsetic.com/chatgpt_logo.png
4,AnaBot,Health and Fitness,AnaBot is a conversational guide that provides...,AnaBot is a conversational guide for well-bein...,https://firebasestorage.googleapis.com/v0/b/se...


In [140]:
# Number of plugins, counted as non-null entries in the 'name' column.
plugin_count = plugins_df['name'].count()
print(f"Plugin count: {plugin_count}")

# Names that repeat an earlier row (the first occurrence is not flagged).
is_repeat = plugins_df.duplicated(subset=['name'])
duplicate_names = plugins_df.loc[is_repeat, 'name'].values
print(f"Duplicate names: {duplicate_names}")

# De-duplication by name is currently commented out:
# plugins_df = plugins_df.drop_duplicates(subset=['name'])

# print(f"Duplicates removed. New plugin count: {plugins_df['name'].count()}")


Plugin count: 695
Duplicate names: ['Repo Radar' 'Code Runner' 'Currency Converter' 'Clinical Trial Radar'
 'What To Watch' 'Currency Converter' 'RoboAd' 'Scraper'
 'Property Finder UK']


In [141]:
import yaml 
# Parse the category -> keyword-list mapping from the YAML file one level up.
with open('../categories.yaml', 'r') as f:
    categories = yaml.safe_load(f)

# Lower-case every keyword so matching can be done against lower-cased text.
# Iterating over a snapshot of the items keeps the in-place reassignment
# independent of the dict view. Note: `category` and `keywords` stay bound at
# notebook scope after this loop (holding the last entry of the mapping).
for category, keywords in list(categories.items()):
    categories[category] = [word.lower() for word in keywords]

categories.keys()

dict_keys(['Charts and Diagrams', 'Coding', 'Documents', 'Earth and Space', 'Education and Learning', 'Entertainment', 'Events', 'Finance', 'Food and Drink', 'Health and Fitness', 'Images', 'Job and Career', 'Legal and Politics', 'Marketing', 'News and Media', 'Productivity', 'Prompts', 'Real Estate', 'Search', 'Shopping', 'Social Networking', 'Sports', 'Text', 'Travel', 'Uncategorized', 'Video'])

In [142]:
from collections import defaultdict
import re

def _normalize_for_matching(text):
    """Lower-case ``text`` and keep only letters, digits, spaces, newlines and dots.

    Values that are not strings (for example ``None`` or NaN from a missing
    manifest field) normalize to an empty string, so they match no keyword
    instead of raising.
    """
    if not isinstance(text, str):
        return ''
    # Apostrophes are outside the allowed character class, so they are
    # stripped here as well.
    return re.sub('[^a-zA-Z0-9 \n\\.]', '', text.lower())


def categorize_plugin(plugin, categories):
    """Pick the best-scoring category for a plugin by keyword matching.

    Args:
        plugin: Mapping-like object with the keys ``'name'``, ``'description'``,
            ``'description_for_human'`` and ``'category'``.
        categories: Mapping of category name to an iterable of keywords. The
            keywords are compared against lower-cased plugin text.

    Returns:
        A one-element list holding the highest-scoring category name. When no
        category scores at all, the list is ``['Uncategorized']``. Ties are
        resolved in favor of the category that was scored first.

    Side effects:
        Prints the plugin name, the winning ``(category, score)`` pair and the
        top three ``(category, score)`` pairs.

    Scoring, per keyword of a category:
        +5 if the keyword occurs in the plugin name,
        +10 if the keyword equals the whole plugin name,
        +3 if the keyword occurs in the human description,
        +1 if the keyword occurs in the generated description.
    A further +5 goes to the category equal to ``plugin['category']``.
    """
    name = _normalize_for_matching(plugin['name'])
    description = _normalize_for_matching(plugin['description'])
    description_for_human = _normalize_for_matching(plugin['description_for_human'])

    scores = defaultdict(int)

    for category, keywords in categories.items():
        for keyword in keywords:
            # Substring match against the name is weighted highly ...
            if keyword in name:
                scores[category] += 5

            # ... and an exact match with the whole name higher still.
            if keyword == name:
                scores[category] += 10

            if keyword in description_for_human:
                scores[category] += 3

            if keyword in description:
                scores[category] += 1

        # The category already declared on the plugin gets a boost.
        if plugin['category'] is not None and plugin['category'] == category:
            scores[category] += 5

    # Nothing matched at all: fall back to the catch-all bucket.
    if not scores:
        scores['Uncategorized'] += 1

    # Rank once; the sort is stable, so equal scores keep scoring order.
    ranked = sorted(scores.items(), key=lambda entry: entry[1], reverse=True)
    best = ranked[:1]
    print(f"{plugin['name']}: [{best[0]}] : {ranked[:3]}")

    return [category for category, _ in best]
        
        
# Group plugins by their computed category, and separately collect every
# plugin whose computed category differs from the one declared in its manifest.
categorized_plugins = defaultdict(list)
mismached_categories = defaultdict(list)

for _, plugin in plugins_df.iterrows():
    categorized = categorize_plugin(plugin, categories)

    for category in categorized:
        # File the plugin under the category computed for *this* plugin.
        # Appending before this loop would use whatever `category` was bound
        # to previously (the previous plugin's result or a leftover variable).
        categorized_plugins[category].append(plugin)

        if category != plugin['category']:
            print(f"Plugin category mismatch for {plugin['name']}: {plugin['category']} != {category}")
            mismached_categories[category].append(plugin)

# Write the mismatches to a text report, grouped by computed category.
# Distinct loop names keep the notebook-level `plugins`, `plugin` and
# `category` variables from being overwritten by this report loop.
with open("mismatched_categories.txt", "w", encoding="utf-8") as f:
    f.write("Mismatches\n")
    for mismatch_category, mismatch_plugins in mismached_categories.items():
        f.write(f"{mismatch_category}:\n")
        for mismatch_plugin in mismatch_plugins:
            f.write(f"\t{mismatch_plugin['name']}\n")
        f.write("\n")

# Sort the categories alphabetically
categorized_plugins = dict(sorted(categorized_plugins.items(), key=lambda item: item[0]))

categorized_plugins.keys()
    

Telegraph Export: [('Documents', 20)] : [('Documents', 20), ('Charts and Diagrams', 10), ('Text', 5)]
Love Coupons: [('Shopping', 12)] : [('Shopping', 12), ('Coding', 10), ('Finance', 4)]
Plugin category mismatch for Love Coupons: Coding != Shopping
PayPay gourmet: [('Food and Drink', 18)] : [('Food and Drink', 18), ('Marketing', 2), ('Charts and Diagrams', 1)]
Check Website Down: [('Productivity', 5)] : [('Productivity', 5), ('Charts and Diagrams', 4), ('Search', 4)]
AnaBot: [('Health and Fitness', 5)] : [('Health and Fitness', 5), ('Shopping', 3), ('Charts and Diagrams', 1)]
Book Tickets: [('Events', 33)] : [('Events', 33), ('Education and Learning', 6), ('Entertainment', 5)]
RoboAd: [('Marketing', 12)] : [('Marketing', 12), ('Finance', 6), ('Productivity', 4)]
Plugin category mismatch for RoboAd: Finance != Marketing
Ebay Finds: [('Shopping', 14)] : [('Shopping', 14), ('Coding', 5), ('Charts and Diagrams', 1)]
Plugin category mismatch for Ebay Finds: Coding != Shopping
Amazon Finds:

dict_keys(['Charts and Diagrams', 'Coding', 'Documents', 'Earth and Space', 'Education and Learning', 'Entertainment', 'Events', 'Finance', 'Food and Drink', 'Health and Fitness', 'Images', 'Job and Career', 'Legal and Politics', 'Marketing', 'News and Media', 'Productivity', 'Prompts', 'Real Estate', 'Search', 'Shopping', 'Social Networking', 'Sports', 'Text', 'Travel', 'Uncategorized', 'Video'])

In [143]:
# Order the plugins inside every category by plugin name and print the size
# of each category.
for category, plugins in categorized_plugins.items():
    # Each plugin is a row Series indexed by column label, so the name is
    # looked up by label. An integer key such as item[0] relies on positional
    # fallback, which pandas has deprecated for label-indexed Series.
    categorized_plugins[category] = sorted(plugins, key=lambda item: item['name'])
    print(f"{category}: {len(plugins)} plugins")

# Total number of rows in plugins_df, for comparison with the counts above.
print(len(plugins_df))

Charts and Diagrams: 21 plugins
Coding: 59 plugins
Documents: 33 plugins
Earth and Space: 25 plugins
Education and Learning: 40 plugins
Entertainment: 49 plugins
Events: 8 plugins
Finance: 59 plugins
Food and Drink: 17 plugins
Health and Fitness: 14 plugins
Images: 12 plugins
Job and Career: 27 plugins
Legal and Politics: 12 plugins
Marketing: 32 plugins
News and Media: 16 plugins
Productivity: 64 plugins
Prompts: 11 plugins
Real Estate: 6 plugins
Search: 52 plugins
Shopping: 57 plugins
Social Networking: 15 plugins
Sports: 8 plugins
Text: 6 plugins
Travel: 35 plugins
Uncategorized: 2 plugins
Video: 15 plugins
695


In [144]:
def _markdown_cell(value):
    """Render ``value`` as text that fits inside one Markdown table cell.

    ``None`` and NaN become an empty string. Surrounding whitespace is
    stripped, line breaks become spaces, and ``|`` is backslash-escaped so it
    cannot terminate the cell.
    """
    if value is None or (isinstance(value, float) and value != value):
        return ""
    text = str(value).strip().replace("\r\n", " ").replace("\n", " ")
    return text.replace("|", "\\|")


def create_markdown_for_plugin(plugin):
    """Build one Markdown table row (logo, bold name, description) for a plugin.

    Args:
        plugin: Mapping-like object with the keys ``'name'``,
            ``'description'`` and ``'logo_url'``.

    Returns:
        A string of the form ``"| <logo> | **<name>** | <description> |\\n"``.
        The logo cell is an ``<img>`` tag of width 64, or empty when the
        plugin has no usable logo URL.
    """
    name = _markdown_cell(plugin['name'])
    description = _markdown_cell(plugin['description'])

    # Logo URL if it exists, otherwise empty string. Characters that would
    # end the table cell or the src attribute are percent-encoded.
    raw_logo_url = plugin['logo_url']
    logo_url = raw_logo_url.strip() if isinstance(raw_logo_url, str) else ""
    logo_url = logo_url.replace("|", "%7C").replace('"', "%22")

    logo = f"<img src=\"{logo_url}\" alt=\"logo\" width=\"64\" />" if logo_url else ""
    return f"| {logo} | **{name}** | {description} |\n"

# Initialize the markdown text
# Table of contents: heading followed by a dated summary line.
current_date = pd.to_datetime('today').strftime("%B %d, %Y")
toc_parts = [
    "## Categories\n\n",
    f"As of **{current_date}**, there are **{len(plugins_df)} plugins** in the ChatGPT Plugin Store.\n\n",
]

# One table per category, collected as fragments and joined once at the end.
section_parts = []

for category, plugins in categorized_plugins.items():
    # TOC entry linking to the category heading, with the plugin count.
    toc_parts.append(f"- [{category}](#{category.lower().replace(' ', '-')}) - {len(plugins)} plugins\n")

    # Category heading and table header, then one row per plugin.
    section_parts.append(f"\n## {category}\n\n| Logo | Plugin Name | Description |\n| --- | --- | --- |\n")
    for plugin in plugins:
        section_parts.append(create_markdown_for_plugin(plugin))

toc_text = "".join(toc_parts)
markdown_text = "".join(section_parts)

# Write the table of contents followed by the category tables.
with open("categorized_plugins.md", "w") as file:
    file.write(toc_text + markdown_text)

In [145]:

import re

# Replace everything between the <!-- BEGIN PLUGINS --> and <!-- END PLUGINS -->
# tags in README.md with the contents of categorized_plugins.md.
with open("../README.md", "r") as file:
    readme_text = file.read()

with open("categorized_plugins.md", "r") as file:
    markdown_text = file.read()

plugins_section = f"<!-- BEGIN PLUGINS -->\n\n{markdown_text}\n\n<!-- END PLUGINS -->"

# The replacement is supplied through a function so that backslashes in the
# generated markdown are inserted literally instead of being interpreted as
# regex replacement escapes. re.DOTALL lets `.` span line breaks.
readme_text, replaced_count = re.subn(
    r"<!-- BEGIN PLUGINS -->.*<!-- END PLUGINS -->",
    lambda _match: plugins_section,
    readme_text,
    flags=re.DOTALL,
)

if replaced_count == 0:
    raise ValueError(
        "README.md does not contain the <!-- BEGIN PLUGINS --> / <!-- END PLUGINS --> tags; nothing was written."
    )

# README.md is only opened for writing once the new text is fully computed,
# so a failure above cannot leave it truncated.
with open("../README.md", "w") as file:
    file.write(readme_text)