In [13]:
import time
import pandas as pd
import numpy as np
import streamlit as st
from openai._client import OpenAI
import json

# Shared OpenAI client; the API key is looked up in Streamlit's secrets under ["openai"]["api_key"].
client = OpenAI(api_key=st.secrets["openai"]["api_key"])

In [11]:
# Ask the model to review the filter tags used for the Kongsberg Maritime product search.
filter_tags_test = pd.read_csv("filter_tags.csv")

# Serialize the whole table as CSV text. Formatting the DataFrame object itself would
# insert its display repr, which pandas abbreviates for large frames and long cell
# values, so the model would not see all of the tags.
filter_tags_csv = filter_tags_test.to_csv(index=False)

# The data is pasted inline, so the prompt describes it as CSV data rather than as a file.
prompt = (
    "Help me determine if the list of filter tags for kongsberg maritime is correct, "
    "or if I should change some of the filter tags, to better make a filter to search for products. "
    "Here are the filter tags as CSV data:\n{}\n"
    "Please state what changes you recommend and how to fix them"
).format(filter_tags_csv)

response = client.chat.completions.create(
    model="gpt-4-1106-preview",
    messages=[
        {"role": "system", "content": "You are a helpful assistant"},
        {"role": "user", "content": prompt},
    ],
    max_tokens=1500,
)
# The reply is a free-text review of the tags, not a software/hardware flag.
tag_review = response.choices[0].message.content.strip()

print(tag_review)

I don't have direct access to your Excel file or the database of Kongsberg Maritime products, but I can provide some general recommendations on how to create effective filter tags. Here are some points you should consider when checking and possibly adjusting the tags:

1. **Relevance**: Make sure that each tag accurately reflects the associated products or services. If a tag does not seem to correspond well with most products in a category, it might need to be changed.

2. **Specificity**: Tags should be specific enough to distinguish products within a category. Overly broad tags might not be helpful for filtering. For example, the term "advanced navigation system" could be broken down into more specific technologies if applicable.

3. **Consistency**: Use a consistent level of detail across all categories. This makes it easier for users to understand what kind of information each tag represents and to compare similar products across different categories.

4. **Comprehensiveness**: The

In [30]:
# Load the tagged product table and keep only the first row seen for each URL.
# The row count is printed before and after so the number of dropped rows is visible.
df_tags_raw = pd.read_csv("./data/df_tags_use_app_22_11.csv")
print(len(df_tags_raw))
df_tags = df_tags_raw.drop_duplicates(subset="url", keep="first")
print(df_tags.shape[0])

291
277


In [31]:
# load the dict from pickle file
import pickle
# NOTE(review): pickle.load can execute arbitrary code while loading; only use this
# file if it comes from a trusted source.
with open('./data/url_technical_text_dict.pkl', mode='rb') as pickle_file:
    url_text_dict = pickle.load(pickle_file)

In [32]:


def generate_tags_and_handle_rate_limit(df_tags, max_retries=3, retry_delay=5):
    """Classify every product in ``df_tags`` as software or hardware via the chat API.

    For each row, the text stored for the row's ``url`` in the module-level
    ``url_text_dict`` (empty string when the URL has no entry) and the row's
    ``Product_Name`` are sent to the model, which is asked to answer with a JSON
    object holding an ``is_software`` key. That value is written to the
    ``is_software`` column of the row.

    Args:
        df_tags: DataFrame with ``url`` and ``Product_Name`` columns. It is
            modified in place: the ``is_software`` column is (re)created.
        max_retries: Number of attempts per row before the row is skipped.
        retry_delay: Seconds to wait between two attempts after a failure.

    Returns:
        The same DataFrame object. Rows for which every attempt failed keep
        the placeholder "" in ``is_software``.
    """
    # Object dtype so the column can hold both the "" placeholder and the
    # booleans written below.
    df_tags["is_software"] = pd.Series("", index=df_tags.index, dtype=object)

    # Iterate over the rows of the dataframe
    for index, row in df_tags.iterrows():
        print(index)
        url = row["url"]
        product_name = row["Product_Name"]

        website_text = url_text_dict.get(url, "")

        prompt = f"Given the website text: '{website_text}', and the product name: '{product_name}', is the product a software or a hardware? If it is a combination, then it is hardware. If its only software, then return software.  Return true if it's a software or false if it's a hardware."

        for attempt in range(1, max_retries + 1):
            try:
                response = client.chat.completions.create(
                    model="gpt-4-1106-preview",
                    messages=[
                        {"role": "system", "content": "You are a helpful assistant that determines whether a product is software or hardware. Your responses should be in JSON format and consist of a boolean value on the format: {'is_software': true/false} "},
                        {"role": "user", "content": prompt}
                    ],
                    max_tokens=50,
                    response_format={ "type": "json_object" },
                    timeout=10  # Fail the request instead of hanging indefinitely
                )
                is_software = response.choices[0].message.content.strip()
                print(product_name)
                print(is_software)
                # Parse the JSON reply and pull out the boolean
                is_software_value = json.loads(is_software)['is_software']
                print(is_software_value)
                print("_________________________________")
                # Save the determination in the 'is_software' column
                df_tags.loc[index, "is_software"] = is_software_value
                break
            except Exception as e:
                # API errors, malformed JSON and a missing key all count as a failed attempt.
                print(f"Error at index {index}: {e}")
                if attempt < max_retries:
                    time.sleep(retry_delay)  # Wait before retrying
        else:
            # for/else: runs only when no attempt reached `break`, i.e. all attempts failed.
            # (Attached to the `try`, this branch could never run because the try body
            # always leaves through `break`.)
            print(f"Skipping index {index} after {max_retries} failed attempts")

    return df_tags

# Classify every product in df_tags and save the returned table as CSV (no index column).

df_tags_tech = generate_tags_and_handle_rate_limit(df_tags)
df_tags_tech.to_csv("./data/df_tags_use_app_22_11_issoftware.csv", index=False)

0
Product Name
{
    "is_software": false
}
False
_________________________________
1
K-Sync Synchronization unit
{"is_software": false}
False
_________________________________
2
cNODE Midi - Transponder
{"is_software": false}
False
_________________________________
3
cNODE - Transponder, Embedable
{"is_software": false}
False
_________________________________
4
cNODE MiniS - Transponder
{"is_software": false}
False
_________________________________
5
cNODE Maxi - Transponder
{"is_software": false}
False
_________________________________
6
cNODE Micro - Transponder
{"is_software": false}
False
_________________________________
7
cNODE IQAM - Intelligent data analysis and monitoring
{"is_software": false}
False
_________________________________
8
cNODE - Transponder, for explosive atmosphere
{"is_software": false}
False
_________________________________
9
TTC 30 & TTC 10 - Transponders test and configuration units
{"is_software": false}
False
_________________________________
10
cNODE M

In [33]:

# Write the classified table (df_tags_tech) to an Excel workbook without the index column.
df_tags_tech.to_excel("./data/df_tags_use_app_22_11_issoftware.xlsx", index=False)

In [35]:
# Mirror the classified CSV into an Excel workbook.
df = pd.read_csv("data/df_tags_use_app_22_11_issoftware.csv")
# index=False keeps the row index out of the workbook; otherwise it is written as an
# extra unnamed column that comes back as "Unnamed: 0" when the workbook is read again.
df.to_excel("data/df_tags_use_app_22_11_issoftware.xlsx", index=False)

In [36]:
# Read the Excel workbook back and overwrite the CSV with its contents (no index column written).
df_new = pd.read_excel("data/df_tags_use_app_22_11_issoftware.xlsx")
df_new.to_csv("data/df_tags_use_app_22_11_issoftware.csv", index=False)

In [16]:
import json

tags_dict = {'Naval': '{\n    "Product Type": ["marine coating", "acoustic control system", "leak detection"],\n    "Technology": ["anti-fouling", "Hydroacoustic Positioning", "acoustic telemetry", "Inertial Navigation System"],\n    "Application": ["hull protection", "BOP operation", "early warning", "infrastructure safety"]\n}', 'Fish finding': '{\n    "Product Type": [\n        "portable sonar",\n        "echo sounder",\n        "sonar system",\n        "navigation software"\n    ],\n    "Technology": [\n        "chirp technology",\n        "split-beam technology",\n        "wideband chirp",\n        "multi-frequency echo sounder"\n    ],\n    "Application": [\n        "fish finding",\n        "fish stock assessment",\n        "maritime navigation",\n        "catch monitoring"\n    ]\n}', 'Surveillance & monitoring': '{\n    "Product Type": ["sonar system", "surveillance system"],\n    "Technology": ["subsea technology", "underwater surveillance"],\n    "Application": ["security", "naval defense"]\n}', 'Underwater navigation & positioning': '{\n    "Product Type": [\n        "Underwater Altimeters",\n        "Transponder",\n        "Acoustic Positioning System",\n        "Modem"\n    ],\n    "Technology": [\n        "HiPAP",\n        "Hydroacoustic",\n        "GNSS technology",\n        "Dynamic Positioning"\n    ],\n    "Application": [\n        "ROV navigation",\n        "Deep water operation",\n        "Subsea navigation",\n        "Position tracking"\n    ]\n}', 'Autonomous and uncrewed solutions': '{\n    "Product Type": [\n        "transponder",\n        "Autonomous Echo Sounders Programming",\n        "marine survey equipment",\n        "Software"\n    ],\n    "Technology": [\n        "acoustic communication",\n        "3D sonar sensing",\n        "sensor fusion",\n        "machine learning"\n    ],\n    "Application": [\n        "underwater_navigation",\n        "marine research",\n        "Data Management",\n        "environmental monitoring"\n    
]\n}', 'Product category': '{\n    "Product Type": ["Software", "Electronics", "Apparel"],\n    "Technology": ["AI", "Blockchain", "IoT"],\n    "Application": ["Data Analysis", "Payment Processing", "Wearable Tracking"]\n}', 'Geophysical survey': '{\n    "Product Type": [\n        "acoustic doppler profiler",\n        "sub-bottom profiler",\n        "marine survey equipment",\n        "oceanography equipment"\n    ],\n    "Technology": [\n        "narrow beam",\n        "multibeam echo sounder integration",\n        "parametric sub-bottom profiler",\n        "low frequency transducers"\n    ],\n    "Application": [\n        "seafloor mapping",\n        "sub-bottom imaging",\n        "sediment layer mapping",\n        "seabed penetration"\n    ]\n}', 'Surface navigation & positioning': '{\n    "Product Type": [\n        "scientific transceiver",\n        "echo sounder",\n        "marine transceiver",\n        "navigation echo sounder"\n    ],\n    "Technology": [\n        "underwater acoustics",\n        "wideband transceiver",\n        "multiplexing",\n        "dual power input"\n    ],\n    "Application": [\n        "fisheries science",\n        "ocean research",\n        "marine navigation",\n        "underwater vehicle navigation"\n    ]\n}', 'Seafloor mapping': '{\n    "Product Type": [\n        "portable hydrographic system",\n        "multibeam echosounder",\n        "side scan sonar",\n        "echo sounder"\n    ],\n    "Technology": [\n        "multibeam sonar",\n        "sidescan sonar",\n        "sonar_signal_processing",\n        "multifrequency"\n    ],\n    "Application": [\n        "seafloor mapping",\n        "ocean floor mapping",\n        "marine survey",\n        "bathymetric mapping"\n    ]\n}', 'Fishery research': '{\n    "Product Type": [\n        "Aquaculture monitoring system",\n        "Information transfer system",\n        "Analyser",\n        "Sensor"\n    ],\n    "Technology": [\n        "Environmental sensing",\n        "Maritime 
broadband radio",\n        "Submersible technology",\n        "Sensor fusion"\n    ],\n    "Application": [\n        "Biomass measurement",\n        "Fish farming efficiency",\n        "Seawater analysis",\n        "Environmental monitoring"\n    ]\n}', 'Maritime communications': '{\n    "Product Type": ["thermal_printer", "newsletter", "AIS transponder", "sonar software", "AIS base station", "integrated bridge system", "marine communication", "software update", "satellite communication", "AIS system", "electronic chart display", "communication equipment", "AIS Base Station management"],\n    "Technology": ["photographic_quality_output", "3D profiling", "transceiver", "maritime electronics", "IMO compliant", "data interpretation", "electromagnetic", "large_format_thermal_printing", "multi-head sonar support", "advanced_greyscale_capabilities", "high_resolution_imaging", "sdr_technology", "high-speed data transfer"],\n    "Application": ["vessel tracking", "collision avoidance", "maritime safety", "underwater survey", "vessel safety", "navigation aid tracking", "coastal surveillance", "maritime traffic monitoring", "remote operational coordination", "vessel data management", "space-based_ais", "offshore connectivity", "portable"]\n}'}


# Convert each category's JSON string into a dict. json.loads skips the whitespace
# between tokens on its own, so the text is parsed as-is (stripping runs of spaces
# by hand would also alter tag names that happen to contain them).
# Entries that are already dicts are left untouched so this cell can be re-run
# without failing on the previously converted values.
for key, raw_tags in tags_dict.items():
    if isinstance(raw_tags, str):
        tags_dict[key] = json.loads(raw_tags)

print(tags_dict)




In [None]:
# For every product whose category has an entry in tags_dict, ask the model to pick the
# fitting tags from that category's tag lists and store the raw reply in the "tags" column.
# NOTE(review): df_tech_new_tags is not defined in the visible cells — confirm where it comes from.
tag_picker_system_message = {
    "role": "system",
    "content": "You are a helpful assistant that generates relevant tags for products. Your responses should be in JSON format and consist of a comma-separated list of tags."
}

for index, row in df_tech_new_tags.iterrows():
    print(index)
    product_category = str(row["Product category"])  # compared against the dict keys as a string

    # Products whose category has no tag list are reported and left untouched.
    if product_category not in tags_dict:
        print(f"Category '{product_category}' not found in tags_dict")
        continue

    category_tags = tags_dict[product_category]
    print(category_tags)

    messages = [
        tag_picker_system_message,
        {
            "role": "user",
            "content": f"Please refer to this list of tags: {category_tags}. For the product: {row['Product_Name']}, select the tags that best describe it. Your response should be a comma-separated list of tags from the provided list. Avoid using 'tag' as a tag, and do not use 'Product Type', 'Technology', or 'Application' as tags."
        },
    ]
    response = client.chat.completions.create(
        model="gpt-4-1106-preview",
        messages=messages,
        response_format={"type": "json_object"},
    )
    tags = response.choices[0].message.content.strip()
    df_tech_new_tags.loc[index, "tags"] = tags

    print(row["Product_Name"])
    print(tags)


# Save the tagged table as CSV (no index column).
df_tech_new_tags.to_csv("./data/df_tags_use_app_22_11.csv", index=False)


In [17]:
# Print the column names of df.
print(df.columns)

Index(['Unnamed: 0', 'Product_Name', 'Product category', 'Application / use ',
       'url', 'new_tags', 'filter_tags', 'product_tags', 'image_url',
       'is_software', 'Unnamed: 10', 'Unnamed: 11'],
      dtype='object')


In [21]:
from openai._client import OpenAI
import streamlit as st
# Shared OpenAI client; the API key is looked up in Streamlit's secrets under ["openai"]["api_key"].
client = OpenAI(api_key=st.secrets["openai"]["api_key"])

In [29]:
import pandas as pd
# Tag products that list several comma-separated categories: one model call per
# category that has an entry in tags_dict, with the raw replies collected in "product_tags".
df = pd.read_csv("data/df_tags_use_app_22_11_issoftware.csv")

# Number of rows whose category contains a comma.
multi_category_count = df["Product category"].str.contains(",").sum()
print(multi_category_count)

for index, row in df.iterrows():
    raw_categories = row["Product category"]
    # Only string values that contain a comma are processed; everything else is skipped.
    if not isinstance(raw_categories, str) or "," not in raw_categories:
        continue

    category_list = [category.strip() for category in raw_categories.split(",")]

    for category in category_list:
        # Categories without a tag list are silently skipped.
        if category not in tags_dict:
            continue

        category_tags = tags_dict[category]
        print(row["Product_Name"])
        print(category_tags)

        messages = [
            {
                "role": "system",
                "content": "You are a helpful assistant that generates relevant tags for products. Your responses should be in JSON format and consist of a comma-separated list of tags."
            },
            {
                "role": "user",
                "content": f"Please refer to this list of tags: {category_tags}. For the product: {row['Product_Name']}, select the tags that best describe it. Your response should be a comma-separated list of tags from the provided list. Avoid using 'tag' as a tag, and do not use 'Product Type', 'Technology', or 'Application' as tags."
            },
        ]
        response = client.chat.completions.create(
            model="gpt-4-1106-preview",
            messages=messages,
            response_format={"type": "json_object"},
        )
        tags = response.choices[0].message.content.strip()

        # NOTE(review): the reply is requested as a JSON object, so appending it with ", "
        # puts several JSON objects into one cell — confirm how this column is parsed downstream.
        if pd.isna(df.loc[index, "product_tags"]):
            df.loc[index, "product_tags"] = tags
            print("Tags added wjen empty",tags)
        else:
            df.loc[index, "product_tags"] += ", " + tags
            print("tags added togeter: ",tags)
                

9
transceivers for scientific echo sounders
{'Product Type': ['portable sonar', 'echo sounder', 'sonar system', 'navigation software'], 'Technology': ['chirp technology', 'split-beam technology', 'wideband chirp', 'multi-frequency echo sounder'], 'Application': ['fish finding', 'fish stock assessment', 'maritime navigation', 'catch monitoring']}
{
  "tags": "echo sounder, split-beam technology, multi-frequency echo sounder, fish stock assessment, catch monitoring"
}
transceivers for scientific echo sounders
{'Product Type': ['Aquaculture monitoring system', 'Information transfer system', 'Analyser', 'Sensor'], 'Technology': ['Environmental sensing', 'Maritime broadband radio', 'Submersible technology', 'Sensor fusion'], 'Application': ['Biomass measurement', 'Fish farming efficiency', 'Seawater analysis', 'Environmental monitoring']}
{
  "tags": ["Information transfer system", "Environmental sensing", "Sensor fusion", "Biomass measurement"]
}
650M Mini Sonar Head
{'Product Type': ['por

In [37]:
# Regenerate "product_tags" for one specific product, working on the Excel copy of the table.
df = pd.read_excel("data/df_tags_use_app_22_11_issoftware.xlsx")
target_product = "PX MultiSensor Charger"

for index, row in df.iterrows():
    if row["Product_Name"] != target_product:
        continue

    category = row["Product category"]
    # Nothing is written when the product's category has no entry in tags_dict.
    if category not in tags_dict:
        continue

    category_tags = tags_dict[category]
    print(row["Product_Name"])
    print(category_tags)

    # Ask the model to pick the fitting tags from this category's tag lists.
    messages = [
        {
            "role": "system",
            "content": "You are a helpful assistant that generates relevant tags for products. Your responses should be in JSON format and consist of a comma-separated list of tags."
        },
        {
            "role": "user",
            "content": f"Please refer to this list of tags: {category_tags}. For the product: {row['Product_Name']}, select the tags that best describe it. Your response should be a comma-separated list of tags from the provided list. Avoid using 'tag' as a tag, and do not use 'Product Type', 'Technology', or 'Application' as tags."
        },
    ]
    response = client.chat.completions.create(
        model="gpt-4-1106-preview",
        messages=messages,
        response_format={"type": "json_object"},
    )
    tags = response.choices[0].message.content.strip()
    # Overwrites whatever was stored for this product before.
    df.loc[index, "product_tags"] = tags

PX MultiSensor Charger
{'Product Type': ['portable sonar', 'echo sounder', 'sonar system', 'navigation software'], 'Technology': ['chirp technology', 'split-beam technology', 'wideband chirp', 'multi-frequency echo sounder'], 'Application': ['fish finding', 'fish stock assessment', 'maritime navigation', 'catch monitoring']}


In [40]:
# Persist df to both the Excel workbook and the CSV file (no index column in either).
df.to_excel("data/df_tags_use_app_22_11_issoftware.xlsx", index=False)
df.to_csv("data/df_tags_use_app_22_11_issoftware.csv", index=False)

In [None]:
# Read the Excel workbook and overwrite the CSV with its contents (no index column written).
df_new = pd.read_excel("data/df_tags_use_app_22_11_issoftware.xlsx")
df_new.to_csv("data/df_tags_use_app_22_11_issoftware.csv", index=False)

In [12]:
# Convert the tags_27_11 workbook to CSV (no index column written).
df_new = pd.read_excel("data/tags_27_11.xlsx")
# Disabled column renames: dict_tags -> filter_tags, tags -> product_tags.
# df_new = df_new.rename(columns={"dict_tags": "filter_tags"})
# df_new = df_new.rename(columns={"tags": "product_tags"})
df_new.to_csv("data/tags_27_11.csv", index=False)

In [33]:
# # save to csv
# csv_pd = pd.read_csv("data/df_tags_use_app_22_11_issoftware.csv")
# csv_pd.to_excel("data/df_tags_use_app_22_11_issoftware.xlsx", index=False)
