In [1]:
# Read the Hugging Face token from the Kaggle secrets client so it is not written in the notebook source.
# `user_secrets` is reused by later cells to fetch the Gemini and Supabase credentials.
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
hugging_face_token = user_secrets.get_secret("HuggingFaceToken")
# IPython substitutes {hugging_face_token} into the shell command before running it.
!huggingface-cli login --token {hugging_face_token}

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: read).
The token `miniproject` has been saved to /root/.cache/huggingface/stored_tokens
Your token has been saved to /root/.cache/huggingface/token
Login successful.
The current active token is: `miniproject`


In [2]:
# Disabled alternative: a local transformers text-generation pipeline (kept commented out).
# from transformers import pipeline

# pipe = pipeline("text-generation", model="google/gemma-2-2b-it")
# Active path: a google-genai client, which generate_title() uses to request titles.
# The API key comes from the `user_secrets` client created in the first cell.
from google import genai
apikey = user_secrets.get_secret("gemini-hackbyte")
client = genai.Client(api_key=apikey)

In [3]:
!pip install supabase

Collecting supabase
  Downloading supabase-2.15.0-py3-none-any.whl.metadata (11 kB)
Collecting gotrue<3.0.0,>=2.11.0 (from supabase)
  Downloading gotrue-2.12.0-py3-none-any.whl.metadata (6.1 kB)
Collecting postgrest<1.1,>0.19 (from supabase)
  Downloading postgrest-1.0.1-py3-none-any.whl.metadata (3.5 kB)
Collecting realtime<2.5.0,>=2.4.0 (from supabase)
  Downloading realtime-2.4.2-py3-none-any.whl.metadata (6.6 kB)
Collecting storage3<0.12,>=0.10 (from supabase)
  Downloading storage3-0.11.3-py3-none-any.whl.metadata (1.8 kB)
Collecting supafunc<0.10,>=0.9 (from supabase)
  Downloading supafunc-0.9.4-py3-none-any.whl.metadata (1.2 kB)
Collecting pytest-mock<4.0.0,>=3.14.0 (from gotrue<3.0.0,>=2.11.0->supabase)
  Downloading pytest_mock-3.14.0-py3-none-any.whl.metadata (3.8 kB)
Collecting deprecation<3.0.0,>=2.1.0 (from postgrest<1.1,>0.19->supabase)
  Downloading deprecation-2.1.0-py2.py3-none-any.whl.metadata (4.6 kB)
Collecting aiohttp<4.0.0,>=3.11.14 (from realtim

In [4]:
# Build the Supabase client used by add_document() and retrieve_similar_documents().
from supabase import create_client, Client

# Project URL and anon key are fetched through the `user_secrets` client created in the first cell.
supabase_url = user_secrets.get_secret("NEXT_PUBLIC_SUPABASE_URL")
supabase_anon_key = user_secrets.get_secret("NEXT_PUBLIC_SUPABASE_ANON_KEY")

supabase: Client = create_client(supabase_url, supabase_anon_key)

In [5]:
import numpy as np
from sentence_transformers import SentenceTransformer, util
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd

# Sentence-embedding model; the helper functions call embedding_model.encode() on the combined bug text.
embedding_model = SentenceTransformer('sentence-transformers/multi-qa-mpnet-base-cos-v1')

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/9.25k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [6]:
# Helper functions
def combine_document_features(component, description):
    """Render a component/description pair as the single sentence that gets embedded."""
    template = "Component is {} with Description {}"
    return template.format(component, description)

# Used when a new bug report is stored.
def add_document(title, component, description, created_by, project_id):
    """Embed a new bug report and insert it as one row into the ``bugs`` table.

    Args:
        title: Bug title; stored as ``str(title)``.
        component: Component name; stored as a string and used in the embedded text.
        description: Bug description; stored as a string and used in the embedded text.
        created_by: Identifier of the reporter; stored as ``str(created_by)``.
        project_id: Project identifier; converted with ``int()``.

    Returns:
        The object returned by ``.execute()`` on the insert, so the caller can
        inspect what was written. Earlier versions discarded it and returned None.

    Raises:
        ValueError, TypeError: if ``project_id`` cannot be converted by ``int()``.
    """
    doc_text = combine_document_features(component, description)
    embedding = embedding_model.encode([doc_text])[0].tolist()
    row = {
        "title": str(title),
        "description": str(description),
        "created_by": str(created_by),
        "component":  str(component),
        "project_id": int(project_id),
        # The vector is sent as the string form of a Python list, e.g. "[0.1, 0.2, ...]".
        "embeddings": str(embedding),
    }
    return supabase.table("bugs").insert(row).execute()

def retrieve_similar_documents(component, description, pid):
    """Embed the given bug text and ask the ``get_similar_bugs`` RPC for matches.

    Returns a ``(matches, text)`` tuple: ``matches`` is a list of the rows the
    RPC returned, or None when it returned nothing; ``text`` is the combined
    component/description sentence that was embedded.
    """
    query_text = combine_document_features(component, description)
    query_vector = embedding_model.encode([query_text])[0].tolist()

    # The similarity search itself runs inside the database function.
    rpc_args = {"query_embedding": query_vector, "pid": pid}
    result = supabase.rpc("get_similar_bugs", rpc_args).execute()

    matches = list(result.data) if result.data else None
    return matches, query_text


def generate_title(soft_prompt, new_document, model="gemini-2.0-flash"):
    """Ask the Gemini client for a short bug-report title, using few-shot examples.

    Args:
        soft_prompt: Mapping of example title -> ``[component, description]``;
            each entry is rendered as one worked example in the prompt.
        new_document: Text of the bug to title (placed between ``|`` delimiters).
        model: Model name passed to ``generate_content``. Defaults to the model
            that was previously hard-coded.

    Returns:
        The generated title with surrounding whitespace removed, or an empty
        string when the response carries no text.
    """
    soft_prompt_examples = '\n\n'.join(
        f'Component: {value[0]}\n Description: {value[1]}\n title: {key}' for key, value in soft_prompt.items()
    )

    input_prompt = f'''Generate a Bug report title based on description and Component. Do not generate anything else. Try to keep it in a limit of 30 characters.
    Look at the below examples to understand the task.
    {soft_prompt_examples}
    Now using the examples above, try giving the output for the following input enclosed within the | delimiter.
    | {new_document} |.
    Give the title as instructed above. Give the title only'''

    response = client.models.generate_content(model=model, contents=input_prompt)
    # The raw text ends with a newline (e.g. 'Crash when ENTER command is keyed\n'),
    # and `text` may be None when no text part is returned, so normalise both cases.
    return (response.text or "").strip()

def suggest_title(component, description, pid):
    """Suggest a title for a bug, using similar stored bugs as few-shot examples.

    Similar bugs with a positive ``similarity`` are used first. The two built-in
    examples below are then added, in order, until there are at least three
    examples or the built-ins run out. Returns the generated title as a string.
    """
    similar_docs, new_doc_text = retrieve_similar_documents(component, description, pid)

    # Built-in fallback examples: title -> [component, description].
    examples = {
        '''Concatenation of string variables slow compared to strings themselves''': [
            '''JavaScript Engine''',
            '''Ill be uploading a test case with various tests of string concatenation.  ; Mozilla (build 2000040308) shows good performance with all the ones that uses ; strings directly; e.g. string1 + string2.  its the last three it has ; problems with; they use string variables (e.g. var1 + var 2) in the ; concatenation.; ; try it out for yourselves.  all numberical values shown in the form fields is ; the execution time in millseconds.  the four tests on the left hand side; and ; the 2 at the top on the right hand side finished in around 1650ms on my P3/450. ;  this is just the same speed as Netscape Comm 4.72.  On the last three tests on ; the right hand side NC4.72 uses 7000ms; 10000ms and around 4500ms respectively; ; while Mozilla suddenly uses 10000ms; 14750ms and 5500ms on the same three tests. ;  Im slightly surprised by this sudden large increase in execution time.; ; the test results are very positive compared to IE5.01 though; except for the ; three tests with variables in them.  the 4 tests on the left hand side; from top ; to bottom; finish in around 5.5s; 9s; 12.5s and 16s in IE5.01.  in other words; ; a nearly linear increase in usage for each string that gets added.  the two top ; tests on the right hand side finish in around 9.3s and 20s; a _huge_ difference ; from both Mozilla and Communicator.  
the last three tests; with variables; ; execute at just about the same speed as Communicator though (the last one ; actually about a second faster).''',
        ],
        '''Linux/Slackware: undefined iostream symbols; app wont start''': [
            '''HTML: Parser''',
            '''johnny:~/mozilla/package# ./mozilla-apprunner.sh; MOZILLA_FIVE_HOME=/root/mozilla/package;   LD_LIBRARY_PATH=/root/mozilla/package:/usr/local/rvplayer5.0;       MOZ_PROGRAM=./apprunner;         moz_debug=0;      moz_debugger=; ./apprunner: error in loading shared libraries; /root/mozilla/package/libraptorhtmlpars.so: undefined symbol:; __vt_8iostream.3ios; ; I am running Slackware 4.0 and never have had any luck running any; of these milestone releases.  This was the M7 attempt.; Just thought you should know.; Thanks; Johnny O''',
        ],
    }

    # Start from the retrieved bugs (if any) that report a positive similarity.
    soft_prompt = (
        {
            doc["title"]: [doc["component"], doc["description"]]
            for doc in similar_docs
            if doc["similarity"] > 0
        }
        if similar_docs
        else {}
    )

    # Pad with the built-in examples; a retrieved bug with the same title wins.
    for example_title, example_fields in examples.items():
        if len(soft_prompt) >= 3:
            break
        soft_prompt.setdefault(example_title, example_fields)

    generated_title = generate_title(soft_prompt, new_doc_text)
    return format(generated_title)

In [7]:
# Load the training set and shuffle it with a fixed seed, so a fresh
# Restart & Run All reproduces the same row order.
df = pd.read_csv("/kaggle/input/isec-sdc-2025/train.csv").sample(frac=1, random_state=42)


In [8]:
# Try the pipeline on the training row whose index label is 8, for project id 1.
# The lookup is by label, so the shuffle does not change which row is used.
suggested_title = suggest_title(df.loc[8, "Component"], df.loc[8, "Description"], 1)
suggested_title

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

'Crash when ENTER command is keyed\n'

In [9]:
# Manual check: suggest a title for a hand-written bug report (component + description) in project 1.
new_document = {"component":"DOM: Core & HTML",
"description":"Window.open() method Crashes Viewer as well as apprunner.; ; There are already somebugs about it. Bug 1098 is similar to this but for Windows; NT platform and already marked as duplicate of bug 1252. Actually bug 1252 is; little different than this bug; and bug 1252 is already marked as resolved and; fixed. I guess it would be better to report seperate bug this problem since; platform is also different and bug is also little different.; ; Product: Seamonkey [Apprunner as well as viewer]; Build: 04-22-17.; OS: Win-95 as well as MacOS.; ; Steps to Reproduce:; 1] Please copy the code Im providing.; 2] Save it as HTML file on your local machine.; 3] Open this HTML file in viewer as well as apprunner.; 4] There you will find on button called Open New Window. click this button.; ; Expected Results : Application should open new window.; ; Actual Results:; A] Viewer: Application crashes immediately.; B] Apprunner: Application opens new window; but not very clear and does not; point to; where it is supposed to. After closing that new window; if we again; click that button to open one more window; then application crashes.; ; Code:; ; <HTML>; <HEAD><TITLE>Window.Open()</TITLE></HEAD>; <BODY>; <form>; <H4>This Page will test whether window.open() method works or not.<Br>; After clicking the button; browser should open another window<br>; which is pointed to http://www.yahoo.com<H4>; <input type=button value=Open New Window; onClick=window.open(http://www.yahoo.com; testwindow)>; </form>; </BODY>; </HTML>"}
suggested_title = suggest_title(new_document["component"], new_document["description"],1)
print(suggested_title)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

window.open() crashes viewer



In [10]:
# Second manual check with a different hand-written report; note this rebinds `new_document` and `suggested_title`.
new_document = {"component":"DOM: Core & HTML",
"description":"User-Agent:       Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.7.5) Gecko/20041107 Firefox/1.0; Build Identifier: Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.7.5) Gecko/20041107 Firefox/1.0; ; If the window.open() function is used to create a popup from within a frameset;; and a link back to a target frame in the parent frameset is dynamically written; using the document.writeln() function then the URL opens in a new window and; not in the targeted frame.  If the popup is actually a pre-written page on the; server then it all works as expected.  This behaviour does not happen in Mozilla; (Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.7) Gecko/20040616) or IE6.; See Also http://forums.mozillazine.org/viewtopic.php?p=974823#974823; ; Reproducible: Always; Steps to Reproduce:; 1.Create a frameset; 2.in one of the frame windows use some JS with window.open() and; document.writeln() to create a popup with a link which is designed to put a URL; into one of the parent frames.; 3.click on dynamically created popup and link; ; Actual Results:  ; The link opens in a new browser window.; ; Expected Results:  ; The Link should have opened the url in the targeted frame; ; See Also http://forums.mozillazine.org/viewtopic.php?p=974823#974823"}
suggested_title = suggest_title(new_document["component"], new_document["description"],1)
print(suggested_title)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

window.open() fails with frameset

