In [None]:
# Install required libraries
%pip install openai pandas scikit-learn python-dotenv

# Imports and environment setup
import os
from dotenv import load_dotenv
import pandas as pd
import openai
from sklearn.metrics import accuracy_score, f1_score
import json

# Load environment variables from your .env file
load_dotenv(r"C:/Users/anika/OneDrive/Desktop/cs 383/Final Project/group-project-codeshield/TRAIN/.env")

# Configure OpenAI API key
openai.api_key = os.getenv("OPENAI_API_KEY")

Note: you may need to restart the kernel to use updated packages.


In [2]:
# Initialize OpenAI client
client = openai

In [3]:
# Load vulnerability types and CVSS scores
with open('vuln_types.txt') as f:
    vuln_types = [line.strip() for line in f if line.strip()]
with open('vuln_cvss_scores.txt') as f:
    cvss_scores = [float(line.strip()) for line in f if line.strip()]

In [6]:
# Load dataset
samples = []
decoder = json.JSONDecoder()
with open('training_code_snippets.jsonl', 'r') as f:
    for line in f:
        line = line.strip()
        if not line:
            continue
        idx = 0
        while idx < len(line):
            obj, consumed = decoder.raw_decode(line[idx:])
            samples.append(obj)
            idx += consumed

In [7]:
# Prepare DataFrame
df = pd.DataFrame(samples)
df.head()


Unnamed: 0,code_snippet,is_vulnerable,vulnerability_type
0,"import os\nfilename = input(""Enter filename: ""...",True,Path Traversal
1,# Simulated Unprotected API Endpoints snippet\...,True,Unprotected API Endpoints
2,"my_dict = {""a"": 1, ""b"": 2}\nprint(my_dict.get(...",False,
3,"username = ""admin""\npassword = ""1234""\nconnect...",True,Hardcoded Credentials
4,# Vulnerable snippet for Integer Overflow/Wrap...,True,Integer Overflow/Wraparound


In [20]:
# Define prompt template
prompt_template = (
    "You are a security expert. Classify the following code snippet's vulnerability type. Ensure you read through the snippets with extreme precision "
    "Respond with exactly one of the following vulnerability types: {types}.")

In [21]:
# Make a single API call example
from openai import OpenAI
client = OpenAI()
sample = df.iloc[0]
prompt = prompt_template.format(types=', '.join(vuln_types)) + "\n\nCode snippet:\n" + sample['code_snippet']
response = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt}
    ]
)
print(response.choices[0].message.content)


CWE-22: Path Traversal


In [None]:
import re
from sklearn.metrics import accuracy_score, f1_score

def clean_prediction(text):
    """Normalize and extract a valid vulnerability label from model output."""
    text = text.strip()
    for line in reversed(text.splitlines()):
        line = line.strip()
        for vuln in vuln_types:
            if vuln.lower() in line.lower():
                return vuln
    return "Unknown"

def evaluate_model(temp, max_tk, top_pv, freq_pen, pres_pen, cot):
    preds = []
    true_labels = []

    for _, row in df.iterrows():
        base_prompt = prompt_template.format(types=', '.join(vuln_types))
        code_snippet = row['code_snippet']
        user_message = f"{base_prompt}\n\nCode snippet:\n{code_snippet}"
        user_message += "\n\nPlease classify the vulnerability type."
        if cot:
            user_message += " Let's think step by step."
        try:
            response = client.chat.completions.create(
                model='gpt-4o-mini',
                messages=[
                    {"role": "system", "content": "You are a cybersecurity expert who strictly follows CWE standards and always answers with one valid CWE name from a given list."},
                    {'role': 'user', 'content': user_message}
                ],
                temperature=temp,
                max_tokens=max_tk,
                top_p=top_pv,
                frequency_penalty=freq_pen,
                presence_penalty=pres_pen
            )
            prediction = response.choices[0].message.content.strip().splitlines()[-1]
        except Exception as e:
            print(f"Error during API call: {e}")
            prediction = "ERROR"
        preds.append(prediction)
        true_labels.append(row['vulnerability_type'])

    # Metrics
    acc = accuracy_score(true_labels, preds)
    f1 = f1_score(true_labels, preds, average='weighted', zero_division=0)
    print(f"Parameters → temp={temp}, max_tokens={max_tk}, top_p={top_pv}, "
          f"freq_penalty={freq_pen}, presence_penalty={pres_pen}, CoT={cot}")
    print(f"Accuracy: {acc:.4f}, Weighted F1 Score: {f1:.4f}")
    preds.append(clean_prediction(prediction))
    
evaluate_model(
    temp=temperature,
    max_tk=max_tokens,
    top_pv=top_p,
    freq_pen=frequency_penalty,
    pres_pen=presence_penalty,
    cot=use_chain_of_thought
)

Parameters → temp=0.0, max_tokens=50, top_p=1.0, freq_penalty=0.0, presence_penalty=0.0, CoT=True
Accuracy: 0.4050, Weighted F1 Score: 0.3013
