In [55]:
import pandas as pd,numpy as np
from sklearn.model_selection import train_test_split,cross_val_score, StratifiedKFold
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

In [49]:
data = pd.read_csv("./../reposistories/AI Patterns/embeddings/embeddings_roberta_base.csv")

In [21]:
data.head()

Unnamed: 0,pattern,dim_0,dim_1,dim_2,dim_3,dim_4,dim_5,dim_6,dim_7,dim_8,...,dim_758,dim_759,dim_760,dim_761,dim_762,dim_763,dim_764,dim_765,dim_766,dim_767
0,Parallel Tool Execution,-0.314725,0.24638,0.323346,0.082805,-0.185299,-0.536945,-0.003571,0.255506,0.40186,...,0.423246,-0.205816,-0.546292,0.463974,0.010447,0.196149,0.735746,-0.571738,-0.449715,0.613145
1,Parallel Tool Execution,-0.323753,0.195208,0.29858,0.123353,-0.236241,-0.537081,-0.018814,0.214024,0.436132,...,0.413688,-0.182932,-0.532808,0.508296,0.008989,0.211335,0.843657,-0.497022,-0.436968,0.61026
2,Parallel Tool Execution,-0.335411,0.176051,0.320027,0.028302,-0.147751,-0.616085,0.001184,0.273051,0.406971,...,0.43677,-0.177007,-0.513979,0.445655,0.013305,0.222535,0.71987,-0.56296,-0.459748,0.592551
3,Parallel Tool Execution,-0.329763,0.209713,0.292018,0.090231,-0.285773,-0.622224,0.008809,0.24053,0.432888,...,0.376494,-0.189953,-0.549111,0.455687,-0.068236,0.200964,0.81221,-0.610309,-0.409704,0.561486
4,Parallel Tool Execution,-0.317384,0.18637,0.317539,0.035278,-0.3534,-0.574028,-0.004452,0.265043,0.416585,...,0.439583,-0.201953,-0.544256,0.436908,-0.046975,0.214433,0.674056,-0.620092,-0.431591,0.559672


In [50]:
le = LabelEncoder()
labels = le.fit_transform(data['pattern'])

In [51]:
X = data.drop(columns=['pattern'])
y = labels
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [45]:
def print_scores(y_test, y_pred):
    print(classification_report(y_test, y_pred, target_names=le.classes_))

In [52]:
model = KNeighborsClassifier(n_neighbors=5)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print_scores(y_test, y_pred)

                                                               precision    recall  f1-score   support

                                Adversarial Agent Interaction       1.00      0.80      0.89         5
Comprehensive Black-Box Explainability and Analysis Framework       1.00      1.00      1.00         2
                               Holistic LLM Agentic Framework       0.86      1.00      0.92         6
                                      Parallel Tool Execution       1.00      1.00      1.00         7

                                                     accuracy                           0.95        20
                                                    macro avg       0.96      0.95      0.95        20
                                                 weighted avg       0.96      0.95      0.95        20



In [53]:
model2 = RandomForestClassifier(n_estimators=5, random_state=42)
model2.fit(X_train, y_train)
y_pred2 = model2.predict(X_test)
print_scores(y_test, y_pred2)

                                                               precision    recall  f1-score   support

                                Adversarial Agent Interaction       0.71      1.00      0.83         5
Comprehensive Black-Box Explainability and Analysis Framework       1.00      1.00      1.00         2
                               Holistic LLM Agentic Framework       1.00      0.83      0.91         6
                                      Parallel Tool Execution       1.00      0.86      0.92         7

                                                     accuracy                           0.90        20
                                                    macro avg       0.93      0.92      0.92        20
                                                 weighted avg       0.93      0.90      0.90        20



In [54]:
model3 = SVC(kernel='linear', random_state=42)
model3.fit(X_train, y_train)
y_pred3 = model3.predict(X_test)
print_scores(y_test, y_pred3)

                                                               precision    recall  f1-score   support

                                Adversarial Agent Interaction       1.00      1.00      1.00         5
Comprehensive Black-Box Explainability and Analysis Framework       1.00      1.00      1.00         2
                               Holistic LLM Agentic Framework       1.00      1.00      1.00         6
                                      Parallel Tool Execution       1.00      1.00      1.00         7

                                                     accuracy                           1.00        20
                                                    macro avg       1.00      1.00      1.00        20
                                                 weighted avg       1.00      1.00      1.00        20



In [58]:
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
scores = cross_val_score(model2, X, y, cv=skf)
print("Weighted F1-scores per fold:", scores)
print("Mean Weighted F1-score:", np.mean(scores))

Weighted F1-scores per fold: [0.85       0.85       0.84210526 0.94736842 0.89473684]
Mean Weighted F1-score: 0.8768421052631579


In [10]:
import importlib.util
import sys
import os

# Current working directory (where notebook/script is running)
current_dir = os.getcwd()

# Path to parent folder's utils.py
parent_file_path = os.path.abspath(os.path.join(current_dir, '..', 'embedding_generator.py'))

spec = importlib.util.spec_from_file_location("utils", parent_file_path)
utils = importlib.util.module_from_spec(spec)
sys.modules["utils"] = utils
spec.loader.exec_module(utils)

# Now you can use functions from utils
# utils.some_function()



In [11]:
emb_generator = utils.EmbeddingGenerator()

In [133]:
def predict_pattern(code_snippet: str):
    emb = emb_generator.generate_embedding(code_snippet)
    return le.inverse_transform(model3.predict([emb['last_hidden_state_mean'].tolist()])) [0]   

In [147]:
def remove_comments_and_docstrings(source: str) -> str:
    import io
    import tokenize

    io_obj = io.StringIO(source)
    out = ""
    prev_toktype = tokenize.INDENT
    last_lineno = -1
    last_col = 0

    for tok in tokenize.generate_tokens(io_obj.readline):
        token_type = tok.type
        token_string = tok.string
        start_line, start_col = tok.start
        end_line, end_col = tok.end
        ltext = tok.line

        if start_line > last_lineno:
            last_col = 0
        if start_col > last_col:
            out += " " * (start_col - last_col)
        if token_type == tokenize.COMMENT:
            pass
        elif token_type == tokenize.STRING:
            if prev_toktype != tokenize.INDENT and prev_toktype != tokenize.NEWLINE:
                if start_col > 0:
                    out += token_string
        else:
            out += token_string
        prev_toktype = token_type
        last_lineno = end_line
        last_col = end_col

    return out

In [169]:
with open("/home/hasinthaka/Documents/Projects/AI/AI Pattern Mining/Pattern Validator/reposistories/model_testing/AI_Patterns/patter_01.py", "r") as file:
    code_snippet = file.read()
    code_snippet = remove_comments_and_docstrings(code_snippet)
    print(predict_pattern(code_snippet))

Comprehensive Black-Box Explainability and Analysis Framework




In [165]:
testing_data = pd.read_csv("/home/hasinthaka/Documents/Projects/AI/AI Pattern Mining/Pattern Validator/reposistories/model_testing/embeddings/embeddings_last_hidden_state_mean.csv")
testing_data = pd.read_csv("/home/hasinthaka/Documents/Projects/AI/AI Pattern Mining/Pattern Validator/reposistories/AI Patterns/embeddings/embeddings_last_hidden_state_mean.csv")

In [168]:
testingX = testing_data.drop(columns=['pattern'])
le.inverse_transform(model.predict(testingX))

array(['Comprehensive Black-Box Explainability and Analysis Framework',
       'Comprehensive Black-Box Explainability and Analysis Framework',
       'Comprehensive Black-Box Explainability and Analysis Framework',
       'Comprehensive Black-Box Explainability and Analysis Framework',
       'Comprehensive Black-Box Explainability and Analysis Framework',
       'Comprehensive Black-Box Explainability and Analysis Framework',
       'Comprehensive Black-Box Explainability and Analysis Framework',
       'Comprehensive Black-Box Explainability and Analysis Framework',
       'Comprehensive Black-Box Explainability and Analysis Framework',
       'Comprehensive Black-Box Explainability and Analysis Framework',
       'Comprehensive Black-Box Explainability and Analysis Framework',
       'Comprehensive Black-Box Explainability and Analysis Framework',
       'Comprehensive Black-Box Explainability and Analysis Framework',
       'Comprehensive Black-Box Explainability and Analysis Fram

In [167]:
testing_data

Unnamed: 0,pattern,dim_0,dim_1,dim_2,dim_3,dim_4,dim_5,dim_6,dim_7,dim_8,...,dim_758,dim_759,dim_760,dim_761,dim_762,dim_763,dim_764,dim_765,dim_766,dim_767
0,Comprehensive Black-Box Explainability and Ana...,-0.317769,0.197410,0.294419,0.138957,-0.340045,-0.378297,-0.043980,0.222403,0.309806,...,0.320432,-0.157043,-0.504431,0.480445,-0.078027,0.188739,0.697233,-0.576321,-0.487673,0.574970
1,Comprehensive Black-Box Explainability and Ana...,-0.340740,0.221235,0.302023,0.115376,-0.403925,-0.496594,-0.053883,0.217084,0.401127,...,0.361586,-0.165030,-0.527085,0.478153,-0.020872,0.189991,0.787863,-0.545732,-0.471074,0.611967
2,Comprehensive Black-Box Explainability and Ana...,-0.341645,0.222531,0.301541,0.111360,-0.388552,-0.495173,-0.052742,0.227298,0.401991,...,0.375088,-0.165572,-0.529002,0.475094,-0.022597,0.198457,0.795236,-0.555021,-0.471315,0.607366
3,Comprehensive Black-Box Explainability and Ana...,-0.333861,0.225147,0.295485,0.126908,-0.396392,-0.496941,-0.047139,0.214048,0.399208,...,0.361532,-0.157606,-0.524672,0.498826,-0.020257,0.190675,0.803288,-0.513003,-0.478666,0.607646
4,Comprehensive Black-Box Explainability and Ana...,-0.336602,0.207659,0.299450,0.074619,-0.265931,-0.356769,-0.039551,0.194115,0.294084,...,0.395404,-0.165853,-0.518985,0.446070,-0.069828,0.210827,0.647989,-0.612103,-0.489803,0.571726
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
92,Adversarial Agent Interaction,-0.312475,0.261548,0.301844,0.038621,-0.403293,-0.467090,-0.053937,0.241709,0.460340,...,0.350064,-0.144538,-0.506010,0.522015,-0.011807,0.185584,0.837116,-0.460130,-0.461779,0.635094
93,Adversarial Agent Interaction,-0.279046,0.288393,0.324477,0.040584,-0.330424,-0.417354,-0.037866,0.230357,0.458793,...,0.330548,-0.156822,-0.530200,0.482182,0.014896,0.204299,0.750899,-0.427305,-0.472579,0.606119
94,Adversarial Agent Interaction,-0.348162,0.237003,0.312720,0.013411,-0.298488,-0.601176,-0.039253,0.324888,0.464097,...,0.469880,-0.171967,-0.565084,0.409847,0.042890,0.251759,0.767393,-0.559570,-0.472037,0.677017
95,Adversarial Agent Interaction,-0.348057,0.243624,0.319194,0.024236,-0.349900,-0.448371,-0.035535,0.163550,0.443444,...,0.373708,-0.142648,-0.491973,0.565370,0.024998,0.190472,0.933551,-0.423406,-0.476335,0.641062
