In [None]:
from sentence_transformers import SentenceTransformer
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
import torch

# Load the cloud-service catalogue.
df = pd.read_csv("multi-cloud-services-fix.csv")

# Columns joined, in this order, into one searchable text per service.
DOCUMENT_COLUMNS = ["provider", "category", "service_name", "context"]

# Build the combined document representation.
# Plain `+` concatenation turns the whole document into NaN as soon as one
# field is missing (and raises on a non-string column), so blank out missing
# values and coerce every field to str before joining.
_document_fields = df[DOCUMENT_COLUMNS].fillna("").astype(str)
df["document"] = _document_fields[DOCUMENT_COLUMNS[0]].str.cat(
    _document_fields[DOCUMENT_COLUMNS[1:]], sep=" | "
)

# Load the sentence-embedding model; the same instance encodes both the
# documents here and the user queries further down.
model = SentenceTransformer("all-MiniLM-L6-v2")

# Embed every document once up front so each query only has to be compared
# against these precomputed vectors.
documents = list(df["document"])
doc_embeddings = model.encode(documents, convert_to_tensor=True)

# Simulated user queries paired with their expected (ground-truth) answers.
# Keeping each query next to its answer makes it hard for the two lists to
# drift out of step when a case is added or removed.
_EVALUATION_CASES = (
    ("need services for compliance assessments", "AWS Artifact"),
    ("want security service for DDoS Protection", "AWS Shield"),
    ("scalable database solution", "Amazon Aurora"),
    (
        "what is Azure Sentinel",
        "Cloud-native Security Information Event Management (SIEM) and intelligent security analytics",
    ),
    ("tell me about AWS Lambda", "Run code without thinking about servers."),
)

user_inputs = [question for question, _ in _EVALUATION_CASES]

ground_truth_outputs = [expected for _, expected in _EVALUATION_CASES]

# Queries that start with one of these prefixes ask for an explanation, so
# they are answered with the matched service's description ("context");
# every other query is answered with the matched service's name.
DEFINITION_PREFIXES = ("what is", "tell me about")

predicted_outputs = []

if user_inputs:  # nothing to encode or rank when there are no queries
    # Embed all queries in a single batch instead of one model call per query.
    query_embeddings = model.encode(user_inputs, convert_to_tensor=True)

    # Cosine similarity of every query against every document. Assumes both
    # embedding tensors are 2-D with one row per text, so broadcasting
    # (queries, 1, dim) against (1, documents, dim) and reducing over the
    # last axis gives a (queries, documents) score matrix.
    sim_scores = torch.nn.functional.cosine_similarity(
        query_embeddings.unsqueeze(1), doc_embeddings.unsqueeze(0), dim=2
    )

    # Row position of the best-matching document for each query.
    top_indices = torch.argmax(sim_scores, dim=1).tolist()

    for query, top_index in zip(user_inputs, top_indices):
        wants_definition = query.lower().startswith(DEFINITION_PREFIXES)
        answer_column = "context" if wants_definition else "service_name"
        predicted_outputs.append(df.iloc[top_index][answer_column])


# Map every distinct answer string to an integer class id. The encoder is
# fitted on both lists, so a string that occurs only among the predictions
# still becomes a class of its own.
le = LabelEncoder().fit([*ground_truth_outputs, *predicted_outputs])

y_true_encoded, y_pred_encoded = (
    le.transform(answers) for answers in (ground_truth_outputs, predicted_outputs)
)

# Evaluation.
# NOTE(review): the macro averages presumably run over every class present in
# either list, so one wrong prediction can add an extra zero-scoring class and
# pull precision/recall/F1 below accuracy - confirm this is the intended metric.
_MACRO_OPTIONS = {"average": "macro", "zero_division": 0}

accuracy = accuracy_score(y_true_encoded, y_pred_encoded)
precision, recall, f1 = (
    score(y_true_encoded, y_pred_encoded, **_MACRO_OPTIONS)
    for score in (precision_score, recall_score, f1_score)
)

# Show the aggregate scores, one metric per line, rounded to two decimals.
summary_lines = [
    "=== Evaluasi Rekomendasi ===",
    f"Akurasi     : {accuracy:.2f}",
    f"Precision   : {precision:.2f}",
    f"Recall      : {recall:.2f}",
    f"F1 Score    : {f1:.2f}",
]
print("\n".join(summary_lines))

# Show each query next to its expected and predicted answer so individual
# misses can be inspected by eye.
print("\n--- Hasil Prediksi Individual ---")
for position, question in enumerate(user_inputs):
    expected = ground_truth_outputs[position]
    predicted = predicted_outputs[position]
    print(f"\n[User Input]      {question}")
    print(f"[Ground Truth]    {expected}")
    print(f"[Predicted]       {predicted}")

=== Evaluasi Rekomendasi ===
Akurasi     : 0.80
Precision   : 0.67
Recall      : 0.67
F1 Score    : 0.67

--- Hasil Prediksi Individual ---

[User Input]      need services for compliance assessments
[Ground Truth]    AWS Artifact
[Predicted]       AWS Artifact

[User Input]      want security service for DDoS Protection
[Ground Truth]    AWS Shield
[Predicted]       AWS Shield

[User Input]      scalable database solution
[Ground Truth]    Amazon Aurora
[Predicted]       Amazon Aurora

[User Input]      what is Azure Sentinel
[Ground Truth]    Cloud-native Security Information Event Management (SIEM) and intelligent security analytics
[Predicted]       powerful service that enables applications to process and generate speech in various ways. Here are some of its key features such as speech-to-text, text-to-speech, speech translation, speaker recognition, custom neural voice, and multimodal AI apps.

[User Input]      tell me about AWS Lambda
[Ground Truth]    Run code without thinking