# Graph generation

## Import

In [None]:
import add_data
import utility_function
import nest_asyncio
import RAGAStest
import pandas as pd
import json


In [None]:
# List the contents of the profiles directory (the profile CSVs are read from there).
!ls ../tests/data/profiles

In [None]:
# Name of the profile to process; the per-profile file names
# (<user>.csv, <user>_context.csv, <user>_qa.json) are built from it.
user = "Mateo"
# Data directories, relative to the working directory: input profiles,
# processed context, question/answer sets and evaluation results.
path_data_profiles = "../tests/data/profiles/"
path_data_context = "../tests/data/context/"
path_data_qa = "../tests/data/qa/"
path_data_results = "../tests/data/results/"

## Data

In [None]:
# Name of the CSV file holding the selected profile
csv_filename = f"{user}.csv"

# Load the profile from the profiles directory
df = pd.read_csv(f"{path_data_profiles}{csv_filename}")

# Show the first rows as a quick sanity check of the content
print(df.head())

# Print the main fields of every interaction
def extract_data(df):
    """Print the date, user and interaction text of each row of ``df``.

    Every row is followed by a 50-dash separator line. ``df`` must expose
    the ``date``, ``user`` and ``interaction`` columns. Returns ``None``.
    """
    divider = "-" * 50
    fields = (("Date", "date"), ("User", "user"), ("Input", "interaction"))
    for _, entry in df.iterrows():
        for label, column in fields:
            print(f"{label}: {entry[column]}")
        print(divider)

# Run the extraction on the loaded profile
extract_data(df)


## Add data

### Process the text

In [None]:
# Cost: 0.01 (30 interactions)
# Run every profile row through `utility_function.process_text`, passing the
# interaction text together with its author and date. A comprehension is used
# so that no loop variable (previously named `input`, which shadowed the
# builtin `input()`) is left behind in the notebook namespace.
interactions = [
    utility_function.process_text(
        text=row['interaction'],
        user_name=row['user'],
        current_date=row['date'],
    )
    for _, row in df.iterrows()
]

# Save the processed interactions as <user>_context.csv in the context directory
context_df = pd.DataFrame(interactions)
context_df.to_csv(path_data_context + user + "_context.csv", index=False)

### Clean graph

In [None]:
# NOTE(review): presumably empties the existing graph before new data is
# added — confirm in utility_function before running against real data.
utility_function.clean_graph()

### Add to graph

In [None]:
#costo: 0,02 (aggiunta di 30 input + resolver)
nest_asyncio.apply()

#Adding data to the graph
for input in interactions:
    response = await add_data.add_data_to_graph(input)
    print(response)
    
#Resolving entities
nest_asyncio.apply()
res = await add_data.resolve_entities()
print(res)
    

#### Aggiunta singola

In [None]:
# Index of the single processed interaction to inspect (the next cell adds it to the graph)
n = 2
print(interactions[n])

In [None]:
# Add only the selected interaction to the graph and show the response
response = await add_data.add_data_to_graph(interactions[n])
print(response)

In [None]:
# Run the entity-resolution step and print what it returns
res = await add_data.resolve_entities()
print(res)

# Test

### Add indexes to Graph

In [None]:
# NOTE(review): presumably creates the graph indexes needed by the test
# cell below — confirm in utility_function.
utility_function.add_indexes()

### Esecuzione test

In [None]:
# Cost: 0.02 * 2 (10 questions)
results_graphRAG = []
results_RAG = []

# Load this profile's question/answer dataset. Only the read happens inside
# the `with` block, so the file is closed before the evaluations start
# instead of staying open for their whole duration.
with open(path_data_qa + user + "_qa.json", "r", encoding="utf-8") as f:
    dataset = json.load(f)

# Evaluate the GraphRAG pipeline on the dataset
graphRAG = RAGAStest.run_tests(dataset)
print("GraphRAG: ", graphRAG)
results_graphRAG.append({'user': user, 'results': graphRAG})

# Evaluate the RAG pipeline on the same dataset
RAG = RAGAStest.run_tests_RAG(dataset)
print("RAG: ", RAG)
results_RAG.append({'user': user, 'results': RAG})



## Stampa risultati su file

In [None]:

# One row per collected result: columns `user` and `results`
df_graphRAG = pd.DataFrame(results_graphRAG)
df_RAG = pd.DataFrame(results_RAG)

# Append to the shared results files without header or index column.
# Because the files are opened in append mode, re-running this cell
# writes the same rows again.
append_opts = dict(mode='a', header=False, index=False)
df_graphRAG.to_csv(path_data_results + "results_graphRAG.csv", **append_opts)
df_RAG.to_csv(path_data_results + "results_RAG.csv", **append_opts)

In [None]:
# Show the raw results collected for both pipelines
print(results_graphRAG)
print(results_RAG)

In [None]:
# Take the `.scores` attribute of the first stored result of each pipeline.
# NOTE(review): presumably one record of metric scores per question —
# confirm against the object returned by RAGAStest.
res_graphRAG = results_graphRAG[0]['results'].scores
res_RAG = results_RAG[0]['results'].scores

print(res_graphRAG)
print(res_RAG)


## Generazione grafici

### Grafico singolo

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Turn the score records of each pipeline into a DataFrame
df_graphRAG = pd.DataFrame(res_graphRAG)
df_RAG = pd.DataFrame(res_RAG)

# Column-wise mean of every metric
mean_graphRAG = df_graphRAG.mean()
mean_RAG = df_RAG.mean()

# Side-by-side table: one row per metric, one column per pipeline
comparison_df = pd.DataFrame({'GraphRAG': mean_graphRAG, 'RAG': mean_RAG})

# Print the mean values
print("Confronto delle medie tra GraphRAG e RAG:")
print(comparison_df)

# Grouped bar chart of the means, with the y axis fixed to [0, 1]
chart_ax = comparison_df.plot.bar(figsize=(10, 6), rot=45)
chart_ax.set_title('Confronto Medie Metriche tra GraphRAG e RAG')
chart_ax.set_ylabel('Valore medio')
chart_ax.set_ylim(0, 1)
chart_ax.grid(True, axis='y')
plt.tight_layout()
plt.show()

In [None]:
# List the contents of the results directory (the results CSVs are appended there).
!ls ../tests/data/results/

### Grafico complessivo

In [None]:
import ast
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path


# Results files of the two pipelines, read by load_metrics() below
CSV_GRAPH   = Path("../tests/data/results/results_graphRAG.csv")
CSV_RAG     = Path("../tests/data/results/results_RAG.csv")

# Bar colours for each pipeline and the width of a single bar
COLOR_GRAPH = "#0f8b8d"
COLOR_RAG   = "#07435d"
BAR_WIDTH   = 0.30

# Metric key -> label shown on the x axis; the dict order is also the
# left-to-right order of the bar groups
METRIC_LABELS = {
    "llm_context_precision_with_reference": "Context precision",
    "context_recall":                      "Context recall",
    "answer_relevancy":                    "Answer relevancy",
    "faithfulness":                        "Faithfulness",
    "semantic_similarity":                 "Answer similarity",
}

# Chart title, y-axis label, and whether each bar is annotated with its value
TITLE   = "GraphRAG vs RAG"
YLABEL  = "Mean value"
ANNOTATE_BARS = True


def load_metrics(csv_path: Path) -> pd.DataFrame:
    """
    Load a header-less results CSV whose rows look like:
        ProfileName,"{'metricA': 0.5, 'metricB': 0.7, ...}"
    and return a wide DataFrame:
        index = profiles, columns = one per metric
    """
    read_options = dict(
        header=None,
        names=["profile", "metrics_str"],
        quotechar='"',
        skipinitialspace=True,
        engine="python",
    )
    table = pd.read_csv(csv_path, **read_options)

    # Parse each metrics string into a dict, then spread the dict keys
    # into separate columns.
    parsed = table["metrics_str"].apply(ast.literal_eval)
    wide = parsed.apply(pd.Series)

    # Label the rows with the profile names
    wide.index = table["profile"]
    return wide


def bar_labels(ax, bars):
    """Write each bar's height (two decimals) just above the top of the bar."""
    # Shared text placement: 4 points above the anchor, horizontally centred
    text_style = dict(
        xytext=(0, 4),
        textcoords="offset points",
        ha="center",
        va="bottom",
        fontsize=9,
    )
    for rect in bars:
        top = rect.get_height()
        centre_x = rect.get_x() + rect.get_width() / 2
        ax.annotate(f"{top:.2f}", xy=(centre_x, top), **text_style)



# Per-profile metric tables of the two pipelines
df_graph = load_metrics(CSV_GRAPH)
df_rag = load_metrics(CSV_RAG)

# Mean of every metric across profiles, each series named after its pipeline
mean_graph = df_graph.mean().rename("GraphRAG")
mean_rag = df_rag.mean().rename("RAG")

# One row per metric, one column per pipeline
comparison = pd.DataFrame([mean_graph, mean_rag]).transpose()
comparison.index.name = "Metric"

# Print the comparison table rounded to four decimals, framed by blank lines
print("\nConfronto delle medie:")
print(comparison.round(4))
print()


# Global look: font size 11, no right/top spines
plt.rcParams["font.size"] = 11
plt.rcParams["axes.spines.right"] = False
plt.rcParams["axes.spines.top"] = False

# Metric keys and their display labels, in METRIC_LABELS order,
# plus one x position per metric
metrics = list(METRIC_LABELS)
labels = list(METRIC_LABELS.values())
x = np.arange(len(metrics))

fig, ax = plt.subplots(figsize=(9, 5))

# Two bars per metric, shifted half a bar width to either side of the tick
half_width = BAR_WIDTH / 2
bars_graph = ax.bar(
    x - half_width,
    comparison.loc[metrics, "GraphRAG"],
    width=BAR_WIDTH,
    label="GraphRAG",
    color=COLOR_GRAPH,
)
bars_rag = ax.bar(
    x + half_width,
    comparison.loc[metrics, "RAG"],
    width=BAR_WIDTH,
    label="RAG",
    color=COLOR_RAG,
)

# Axis ticks, limits, title and legend
ax.set_xticks(x)
ax.set_xticklabels(labels, rotation=20, ha="right")
ax.set_ylim(0, 1)
ax.set_title(TITLE, pad=15, weight="bold")
ax.set_ylabel(YLABEL)
ax.grid(True, axis="y", linestyle="--", linewidth=0.5, alpha=0.7)
ax.legend(frameon=False)

# Optionally write the value on top of every bar
if ANNOTATE_BARS:
    bar_labels(ax, [*bars_graph, *bars_rag])

fig.tight_layout()
plt.show()

In [None]:
import ast
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path


# Results files of the two pipelines, read by load_metrics() below
CSV_GRAPH   = Path("../tests/data/results/results_graphRAG.csv")
CSV_RAG     = Path("../tests/data/results/results_RAG.csv")

# (metric key, display label) pairs
METRIC_ORDER = [
    ("llm_context_precision_with_reference", "Context precision"),
    ("context_recall",                  "Context recall"),
    ("answer_relevancy",                "Answer relevancy"),
    ("faithfulness",                    "Faithfulness"),
    ("semantic_similarity",             "Answer similarity"),
]
# Colormap of the heatmap
CMAP = "viridis"
# Figure title
TITLE = "Δ (GraphRAG – RAG) per profile/metric"
# Output file for the figure; a falsy value shows the figure instead of saving it
OUTFILE = "heatmap_delta.pdf"


def load_metrics(csv_path: Path) -> pd.DataFrame:
    """Load a header-less ``profile,"{metric: value, ...}"`` CSV.

    Returns a DataFrame indexed by profile with one column per metric.
    """
    table = pd.read_csv(
        csv_path,
        header=None,
        names=["profile", "metrics_str"],
        quotechar='"',
        skipinitialspace=True,
        engine="python",
    )
    # Parse each metrics string into a dict, then spread its keys into columns
    metric_dicts = table["metrics_str"].apply(ast.literal_eval)
    wide = metric_dicts.apply(pd.Series)
    wide.index = table["profile"]
    return wide


# Per-profile metric tables of the two pipelines
df_graph = load_metrics(CSV_GRAPH)
df_rag   = load_metrics(CSV_RAG)

# Difference per profile and metric; positive means GraphRAG scored higher
delta = df_graph - df_rag

# Sort profiles (rows) by their mean delta, largest first
row_order = delta.mean(axis=1).sort_values(ascending=False).index
delta = delta.loc[row_order]

# Sort metrics (columns) by their mean delta, largest first
col_order = delta.mean(axis=0).sort_values(ascending=False).index
delta = delta[col_order]

# Display labels come from this cell's own METRIC_ORDER, so the cell does not
# depend on a METRIC_LABELS global left behind by another cell. A metric that
# has no entry keeps its raw column name instead of raising KeyError.
label_by_metric = dict(METRIC_ORDER)
metric_labels = [label_by_metric.get(c, c) for c in delta.columns]
profiles = delta.index.tolist()
values   = delta.values

plt.rcParams.update({"font.size": 10, "axes.spines.top": False,
                     "axes.spines.right": False})

# Figure size grows with the number of metrics (width) and profiles (height)
fig, ax = plt.subplots(figsize=(len(metric_labels)*1.6, len(profiles)*0.45 + 1))
im = ax.imshow(values, cmap=CMAP, aspect="auto")

# Major ticks: one per metric / profile, carrying the labels
ax.set_xticks(np.arange(len(metric_labels)))
ax.set_xticklabels(metric_labels, rotation=20, ha="right")
ax.set_yticks(np.arange(len(profiles)))
ax.set_yticklabels(profiles)
ax.set_title(TITLE, weight="bold", pad=12)

# Minor ticks on the cell borders, used only to draw the dotted grid
ax.set_xticks(np.arange(-.5, len(metric_labels), 1), minor=True)
ax.set_yticks(np.arange(-.5, len(profiles), 1), minor=True)
ax.grid(which="minor", linestyle=":", linewidth=0.4)

cbar = fig.colorbar(im, ax=ax, fraction=0.046, pad=0.04)
cbar.ax.set_ylabel("Δ score", rotation=270, labelpad=14)

fig.tight_layout()

# Save to OUTFILE when one is configured, otherwise show the figure
if OUTFILE:
    fig.savefig(OUTFILE)
    print(f"Figura salvata in {OUTFILE}")
else:
    plt.show()