In [1]:
import pandas as pd

# Charger le fichier CSV
file_path = "istio_request_v3.csv"
df = pd.read_csv(file_path)

df['grpc_response_status'].fillna(0, inplace=True)

df['response_flags'] = df['response_flags'].astype(str).str.strip()  # Convertir en string et enlever espaces

# Ajouter une colonne 'result' avec 'success' ou 'error'
df['result'] = df.apply(
    lambda row: 'success' if row['response_code'] == 200 and row['grpc_response_status'] == 0 and row['response_flags'] == '-' else 'error',
    axis=1
)

# Réorganiser les données par 'source_workload', 'destination_workload' et 'timestamp'
df_sorted = df.sort_values(by=['source_workload', 'destination_workload', 'timestamp'])

# Sauvegarder le fichier résultant
df_sorted.to_csv("aggregated_istio_data.csv", index=False)




The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['grpc_response_status'].fillna(0, inplace=True)


In [2]:
import pandas as pd

# Charger le fichier
file_path = "aggregated_istio_data.csv"
df = pd.read_csv(file_path)

# Convertir timestamp en datetime pour le tri
df['timestamp'] = pd.to_datetime(df['timestamp'])

# Trier avant la séparation
df.sort_values(by=['source_workload', 'destination_workload', 'timestamp'], inplace=True)

# Séparer les succès
df_success = df[df['result'] == 'success'].copy()

# Calculer new_request, new_istio_request_bytes et new_istio_request_duration_milliseconds avec 0 pour la première ligne
df_success['new_request'] = df_success.groupby(['source_workload', 'destination_workload'])['total_request'].diff().fillna(0)
df_success['new_istio_request_bytes'] = df_success.groupby(['source_workload', 'destination_workload'])['istio_request_bytes_sum'].diff().fillna(0)
df_success['new_istio_request_duration_milliseconds'] = df_success.groupby(['source_workload', 'destination_workload'])['istio_request_duration_milliseconds_sum'].diff().fillna(0)

# Appliquer la condition si new_request == 0
df_success.loc[df_success['new_request'] == 0, ['new_istio_request_bytes', 'new_istio_request_duration_milliseconds']] = 0

# Calculer latency
df_success['latency'] = df_success['new_istio_request_duration_milliseconds'] / df_success['new_request']
df_success['latency'].fillna(0, inplace=True)

# Sauvegarder les succès dans un fichier
df_success.to_csv("success_istio_data.csv", index=False)

# Séparer les erreurs HTTP et gRPC
df_http_errors = df[(df['result'] == 'error') & (df['request_protocol'] == 'http')].copy()
df_grpc_errors = df[(df['result'] == 'error') & (df['request_protocol'] == 'grpc')].copy()

error_files = []  # Liste des fichiers d'erreur générés

# Traitement des erreurs HTTP
http_groups = df_http_errors.groupby(['request_protocol', 'response_code', 'grpc_response_status', 'response_flags'])

for (request_protocol, response_code, grpc_status, response_flags), df_error in http_groups:
    df_error = df_error.copy()
    
    # Calculer new_request, new_istio_request_bytes et new_istio_request_duration_milliseconds avec 0 pour la première ligne
    df_error['new_request'] = df_error.groupby(['source_workload', 'destination_workload'])['total_request'].diff().fillna(0)
    df_error['new_istio_request_bytes'] = df_error.groupby(['source_workload', 'destination_workload'])['istio_request_bytes_sum'].diff().fillna(0)
    df_error['new_istio_request_duration_milliseconds'] = df_error.groupby(['source_workload', 'destination_workload'])['istio_request_duration_milliseconds_sum'].diff().fillna(0)
    
    # Appliquer la condition si new_request == 0
    df_error.loc[df_error['new_request'] == 0, ['new_istio_request_bytes', 'new_istio_request_duration_milliseconds']] = 0
    
    # Calculer latency
    df_error['latency'] = df_error['new_istio_request_duration_milliseconds'] / df_error['new_request']
    df_error['latency'].fillna(0, inplace=True)
    
    # Nommer le fichier selon l'erreur
    file_name = f"error_{request_protocol}_{response_code}_{response_flags}.csv"
    df_error.to_csv(file_name, index=False)
    error_files.append(df_error)

# Traitement des erreurs gRPC
grpc_groups = df_grpc_errors.groupby(['request_protocol', 'response_code', 'grpc_response_status', 'response_flags'])

for (request_protocol, response_code, grpc_status, response_flags), df_error in grpc_groups:
    df_error = df_error.copy()
    
    # Calculer new_request, new_istio_request_bytes et new_istio_request_duration_milliseconds avec 0 pour la première ligne
    df_error['new_request'] = df_error.groupby(['source_workload', 'destination_workload'])['total_request'].diff().fillna(0)
    df_error['new_istio_request_bytes'] = df_error.groupby(['source_workload', 'destination_workload'])['istio_request_bytes_sum'].diff().fillna(0)
    df_error['new_istio_request_duration_milliseconds'] = df_error.groupby(['source_workload', 'destination_workload'])['istio_request_duration_milliseconds_sum'].diff().fillna(0)
    
    # Appliquer la condition si new_request == 0
    df_error.loc[df_error['new_request'] == 0, ['new_istio_request_bytes', 'new_istio_request_duration_milliseconds']] = 0
    
    # Calculer latency
    df_error['latency'] = df_error['new_istio_request_duration_milliseconds'] / df_error['new_request']
    df_error['latency'].fillna(0, inplace=True)
    
    # Nommer le fichier selon l'erreur
    file_name = f"error_{request_protocol}_{response_code}_{grpc_status}_{response_flags}.csv"
    df_error.to_csv(file_name, index=False)
    error_files.append(df_error)

# Fusionner tous les fichiers (success + errors)
df_final = pd.concat([df_success] + error_files).sort_values(by=['source_workload', 'destination_workload', 'timestamp'])

# Sauvegarder le fichier final
df_final.to_csv("new_request_istio_data.csv", index=False)

print("Traitement terminé. Fichier sauvegardé sous 'new_request_istio_data.csv'.")

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_success['latency'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_error['latency'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values alw

Traitement terminé. Fichier sauvegardé sous 'new_request_istio_data.csv'.


In [3]:
import pandas as pd 

# Charger le fichier
file_path = "new_request_istio_data.csv"
df = pd.read_csv(file_path)

# Convertir timestamp en datetime
df['timestamp'] = pd.to_datetime(df['timestamp'])

# Trier les données
df.sort_values(by=['source_workload', 'destination_workload', 'timestamp'], inplace=True)

# Calculer success rate, error rate, success count, error count, duration success request, duration error request et average latency par timestamp
grouped = df.groupby(['source_workload', 'destination_workload', 'timestamp'])
aggregated_rows = []

for (src, dst, ts), group in grouped:
    total_new_request = group['new_request'].sum()
    success_count = group[group['result'] == 'success']['new_request'].sum()
    error_count = total_new_request - success_count
    
    if total_new_request > 0:
        success_rate = success_count / total_new_request
        error_rate = 1 - success_rate
    else:
        success_rate = float('nan')
        error_rate = float('nan')
    
    # Calculer la durée des requêtes réussies et erronées
    duration_success_request = group[group['result'] == 'success']['latency'].sum()
    duration_error_request = group[group['result'] == 'error']['latency'].sum()
    average_latency = duration_success_request + duration_error_request

    # Séparer new_istio_request_bytes en success et error
    new_istio_request_bytes_success = group[group['result'] == 'success']['new_istio_request_bytes'].sum()
    new_istio_request_bytes_error = group[group['result'] == 'error']['new_istio_request_bytes'].sum()
    istio_request_bytes = new_istio_request_bytes_success+new_istio_request_bytes_error
    istio_request_duration_milliseconds = group['new_istio_request_duration_milliseconds'].sum()

    aggregated_rows.append([ts, src, dst, group['total_request'].max(), total_new_request, success_count, error_count, success_rate, error_rate, duration_success_request, duration_error_request, average_latency, new_istio_request_bytes_success, new_istio_request_bytes_error,istio_request_bytes,istio_request_duration_milliseconds])

# Créer un DataFrame final
df_final = pd.DataFrame(aggregated_rows, columns=['timestamp', 'source_workload', 'destination_workload', 'total_request', 'new_request', 'success_count', 'error_count', 'success_rate', 'error_rate', 'duration_success_request', 'duration_error_request', 'average_latency', 'new_istio_request_bytes_success', 'new_istio_request_bytes_error','istio_request_bytes','duration_milliseconds'])

# Sauvegarder le fichier
output_file = "aggregated_istio_rates.csv"
df_final.to_csv(output_file, index=False)

print(f"Traitement terminé. Fichier sauvegardé sous {output_file}.")


Traitement terminé. Fichier sauvegardé sous aggregated_istio_rates.csv.


In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Charger le fichier
file_path = "aggregated_istio_rates.csv"
df = pd.read_csv(file_path)

# Convertir timestamp en datetime
df['timestamp'] = pd.to_datetime(df['timestamp'])

# Définir les fenêtres de temps
time_windows = ['15S', '30S', '1min', '5min', '10min']

# Initialiser un tableau pour stocker les résultats
kpi_results = []

for window in time_windows:
    # Resample par fenêtre de temps
    df_resampled = (
        df
        .groupby(['source_workload', 'destination_workload'])
        .resample(window, on='timestamp', label='right', closed='right')
        .agg({
            'total_request': 'max',
            'new_request': 'sum',
            'success_count': 'sum',
            'error_count': 'sum',
            'success_rate': 'mean',
            'error_rate': 'mean',
            'average_latency': 'sum',
            'istio_request_bytes': 'sum',
            'duration_milliseconds': 'sum'
        })
        .reset_index()
    )

    # Pour chaque pair source-destination, calculer la vraie durée entre deux points non nuls
    def compute_real_durations(group):
        # Garder le timestamp courant pour référence
        last_time = None
        last_index = None
        durations = []
        
        for idx, row in group.iterrows():
            if row['new_request'] > 0:
                if last_time is not None:
                    duration = (row['timestamp'] - last_time).total_seconds()
                    durations.append(duration)
                else:
                    durations.append(np.nan)
                last_time = row['timestamp']
                last_index = idx
            else:
                durations.append(np.nan)
        # Remplir les valeurs manquantes en regardant en avant
        return pd.Series(durations, index=group.index)

    # Appliquer la fonction à chaque groupe
    df_resampled['real_duration'] = (
        df_resampled
        .groupby(['source_workload', 'destination_workload'])
        .apply(compute_real_durations)
        .reset_index(level=[0,1], drop=True)
    )

    # Calcul des KPI
    df_resampled['throughput'] = df_resampled['istio_request_bytes'] / df_resampled['real_duration']
    df_resampled['request_rate'] = df_resampled['new_request'] / df_resampled['real_duration']

    # Ajouter la fenêtre utilisée
    df_resampled['time_window'] = window

    kpi_results.append(df_resampled)

# Concaténer tous les résultats
df_final = pd.concat(kpi_results)

# Supprimer la ligne où timestamp == "2025-04-02 15:21:00"
starting_point = pd.Timestamp("2025-04-08 00:15:00")
df_final = df_final[df_final['timestamp'] != starting_point]

# Sauvegarder dans un fichier CSV
df_final.to_csv("kiali_kpi_metrics.csv", index=False)


  .resample(window, on='timestamp', label='right', closed='right')
  .apply(compute_real_durations)
  .resample(window, on='timestamp', label='right', closed='right')
  .apply(compute_real_durations)
  .apply(compute_real_durations)
  .apply(compute_real_durations)
  .apply(compute_real_durations)


In [5]:
import pandas as pd
import numpy as np

# Charger le fichier
df = pd.read_csv("kiali_kpi_metrics.csv")

# Convertir timestamp en datetime
df['timestamp'] = pd.to_datetime(df['timestamp'])

# Définir les fenêtres de temps et leurs intervalles respectifs
interval_mapping = {
    '15S': '1min',  # 15 secondes -> 1 minute
    '30S': '2min',  # 30 secondes -> 2 minutes
    '1min': '4min',  # 1 minute -> 4 minutes
    '5min': '10min',  # 5 minutes -> 10 minutes
    '10min': '10min'  # 10 minutes -> 10 minutes
}

latency_results = []

for window, interval in interval_mapping.items():
    df_filtered = df[df['time_window'] == window].copy()
    if df_filtered.empty:
        print(f"No data for time window: {window}")
        continue
    
    df_filtered.set_index('timestamp', inplace=True)
    
    try:
        df_grouped = (df_filtered.groupby(['source_workload', 'destination_workload'])
                      .resample(interval)
                      .agg({col: list for col in df.columns if col not in ['timestamp', 'source_workload', 'destination_workload', 'time_window']})
                      .reset_index())
    except Exception as e:
        print(f"Error during resampling for window {window}: {e}")
        continue
    
    for idx, row in df_grouped.iterrows():
        values = row.get('average_latency', [])
        frequencies = row.get('new_request', [])
        
        if not values or not frequencies or len(values) != len(frequencies):
            continue
        
        try:
            data = np.repeat(values, frequencies)
            if data.size == 0:
                continue
            
            percentiles = {
                'p50_latency': np.percentile(data, 50),
                'p90_latency': np.percentile(data, 90),
                'p95_latency': np.percentile(data, 95),
                'p99_latency': np.percentile(data, 99)
            }
        except Exception as e:
            print(f"Error calculating percentiles for row {idx} in window {window}: {e}")
            continue
        
        result = {**row.to_dict(), **percentiles, 'time_window': window}
        latency_results.append(result)

# Créer un DataFrame final
df_latency = pd.DataFrame(latency_results)

# Sauvegarder dans un fichier CSV
df_latency.to_csv("kiali_latency_percentiles.csv", index=False)

print("Traitement terminé. Fichier sauvegardé sous kiali_latency_percentiles.csv.")


Error calculating percentiles for row 389 in window 15S: repeats may not contain negative values.
Error calculating percentiles for row 208 in window 30S: repeats may not contain negative values.
Error calculating percentiles for row 97 in window 1min: repeats may not contain negative values.
Error calculating percentiles for row 41 in window 5min: repeats may not contain negative values.
Error calculating percentiles for row 55 in window 10min: negative dimensions are not allowed
Traitement terminé. Fichier sauvegardé sous kiali_latency_percentiles.csv.
