In [1]:
from actions import ActionsWorkflow, ActionsJobs, ActionsArtifacts
import LogExtractor as extractor

from reportlab.lib.pagesizes import A4
from reportlab.lib import colors
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.platypus import SimpleDocTemplate, Paragraph, Table, TableStyle, Spacer, ListFlowable, ListItem, Spacer, Image
from datetime import datetime

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import tempfile
import os
import kaleido


## Plotter

In [2]:
def error_distribution_pie_chat_plt(error_df):

    plt.figure(figsize=(5, 5))
    plt.pie(
        error_passed_info['FAILED'].values,
        labels=error_passed_info['FAILED'].index.to_list(),
        autopct='%1.1f%%',
        startangle=90,
        colors=['firebrick', 'lightgreen', 'lightskyblue'],
    )
    plt.legend(title="Error Types", loc="upper right", bbox_to_anchor=(1.3, 1))
    plt.title('Failure Distribution')
    plt.tight_layout()

    #return fig

In [3]:
def error_distribution_pie_chart(error_df):
    # Filter for FAILED status
    failed_df = error_df[error_df['status'] == 'FAILED']

    # Group by category and count the number of FAILED statuses
    failed_counts = failed_df.groupby('category').size().reset_index(name='count')

    # Create the pie chart
    fig = px.pie(
        failed_counts, 
        names="category",  # Use 'category' for pie slice labels
        values="count",    # Use 'count' for pie slice sizes
        title="Distribuição de falhas por categoria",
        color_discrete_sequence=px.colors.sequential.RdBu,
    )

    fig.update_layout(
    #   width=400,  # Set the width of the plot (in pixels)
    #  height=400,  # Set the height of the plot (in pixels)
        margin=dict(l=20, r=20, t=40, b=20)  # Adjust margins if needed
    )

    # Make the pie chart circle bigger by adjusting the marker size
    fig.update_traces(
        marker=dict(line=dict(color='white', width=2)),  # Optional: Add a white border
        textposition='inside',  # Display text inside the slices
        textinfo='percent+label'  # Show percentage and label
    )

    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmpfile:
        fig.write_image(tmpfile.name, format="png", width=800, height=400)
        return tmpfile.name
    

In [4]:
def plot_category_errors_bar(error_df):
    # Calculate the frequency of errors per category
    error_freq_df = error_df.groupby(['category', 'error']).size().reset_index(name='frequency')

    # Create the bar plot
    fig = px.bar(
        error_freq_df, 
        x="category", 
        y="frequency", 
        color="error",  # Use a discrete color sequence
        color_discrete_sequence=px.colors.sequential.RdBu,
        title="Frequência de tipos de erros por categoria",
        labels={'frequency': 'Frequency of Errors', 'category': 'Category'},
    )

    # Adjust layout to control bar width
    fig.update_layout(
        xaxis_title="Category",
        yaxis_title="Frequency of Errors",
        barmode='stack',  
        bargroupgap=0.1,  
        width=600,
        margin=dict(l=20, r=20, t=40, b=20)  
    )

    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmpfile:
        fig.write_image(tmpfile.name, format="png", width=800, height=400)
        return tmpfile.name

In [5]:
def categories_failures_passed_rate(status_freq_df):
    # Calcular totais e percentuais
    status_freq_df['Total'] = status_freq_df['PASSED'] + status_freq_df['FAILED']
    status_freq_df['PASSED_pct'] = (status_freq_df['PASSED'] / status_freq_df['Total']) * 100
    status_freq_df['FAILED_pct'] = (status_freq_df['FAILED'] / status_freq_df['Total']) * 100

    # Transformar dados para formato longo
    status_freq_long = status_freq_df.melt(
        id_vars=['category'], 
        value_vars=['PASSED_pct', 'FAILED_pct'], 
        var_name='Status', 
        value_name='Percentage'
    )

    # Criar coluna com valores reais correspondentes
    status_freq_long['Real Value'] = status_freq_long.apply(
        lambda row: status_freq_df.loc[status_freq_df['category'] == row['category'], row['Status'].replace('_pct', '')].values[0], 
        axis=1
    )

    # Criar gráfico
    fig = px.bar(
        status_freq_long, 
        x="category", 
        y="Percentage", 
        color="Status", 
        barmode='stack', 
        title="Proporção de testes Aprovados/Falho",
        labels={'Percentage': 'Percentage'},
        text=status_freq_long["Real Value"]  # Exibir valores reais nas barras
    )

    # Ajustar layout para exibir os valores dentro das barras
    fig.update_traces(texttemplate='%{text}', textposition='inside')
    fig.update_yaxes(title='Percentage')
    fig.update_xaxes(title='Category')

    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmpfile:
        fig.write_image(tmpfile.name, format="png", width=800, height=400)
        return tmpfile.name

In [6]:
def build_all_graphs():
    return {
        'category_errors_bar': plot_category_errors_bar(error_df),
        'error_distribution_pie': error_distribution_pie_chart(error_distribution_df),
        'failures_passed_rate': categories_failures_passed_rate(status_freq_df),
    }

## Creating pdf

In [7]:

def create_pdf(df, df_errors):
    # A4 size dimensions
    width, height = A4

    # Set 10% margin
    margin = 0.1 * width

    # Create PDF with margins
    doc = SimpleDocTemplate("report_v0.pdf", pagesize=A4,
                            leftMargin=margin, rightMargin=margin, topMargin=0.1*height, bottomMargin=0.1*height)

    # Styles
    styles = getSampleStyleSheet()
    heading_style = styles['Heading1']
    normal_style = styles['Normal']
    normal_style.alignment = 0  # 0 for left alignment

    bold_style = ParagraphStyle(
        name="Bold",
        parent=normal_style,
        fontName="Helvetica-Bold",
        fontSize=12
    )

    # Create the story (content) for the PDF
    story = []

    # Add title with fields
    story.extend(create_title(heading_style,normal_style))

    # Add each section to the story
    story.extend(create_execution_summary(df, normal_style, bold_style))
    story.extend(create_detailed_results(df, normal_style, bold_style, width, margin))
    story.extend(create_errors_summary(df_errors, normal_style, bold_style, width, margin))
    story.extend(create_graphs(normal_style, bold_style, width, margin))
    # Build PDF
    doc.build(story)

def create_title(heading_style, normal_style):
    # Initialize the story list
    story = []

    # Get current date and time
    agora = datetime.now()
    horario_dia = agora.strftime("%d/%m/%Y %H:%M:%S")

    # Create the title
    title_text = "Sumário de Resultados dos Testes"
    title_paragraph = Paragraph(f"<b>{title_text}</b>", heading_style)


    # Add title and date to the story as separate elements
    story.append(title_paragraph)

    # Create the formatted text for the execution date, system version, and environment
    execution_paragraph = Paragraph(f"Data da Execução: {horario_dia}", normal_style)
    version_paragraph = Paragraph("Versão do Sistema: ", normal_style)
    environment_paragraph = Paragraph("Ambiente: ", normal_style)

    # Add other paragraphs to the story
    story.append(execution_paragraph)
    story.append(Spacer(1, 6))  # Spacer between execution and version
    story.append(version_paragraph)
    story.append(Spacer(1, 6))  # Spacer between version and environment
    story.append(environment_paragraph)

    story.append(Spacer(1, 18))  # Add space at the end

    # Return the complete story
    return story

def create_execution_summary(df, normal_style, bold_style):
    story = []
    story.append(Paragraph("Resumo Geral", bold_style))
    story.append(Spacer(1, 6))

    fail_success_rate = (df['num_failed'].sum() / df['num_passed'].sum() * 100).round(2)

    # Criando a lista de resumo corretamente
    summary_data = {
        'Total de Testes:': df['total_runs'].sum(),
        'Testes Bem-Sucedidos:': df['num_passed'].sum(),
        'Testes com Falha:': df['num_failed'].sum(),
        'Taxa de Sucessos/Falha:': f"{fail_success_rate}%",  # Round to 2 decimal places
        'Tempo Mínimo de Execução:': f"{df['min_test_time'].min():.2f} s",
        'Tempo Médio de Execução:': f"{df['avg_test_time'].mean():.2f} s",
        'Duração Total dos Testes:': f"{df['total_duration'].sum():.2f} s"
    }

    # Criando a lista com bullet points
    bullet_points = [
        ListItem(Paragraph(f"<b>{key}</b> {value}", normal_style), leftIndent=20, spaceAfter=6)
        for key, value in summary_data.items()
    ]

    # Criando o ListFlowable
    list_flowable = ListFlowable(bullet_points, bulletType='bullet', leftIndent=20)

    # Adicionando ao relatório
    story.append(list_flowable)
    story.append(Spacer(1, 24))

    return story

def create_detailed_results(df, normal_style, bold_style, width, margin):
    story = []
    story.append(Paragraph("Detalhamento dos Testes", bold_style))
    story.append(Spacer(1, 12))
    df_renamed = df.copy()  # Create a copy of the DataFrame
    df_renamed.columns = [
        'Categoria de Teste', 
        'Testes Bem-Sucedidos', 
        'Falhas', 
        'Execuções', 
        'Tempo Mínimo de Execução', 
        'Tempo Médio', 
        'Duração Total'
    ]


    # Dropping the specified columns
    df_renamed = df_renamed.drop(columns=['Tempo Mínimo de Execução'])

    df_renamed['Tempo Médio'] = df_renamed['Tempo Médio'].astype(str) + ' sec'
    df_renamed['Duração Total'] = df_renamed['Duração Total'].astype(str) + ' sec'

    # Prepare the detailed data for the table
    detailed_tests_data = [[Paragraph(str(value), normal_style) for value in df_renamed.columns.tolist()]]  # Add header
    detailed_tests_data.extend(
        [[Paragraph(str(value), normal_style) for value in row] for row in df_renamed.values.tolist()]
    )

    # Calculate available width after applying margins
    available_width = width - 2 * margin  # Subtracting left and right margins

    # Define column proportions
    proportions = [0.3, 0.15, 0.15, 0.15, 0.2, 0.15]  # Example proportions

    total_proportion = sum(proportions)
    if total_proportion > 1:
        proportions = [p / total_proportion for p in proportions]  # Scale proportions to fit within 1

    # Calculate column widths based on the available width
    col_widths = [available_width * p for p in proportions]

    # Create the table
    detailed_table = Table(detailed_tests_data, colWidths=col_widths)
    detailed_table.setStyle(TableStyle([('ALIGN', (0, 0), (-1, -1), 'CENTER'),
                                        ('GRID', (0, 0), (-1, -1), 0.5, colors.black)]))
    story.append(detailed_table)
    story.append(Spacer(1, 24))

    return story

def create_errors_summary(df, normal_style, bold_style, width, margin):
    """
    Creates a summary of errors in a PDF document.

    :param df: DataFrame containing error data.
    :param normal_style: Style for normal text.
    :param bold_style: Style for bold text.
    :param width: Width of the page.
    :param margin: Margin size.
    :return: A list of elements to be added to the PDF.
    """
    story = []
    story.append(Paragraph("Resumo dos Erros", bold_style))
    story.append(Spacer(1, 12))

    # Create a copy of the DataFrame and reset the index
    df_copy = df.copy().reset_index()
    df_copy.columns = [
        'Nome',
        'Status',
        'Categoria do Teste',
        'Tipo de erro',
        'Detalhes do erro (100 caracteres)',
        'JobId',
    ]

    df_copy = df_copy.drop('Detalhes do erro (100 caracteres)', axis=1)
    display(df_copy)


    # Prepare the detailed data for the table
    detailed_tests_data = [[Paragraph(str(value), normal_style) for value in df_copy.columns.tolist()]]  # Add header
    detailed_tests_data.extend(
        [[Paragraph(str(value), normal_style) for value in row] for row in df_copy.values.tolist()]
    )

    # Calculate available width after applying margins
    available_width = width - 2 * margin  # Subtracting left and right margins

    # Define column proportions
    proportions = [0.3, 0.15, 0.15, 0.15, 0.2, 0.1]  # Example proportions

    # Calculate column widths based on the available width
    col_widths = [available_width * p for p in proportions]

    # Create the table
    detailed_table = Table(detailed_tests_data, colWidths=col_widths)
    detailed_table.setStyle(TableStyle([
        ('ALIGN', (0, 0), (-1, -1), 'CENTER'),  # Center-align all cells
        ('GRID', (0, 0), (-1, -1), 0.5, colors.black),  # Add grid lines
        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),  # Bold header font
    ]))
    story.append(detailed_table)
    story.append(Spacer(1, 24))

    return story

def create_graphs(normal_style, bold_style, width, margin):
    graph_files = build_all_graphs()
    story = []

    # Add a title to the PDF
    story.append(Spacer(1, 12))

    story.append(Paragraph("Visualização de dados", bold_style))
    story.append(Spacer(1, 12))

    # Add the bar chart to the PDF
    story.append(Image(graph_files['category_errors_bar'], width=500, height=250))  # Adjust size as needed
    story.append(Spacer(1, 24))

    # Add the pie chart to the PDF
    story.append(Image(graph_files['error_distribution_pie'], width=500, height=250))  # Adjust size as needed
    story.append(Spacer(1, 24))

    # Add the pass/fail rate bar chart to the PDF
    story.append(Image(graph_files['failures_passed_rate'], width=500, height=250))  # Adjust size as needed
    story.append(Spacer(1, 24))

    return story


## Pipeline


In [8]:
def get_ids_in_date_range(df, initial_date, final_date):
    # Convert the date strings to datetime objects with UTC timezone
    initial_date = pd.to_datetime(initial_date, format="%d-%m-%Y").tz_localize('UTC')
    final_date = pd.to_datetime(final_date, format="%d-%m-%Y").tz_localize('UTC')
    
    filtered_df = df[(df['createdAt'] >= initial_date) & (df['createdAt'] <= final_date)]

    return filtered_df['databaseId'].tolist()

In [None]:
repo_path = 'MagaluCloud/s3-specs'
query_size = 2

workflow = ActionsWorkflow(repository=repo_path, query_size=query_size)

initial_date = "16-01-2025"
final_date = "21-03-2025"
workflowIds = get_ids_in_date_range(workflow.df, initial_date, final_date)

jobs = ActionsJobs(repo_path)

all_workflows_jobs = pd.DataFrame()

for id in set(workflowIds):
    tmp = jobs.get_jobs(id)
    all_workflows_jobs = pd.concat([all_workflows_jobs, tmp])

artifacts = ActionsArtifacts(workflowIds, repository=repo_path)
all_tests_df = pd.DataFrame()
all_times_df = pd.DataFrame()
all_failures_df = pd.DataFrame()

## Mudanca de logica, pegar valores dos workflowsIds checar quais deles nao existem nos valores unique retornados em um dos pytest e entao baixar os que faltarem

for path in artifacts.paths:
    artifact = extractor.PytestArtifactLogExtractor(path)
    pytest_tests_status	, pytest_run_times, pytest_failures_errors = artifact.log_to_df()
    all_tests_df = pd.concat([all_tests_df, pytest_tests_status])
    all_times_df = pd.concat([all_times_df, pytest_run_times])
    all_failures_df = pd.concat([all_failures_df, pytest_failures_errors])


In [None]:
df = pd.read_parquet('./bin/pytest.categories.log.parquet')
df

Unnamed: 0_level_0,total,num,avg,min,durationType,databaseId
pytest_run_times,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
test_delete_object_with_versions,28.407,4,6.844,6.413,teardown duration,13269265723
test_delete_bucket_with_objects_with_versions,26.964,4,6.709,6.602,teardown duration,13269265723
test_delete_object_with_versions,25.219,4,6.222,5.940,setup duration,13269265723
test_delete_bucket_with_objects_with_versions,22.849,4,5.544,5.407,setup duration,13269265723
test_delete_object_with_versions,7.212,4,1.667,1.109,call duration,13269265723
...,...,...,...,...,...,...
existing_bucket_name,1.140,1,1.140,1.140,fixture duration,13269265723
bucket_with_lock_enabled,1.051,1,1.051,1.051,fixture duration,13269265723
bucket_with_one_object_and_lock_enabled,0.295,1,0.295,0.295,fixture duration,13269265723
active_mgc_workspace,0.151,8,0.008,0.008,fixture duration,13269265723


In [None]:
df1 = pd.DataFrame([1,2,1])
df2 = pd.DataFrame([1,2,3])

if df1.isin(df2) == True:
    print(1)

ValueError: The truth value of a DataFrame is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [None]:
# Setup
error_distribution_df = all_tests_df[['category', 'status']]
error_passed_info = error_distribution_df.groupby(['status','category']).value_counts()

status_freq_df = pd.concat([error_passed_info.FAILED, error_passed_info.PASSED], axis=1).fillna(0).astype(int)
status_freq_df.columns = ['FAILED','PASSED']
status_freq_df = status_freq_df.reset_index()


In [None]:
# Generate a tuple with the category name and the summed up values of all index of said class
def get_time(metric):
    return pd.Series(dict(map(lambda t, x: (x, all_times_df.loc[all_times_df.index == t, metric].sum()), all_tests_df.index.unique(), all_tests_df.category.unique())))

total_times = get_time('total')
avg_time_test = get_time('avg')
min_test_time  = get_time('min')

cpf_df = all_tests_df.groupby(['category','status']).size().unstack('status').fillna(0).astype(int)
cpf_df['total'] = cpf_df.sum(axis=1)
time_count_df = pd.concat([cpf_df['PASSED'], cpf_df['FAILED'], cpf_df['total'], min_test_time, avg_time_test, total_times], axis=1)
time_count_df.columns = ['num_passed', 'num_failed', 'total_runs', 'min_test_time', 'avg_test_time', 'total_duration']
time_count_df['avg_test_time'] = (time_count_df['avg_test_time'] / time_count_df['total_runs']).round(2) 

report_df = pd.DataFrame()
report_df['name'] = all_tests_df['category'].unique()
report_df = report_df.set_index('name')

# dfs used on pdf
report_df = pd.concat([report_df, time_count_df], axis=1).reset_index().round(2)
errors_df = all_failures_df

build_all_graphs()
create_pdf(report_df, errors_df)


NameError: name 'error_df' is not defined

# Other Plots


### Error distribution (Pie chart)

In [None]:
error_distribution_df = all_tests_df[['category', 'status']]
error_passed_info = error_distribution_df.groupby(['status','category']).value_counts()

status_freq_df = pd.concat([error_passed_info.FAILED, error_passed_info.PASSED], axis=1).fillna(0).astype(int)
status_freq_df.columns = ['FAILED','PASSED']
status_freq_df = status_freq_df.reset_index()


In [None]:
def error_distribution_pie_chat(error_df):
    # Filter for FAILED status
    failed_df = error_df[error_df['status'] == 'FAILED']

    # Group by category and count the number of FAILED statuses
    failed_counts = failed_df.groupby('category').size().reset_index(name='count')

    # Create the pie chart
    fig = px.pie(
        failed_counts, 
        names="category",  # Use 'category' for pie slice labels
        values="count",    # Use 'count' for pie slice sizes
        title="Distribution of FAILED Tests by Category",
        color_discrete_sequence=px.colors.sequential.RdBu,
    )

    fig.update_layout(
    #   width=400,  # Set the width of the plot (in pixels)
    #  height=400,  # Set the height of the plot (in pixels)
        margin=dict(l=20, r=20, t=40, b=20)  # Adjust margins if needed
    )

    # Make the pie chart circle bigger by adjusting the marker size
    fig.update_traces(
        marker=dict(line=dict(color='white', width=2)),  # Optional: Add a white border
        textposition='inside',  # Display text inside the slices
        textinfo='percent+label'  # Show percentage and label
    )

    # Show the pie chart
    fig.show()

    

In [None]:
def error_distribution_pie_chat_plt(error_df):

    plt.figure(figsize=(5, 5))
    plt.pie(
        error_passed_info['FAILED'].values,
        labels=error_passed_info['FAILED'].index.to_list(),
        autopct='%1.1f%%',
        startangle=90,
        colors=['firebrick', 'lightgreen', 'lightskyblue'],
    )
    plt.legend(title="Error Types", loc="upper right", bbox_to_anchor=(1.3, 1))
    plt.title('Failure Distribution')
    plt.tight_layout()
    plt.show()



### Proportion of Passed/Failed (%) with Real Values Displayed

In [None]:

# Calcular totais e percentuais
status_freq_df['Total'] = status_freq_df['PASSED'] + status_freq_df['FAILED']
status_freq_df['PASSED_pct'] = (status_freq_df['PASSED'] / status_freq_df['Total']) * 100
status_freq_df['FAILED_pct'] = (status_freq_df['FAILED'] / status_freq_df['Total']) * 100

# Transformar dados para formato longo
status_freq_long = status_freq_df.melt(
    id_vars=['category'], 
    value_vars=['PASSED_pct', 'FAILED_pct'], 
    var_name='Status', 
    value_name='Percentage'
)

# Criar coluna com valores reais correspondentes
status_freq_long['Real Value'] = status_freq_long.apply(
    lambda row: status_freq_df.loc[status_freq_df['category'] == row['category'], row['Status'].replace('_pct', '')].values[0], 
    axis=1
)

# Criar gráfico
fig = px.bar(
    status_freq_long, 
    x="category", 
    y="Percentage", 
    color="Status", 
    barmode='stack', 
    title="Proportion of Passed/Failed (%) with Real Values Displayed",
    labels={'Percentage': 'Percentage'},
    text=status_freq_long["Real Value"]  # Exibir valores reais nas barras
)

# Ajustar layout para exibir os valores dentro das barras
fig.update_traces(texttemplate='%{text}', textposition='inside')
fig.update_yaxes(title='Percentage')
fig.update_xaxes(title='Category')

fig.show()


### Error type rate


In [None]:
def plot_category_errors_bar(df):
    # Calculate the frequency of errors per category
    error_freq_df = df.groupby(['category', 'error']).size().reset_index(name='frequency')

    # Create the bar plot
    fig = px.bar(
        error_freq_df, 
        x="category", 
        y="frequency", 
        color="error",  # Use a discrete color sequence
        color_discrete_sequence=px.colors.sequential.RdBu,
        title="Frequency of Errors by Category",
        labels={'frequency': 'Frequency of Errors', 'category': 'Category'},
    )

    # Adjust layout to control bar width
    fig.update_layout(
        xaxis_title="Category",
        yaxis_title="Frequency of Errors",
        barmode='stack',  
        bargroupgap=0.1,  
        width=600,
        margin=dict(l=20, r=20, t=40, b=20)  
    )

    # Show the plot
    fig.show()