In [13]:
import pandas as pd
import os

reports_path = "../../generated/reports-no-bug"

# One record per upload:
# [user, upload folder name, JaCoCo path, JaCoCo exists?, Pitest path, Pitest exists?]
data = []

# Expected layout: <reports_path>/<user>/<upload>/{JaCoCo/jacoco.csv, Pitest/mutations.xml}.
# Only directories are kept so that stray files (.DS_Store, Thumbs.db, ...) are not
# mistaken for users or uploads, and entries are sorted because os.listdir() returns
# them in arbitrary order.
users = sorted(
    entry for entry in os.listdir(reports_path)
    if os.path.isdir(os.path.join(reports_path, entry))
)
for user in users:
    uploads_path = os.path.join(reports_path, user)
    uploads = sorted(
        entry for entry in os.listdir(uploads_path)
        if os.path.isdir(os.path.join(uploads_path, entry))
    )
    for upload in uploads:
        path = os.path.join(uploads_path, upload)

        # A report counts as "valid" here when its output file exists on disk.
        jacoco_path = os.path.join(path, "JaCoCo", "jacoco.csv")
        jacoco_valid = os.path.exists(jacoco_path)

        pitest_path = os.path.join(path, "Pitest", "mutations.xml")
        pitest_valid = os.path.exists(pitest_path)

        data.append([user, upload, jacoco_path, jacoco_valid, pitest_path, pitest_valid])

main_df = pd.DataFrame(data, columns=['user', 'timestamp', 'jacoco', 'jacoco_valid', 'pitest', 'pitest_valid'])
# Upload folder names are parsed as UTC timestamps (e.g. 2025-04-17T07-25-49.752Z)
# and then converted to Brussels local time.
main_df['timestamp'] = pd.to_datetime(main_df['timestamp'], format='%Y-%m-%dT%H-%M-%S.%fZ', utc=True).dt.tz_convert("Europe/Brussels")

# Add group

In [14]:
# Load the user -> group mapping; rows missing either value are dropped.
df_users = pd.read_csv('../../generated/database/users.csv', usecols=['user', 'group']).dropna()

# Attach each user's group to every upload row. Any 'group' column left over from a
# previous run of this cell is dropped first, so re-running the cell stays idempotent
# instead of producing 'group_x' / 'group_y' columns.
main_df = main_df.drop(columns=['group'], errors='ignore').merge(df_users, on='user', how='left')

In [17]:
# Display the upload table (one row per user upload) to check the merged 'group' column.
main_df

Unnamed: 0,user,timestamp,jacoco,jacoco_valid,pitest,pitest_valid,group
0,0d271530-be17-4538-bf04-dde3c6069b5f,2025-04-17 09:25:49.752000+02:00,../../generated/reports-no-bug\0d271530-be17-4...,True,../../generated/reports-no-bug\0d271530-be17-4...,True,A
1,0d271530-be17-4538-bf04-dde3c6069b5f,2025-04-17 10:07:37.190000+02:00,../../generated/reports-no-bug\0d271530-be17-4...,True,../../generated/reports-no-bug\0d271530-be17-4...,True,A
2,11b1644a-d11f-45cb-8a40-282a209059c0,2025-04-17 09:26:18.184000+02:00,../../generated/reports-no-bug\11b1644a-d11f-4...,True,../../generated/reports-no-bug\11b1644a-d11f-4...,True,A
3,11b1644a-d11f-45cb-8a40-282a209059c0,2025-04-17 10:07:31.004000+02:00,../../generated/reports-no-bug\11b1644a-d11f-4...,True,../../generated/reports-no-bug\11b1644a-d11f-4...,True,A
4,1d20c70c-aa71-49ff-abf5-6013870a28f8,2025-04-17 09:46:46.781000+02:00,../../generated/reports-no-bug\1d20c70c-aa71-4...,False,../../generated/reports-no-bug\1d20c70c-aa71-4...,False,A
5,1d20c70c-aa71-49ff-abf5-6013870a28f8,2025-04-17 10:06:49.533000+02:00,../../generated/reports-no-bug\1d20c70c-aa71-4...,False,../../generated/reports-no-bug\1d20c70c-aa71-4...,False,A
6,1f06f352-91b6-4373-87ed-5a5070723efb,2025-04-17 09:26:06.551000+02:00,../../generated/reports-no-bug\1f06f352-91b6-4...,True,../../generated/reports-no-bug\1f06f352-91b6-4...,True,A
7,1f06f352-91b6-4373-87ed-5a5070723efb,2025-04-17 10:06:44.823000+02:00,../../generated/reports-no-bug\1f06f352-91b6-4...,False,../../generated/reports-no-bug\1f06f352-91b6-4...,False,A
8,223bf16c-aaad-404f-8a47-61f3ced741e4,2025-04-17 09:26:01.215000+02:00,../../generated/reports-no-bug\223bf16c-aaad-4...,True,../../generated/reports-no-bug\223bf16c-aaad-4...,True,A
9,223bf16c-aaad-404f-8a47-61f3ced741e4,2025-04-17 10:06:56.843000+02:00,../../generated/reports-no-bug\223bf16c-aaad-4...,True,../../generated/reports-no-bug\223bf16c-aaad-4...,True,A


# Invalid data

In [16]:
# Boolean masks for uploads whose JaCoCo / Pitest validity flag is False.
jacoco_missing = main_df["jacoco_valid"].eq(False)
pitest_missing = main_df["pitest_valid"].eq(False)

invalid_jacoco = main_df[jacoco_missing]
invalid_pitest = main_df[pitest_missing]
# Note: despite its name, `both_invalid` holds rows where AT LEAST ONE report is invalid (OR).
both_invalid = main_df[jacoco_missing | pitest_missing]

total_projects = len(main_df)
print(f"Nombre total d'utilisateurs : {len(df_users)}")
print(f"Nombre total de projets : {total_projects}")

# Each section prints a blank separator line, then count (+ share of all projects),
# then the per-group breakdown.
sections = [
    ("Nombre de projets invalides JaCoCo :", invalid_jacoco),
    ("Nombre de projets invalides Pitest :", invalid_pitest),
    ("Nombre de projets invalide (au moins 1 des 2) :", both_invalid),
]
for heading, subset in sections:
    print("")
    count = len(subset)
    print(heading, count, f"({count / total_projects * 100:.2f}%)")
    print(f"\t Groupe A : {len(subset[subset['group'] == 'A'])}")
    print(f"\t Groupe B : {len(subset[subset['group'] == 'B'])}")

Nombre total d'utilisateurs : 39
Nombre total de projets : 58

Nombre de projets invalides JaCoCo : 17 (29.31%)
	 Groupe A : 11
	 Groupe B : 6

Nombre de projets invalides Pitest : 17 (29.31%)
	 Groupe A : 11
	 Groupe B : 6

Nombre de projets invalide (au moins 1 des 2) : 17 (29.31%)
	 Groupe A : 11
	 Groupe B : 6


# Both valid

In [21]:
# Uploads for which both the JaCoCo and the Pitest report are flagged valid.
all_valid = main_df[main_df["jacoco_valid"] & main_df["pitest_valid"]]

# Count the distinct users actually present instead of halving the row count,
# which is only correct when every user has exactly two uploads.
n_users = main_df['user'].nunique()

print(f"Nombre total de projets {len(main_df)} = 2 projets x {n_users} utilisateurs")
print("Nombre de projets valides JaCoCo & Pitest :", len(all_valid), f"({len(all_valid) / len(main_df) * 100:.2f}%)")
print(f"\t Groupe A : {len(all_valid[all_valid['group'] == 'A'])}")
print(f"\t Groupe B : {len(all_valid[all_valid['group'] == 'B'])}")

Nombre total de projets 58 = 2 projets x 29 utilisateurs
Nombre de projets valides JaCoCo & Pitest : 41 (70.69%)
	 Groupe A : 21
	 Groupe B : 20


# Users with a valid JaCoCo report in both sessions

In [6]:
# Column names, not used in this cell — presumably JaCoCo coverage counters read by
# later cells; TODO confirm.
cols = ['instruction', 'branch', 'line', 'method']

# Per user, count the uploads whose JaCoCo report is flagged valid.
valid_jacoco_rows = main_df[main_df['jacoco_valid'] == True]
users_with_timestamps = (
    valid_jacoco_rows
    .groupby('user')['timestamp']
    .count()
    .reset_index()
)

# Keep the users with exactly two such uploads, then select all of their rows from main_df.
has_two_reports = users_with_timestamps['timestamp'] == 2
jacoco_users_with_2_timestamps = users_with_timestamps[has_two_reports]['user']
in_both_sessions = main_df['user'].isin(jacoco_users_with_2_timestamps)
jacoco_users = main_df[in_both_sessions]

print(f"Nombre d'utilisateurs avec 2 rapports JaCoCo valides : {len(jacoco_users_with_2_timestamps)}/{len(users_with_timestamps)}")

Nombre d'utilisateurs avec 2 rapports JaCoCo valides : 16/25
