In [18]:
import pandas as pd
import os

# Directory where this notebook writes its generated artefacts.
# exist_ok=True makes the call a no-op when the directory is already there.
code_analysis_generated_path = "../../generated/code-analysis"
os.makedirs(code_analysis_generated_path, exist_ok=True)

# Root of the uploaded reports. Layout read below:
#   <reports_path>/<user>/<upload>/JaCoCo/jacoco.csv
reports_path = "../../generated/reports-no-bug"

# Fail with an actionable message instead of the bare error os.listdir would raise.
if not os.path.isdir(reports_path):
    raise FileNotFoundError(f"Le dossier {reports_path} n'existe pas. Copier les uploads dans ce dossier avant d'exécuter le notebook.")

# os.listdir returns entries in arbitrary order, so sort for a reproducible
# result. Only directories are user folders; stray files (e.g. .DS_Store)
# would make the nested os.listdir below fail.
users = sorted(
    entry for entry in os.listdir(reports_path)
    if os.path.isdir(os.path.join(reports_path, entry))
)
if len(users) == 0:
    raise FileNotFoundError(f"Le dossier {reports_path} est vide. Copier les uploads dans ce dossier avant d'exécuter le notebook.")

# One row per (user, upload): where the JaCoCo CSV should be and whether it exists.
data = []
for user in users:
    uploads_path = os.path.join(reports_path, user)
    for upload in sorted(os.listdir(uploads_path)):
        path = os.path.join(uploads_path, upload)
        # Upload folders are named after their timestamp; anything that is not
        # a directory cannot be an upload and would break the date parsing below.
        if not os.path.isdir(path):
            continue

        jacoco_path = os.path.join(path, "JaCoCo", "jacoco.csv")
        jacoco_valid = os.path.isfile(jacoco_path)

        data.append([user, upload, jacoco_path, jacoco_valid])

main_df = pd.DataFrame(data, columns=['user', 'timestamp', 'jacoco', 'jacoco_valid'])
# Folder names are parsed as UTC timestamps, then converted to Brussels local time.
main_df['timestamp'] = pd.to_datetime(main_df['timestamp'], format='%Y-%m-%dT%H-%M-%S.%fZ', utc=True).dt.tz_convert("Europe/Brussels")
# Keep each user's uploads in chronological order: later cells number the
# sessions by order of appearance.
main_df = main_df.sort_values(['user', 'timestamp']).reset_index(drop=True)

# Add group

In [19]:
# CSV mapping each user to an experimental group; it is produced by another notebook.
users_csv_path = '../../generated/database/users.csv'
if not os.path.exists(users_csv_path):
    raise FileNotFoundError(f"Le fichier {users_csv_path} n'existe pas. Exécutez le notebook 'notebooks/arrange data/Database data.ipynb' pour le générer.")

# Keep only complete (user, group) pairs; exact duplicate rows carry no
# information and would multiply the rows of main_df in the merge.
df_users = (
    pd.read_csv(users_csv_path, usecols=['user', 'group'])
    .dropna()
    .drop_duplicates()
)

# Dropping a 'group' column left by a previous run keeps this cell idempotent:
# without it, re-running the cell yields 'group_x' / 'group_y' instead of 'group'.
# validate='many_to_one' raises if a user is still listed with several groups,
# instead of silently duplicating that user's uploads.
main_df = (
    main_df
    .drop(columns='group', errors='ignore')
    .merge(df_users, on='user', how='left', validate='many_to_one')
)

# Get data where the JaCoCo report is valid in both projects

In [22]:
# Coverage metrics for which a ratio is computed in the next cell.
cols = ['instruction', 'branch', 'line', 'method']

# Only uploads whose JaCoCo CSV exists are counted.
valid_reports = main_df[main_df['jacoco_valid']]

# Number of valid reports per user.
users_with_timestamps = valid_reports.groupby('user')['timestamp'].count().reset_index()
jacoco_users_with_2_timestamps = users_with_timestamps[users_with_timestamps['timestamp'] == 2]['user']

# Select from the valid reports only: a user with two valid reports may also
# have an upload without a JaCoCo CSV, and that row must not reach the cell
# that reads the CSV files.
jacoco_users = valid_reports[valid_reports['user'].isin(jacoco_users_with_2_timestamps)]

print(f"Nombre d'utilisateurs avec 2 rapports JaCoCo valides : {len(jacoco_users_with_2_timestamps)}")

Nombre d'utilisateurs avec 2 rapports JaCoCo valides : 16


In [21]:
# Names given to the per-report totals, in the order of the columns left in
# the CSV once GROUP / PACKAGE / CLASS are dropped.
# NOTE(review): this relies on the column order of the JaCoCo CSV - confirm.
counter_columns = [
    'instruction_missed', 'instruction_covered',
    'branch_missed', 'branch_covered',
    'line_missed', 'line_covered',
    'complexity_missed', 'complexity_covered',
    'method_missed', 'method_covered',
]

# Read only reports whose CSV exists, in chronological order per user, so that
# the session numbers assigned below follow the upload order rather than the
# order in which the file system listed the folders.
reports = jacoco_users[jacoco_users['jacoco_valid']].sort_values(['user', 'timestamp'])

users_jacoco_data = []
for _, report in reports.iterrows():
    csv_file = pd.read_csv(report['jacoco']).drop(columns=['GROUP', 'PACKAGE', 'CLASS'])
    # Sum every remaining counter over all rows of the report.
    totals = csv_file.sum().tolist()

    users_jacoco_data.append([report['user'], report['timestamp'], report['group']] + totals)

df_jacoco = pd.DataFrame(
    users_jacoco_data,
    columns=['user', 'timestamp', 'group'] + counter_columns,
)

# Coverage ratio per metric: covered / (missed + covered).
# A metric with no item at all (0 / 0) yields NaN.
for col in cols:
    covered = df_jacoco[f'{col}_covered']
    missed = df_jacoco[f'{col}_missed']
    df_jacoco[col] = covered / (missed + covered)

# Number each user's reports 1, 2, ... in order of appearance, which is
# chronological thanks to the sort above.
df_jacoco['session'] = df_jacoco.groupby('user')['timestamp'].transform(lambda x: pd.factorize(x)[0] + 1)

# Group A: achievements in session 1; group B: achievements in session 2;
# every other combination is labelled leaderboard.
# NOTE(review): a user without a group is therefore labelled 'leaderboard'
# in both sessions - confirm this is intended.
achievements_first = (df_jacoco['group'] == 'A') & (df_jacoco['session'] == 1)
achievements_second = (df_jacoco['group'] == 'B') & (df_jacoco['session'] == 2)
df_jacoco['game_mode'] = (achievements_first | achievements_second).map(
    {True: 'achievements', False: 'leaderboard'}
)

# For session 2, replace each ratio by its change since the same user's
# session 1. The per-user diff pairs the rows by user instead of relying on
# two separately filtered frames having the same row order.
is_second_session = df_jacoco['session'] == 2
deltas = df_jacoco.groupby('user')[cols].diff()
df_jacoco.loc[is_second_session, cols] = deltas.loc[is_second_session, cols]

df_jacoco.to_csv(os.path.join(code_analysis_generated_path, "jacoco.csv"), index=False)
df_jacoco


Unnamed: 0,user,timestamp,group,instruction_missed,instruction_covered,branch_missed,branch_covered,line_missed,line_covered,complexity_missed,complexity_covered,method_missed,method_covered,instruction,branch,line,method,session,game_mode
0,0d271530-be17-4538-bf04-dde3c6069b5f,2025-04-17 09:25:49.752000+02:00,A,552,158,48,8,149,45,77,23,53,19,0.222535,0.142857,0.231959,0.263889,1,achievements
1,0d271530-be17-4538-bf04-dde3c6069b5f,2025-04-17 10:07:37.190000+02:00,A,451,259,38,18,117,77,61,39,42,30,0.142254,0.178571,0.164948,0.152778,2,leaderboard
2,11b1644a-d11f-45cb-8a40-282a209059c0,2025-04-17 09:26:18.184000+02:00,A,552,158,48,8,149,45,77,23,53,19,0.222535,0.142857,0.231959,0.263889,1,achievements
3,11b1644a-d11f-45cb-8a40-282a209059c0,2025-04-17 10:07:31.004000+02:00,A,400,310,40,16,95,99,53,47,31,41,0.214085,0.142857,0.278351,0.305556,2,leaderboard
4,223bf16c-aaad-404f-8a47-61f3ced741e4,2025-04-17 09:26:01.215000+02:00,A,674,36,56,0,181,13,94,6,66,6,0.050704,0.0,0.06701,0.083333,1,achievements
5,223bf16c-aaad-404f-8a47-61f3ced741e4,2025-04-17 10:06:56.843000+02:00,A,630,80,55,1,162,32,86,14,58,14,0.061972,0.017857,0.097938,0.111111,2,leaderboard
6,2b2965f7-5d66-4569-802b-27c84aa69bff,2025-04-17 09:24:52.114000+02:00,B,687,23,56,0,184,10,95,5,67,5,0.032394,0.0,0.051546,0.069444,1,leaderboard
7,2b2965f7-5d66-4569-802b-27c84aa69bff,2025-04-17 10:05:03.689000+02:00,B,651,59,56,0,179,15,93,7,65,7,0.050704,0.0,0.025773,0.027778,2,achievements
8,60716858-6fcb-4953-88e3-12a51b3c7c47,2025-04-17 09:24:21.034000+02:00,B,687,23,56,0,184,10,95,5,67,5,0.032394,0.0,0.051546,0.069444,1,leaderboard
9,60716858-6fcb-4953-88e3-12a51b3c7c47,2025-04-17 10:05:22.289000+02:00,B,670,40,56,0,183,11,94,6,66,6,0.023944,0.0,0.005155,0.013889,2,achievements
