In [None]:
import pandas as pd
import sys
# Extend the import search path two directories up so the local `utils`
# package imported below can be resolved. Presumably '../../' is the
# repository root relative to the notebook's working directory - TODO confirm.
# This line must stay ahead of the `utils` import.
sys.path.append('../../')

from utils.dataloader import get_issues

# Load the issues table that both later cells read and extend. What
# filter='properties' selects is decided inside `get_issues` (not shown
# here); the later cells expect `prop:*` columns on the result.
issues = get_issues(filter='properties')

In [None]:
# Interest-group membership masks.
#
# Each entry maps a group label to a boolean mask over the rows of
# `issues`, built from its `prop:*` columns. The insertion order of the
# dict is significant: the processing cell iterates it in order to build
# the stats rows and the `ig:*` columns.
# NOTE(review): the numeric cut-offs are hard-coded and their origin is
# not visible in this notebook - TODO document where they come from.

prop_resolution = issues['prop:resolution']
prop_comments = issues['prop:comments']
prop_users = issues['prop:users']
prop_files = issues['prop:files']
prop_loc = issues['prop:loc']

# Shared by both resolution-based groups: only closed issues qualify.
is_closed = issues['prop:state'].eq('closed')

interest_group_conditions = {
    'quick resolution': prop_resolution.le(1) & is_closed,
    'slow resolution': prop_resolution.ge(162) & is_closed,
    'hot topic': prop_comments.ge(10) | prop_users.ge(3),
    # At least one touched file is required; an untouched issue is not a fix.
    'easy fix': prop_files.le(2) & prop_files.gt(0) & prop_loc.le(23),
    'hard fix': prop_files.ge(36) | prop_loc.ge(1329),
    'ignored': prop_comments.eq(0) & prop_files.eq(0) & prop_loc.eq(0),
}

In [None]:
# Summarise and label the interest groups.
#
# For every mask in `interest_group_conditions` this cell
#   1. materialises the matching subset of `issues`,
#   2. records the subset size and its share of all issues,
#   3. stores the mask on `issues` as a boolean `ig:<group>` column,
# and then writes the labelled issues and the summary table to CSV.

# Subsets are taken before the `ig:*` columns are (re)assigned below, so
# they carry exactly the columns `issues` had when this cell started.
processed_conditions = {
    key: issues[condition]
    for key, condition in interest_group_conditions.items()
}

total_issues = issues.shape[0]
stats = pd.DataFrame([
    {
        'Interest Group': key,
        'Number of Issues': subset.shape[0],
        # Guard the empty-table case so the summary can still be written
        # instead of failing with ZeroDivisionError.
        'Percentage of Total': (
            subset.shape[0] / total_issues * 100 if total_issues else 0.0
        ),
    }
    for key, subset in processed_conditions.items()
])

# The masks were derived from `issues`, so they align with its index and
# can be stored directly as columns; no row-by-row lookup is required.
for key, condition in interest_group_conditions.items():
    issues[f'ig:{key}'] = condition

# NOTE(review): if `get_issues(filter='properties')` reads this same CSV,
# this write replaces the notebook's own input - confirm that is intended.
issues.to_csv('../../data/issues_properties.csv', index=False)

stats.to_csv('../../results/csv/interest_groups/interest_group_stats.csv', index=False)