In [9]:
import pandas as pd

In [10]:
# Columns shown when inspecting query results: the row ID, two quality-factor slots
# (each with its entity-fact), three context-factor slots, and two activity slots
# (each with attribute and impact).
allvars = (
    ['ID']
    + [f'{field} {slot}' for slot in (1, 2) for field in ('Quality Factor', 'Entity-Fact')]
    + [f'Context Factor {slot}' for slot in (1, 2, 3)]
    + [f'{field} {slot}' for slot in (1, 2) for field in ('Activity', 'Attribute', 'Impact')]
)

In [13]:
# Load the 'Data' sheet of the raw workbook and replace every missing cell with the
# placeholder string 'na'; the filters in later cells compare against "na".
df = (
    pd.read_excel('../../data/raw/prq-data-0-10.xlsx', sheet_name='Data')
    .fillna(value='na')
)

  warn(msg)


In [15]:
# keep only the rows whose 'M' column is True, i.e., where the participant explicitly
# mentioned a requirements quality impact
mentions_impact = df['M'].eq(True)
df = df.loc[mentions_impact]

In [23]:
# keep only the rows that connect at least one quality factor with one activity,
# i.e., where neither first slot holds the 'na' placeholder
has_quality_factor = df['Quality Factor 1'].ne('na')
has_activity = df['Activity 1'].ne('na')
df = df.loc[has_quality_factor & has_activity]

In [None]:
def query_qf(qualityfactor: str, entityfact: str = "") -> str:
    """Build a `DataFrame.query` expression matching a quality factor in either slot.

    For each of the two slots a clause on `Quality Factor <slot>` is generated; when
    `entityfact` is non-empty, the clause additionally constrains `Entity-Fact <slot>`
    of the same slot. The per-slot clauses are joined with ` | ` and wrapped in
    parentheses so the result can be combined with other expressions.

    Both values are interpolated verbatim between double quotes.
    """
    def slot_clause(slot: int) -> str:
        clause = f'`Quality Factor {slot}`=="{qualityfactor}"'
        if not entityfact:
            return clause
        return clause + f' & `Entity-Fact {slot}`=="{entityfact}"'

    return '({})'.format(' | '.join(slot_clause(slot) for slot in (1, 2)))

def query_context(contextfactors: [str]) -> str :
    """Build a `DataFrame.query` expression matching context factors in any slot.

    One `in` clause is generated for each of the three `Context Factor <n>` columns;
    `contextfactors` is interpolated via its string representation. The clauses are
    joined with ` | ` and wrapped in parentheses.
    """
    clauses = []
    for slot in (1, 2, 3):
        clauses.append(f'`Context Factor {slot}` in {contextfactors}')

    return '({})'.format(' | '.join(clauses))

def query_activity(activity: str, attribute: str = "", impact: int = None) -> str:
    """Build a `DataFrame.query` expression matching an activity in either slot.

    For each of the two slots a clause on `Activity <slot>` is generated. A non-empty
    `attribute` adds a constraint on `Attribute <slot>`, and an `impact` other than
    `None` adds a constraint on `Impact <slot>` of the same slot. The per-slot clauses
    are joined with ` | ` and wrapped in parentheses.

    Args:
        activity: value the activity column must equal.
        attribute: optional value the attribute column must equal; "" disables it.
        impact: optional value the impact column must equal; `None` disables it.
            An impact of 0 is a valid filter value.

    Returns:
        The parenthesized query expression.
    """
    queries = []
    for idx in ["1", "2"]:
        query = f'`Activity {idx}`=="{activity}"'
        if attribute:
            query += f' & `Attribute {idx}`=="{attribute}"'
        # Compare against None rather than truthiness so that impact=0 is not dropped,
        # and apply the impact filter even when no attribute was requested.
        if impact is not None:
            query += f' & `Impact {idx}`=={impact}'
        queries.append(query)

    return '(' + ' | '.join(queries) + ')'


In [None]:
# Example lookup: all rows reporting the quality factor "orientation" with entity-fact "solution".
qqf = query_qf(entityfact='solution', qualityfactor='orientation')
#qct = query_context(contextfactors=['Involvement'])
# qac is only referenced by the commented-out combined query below
qac = query_activity('Understanding')
#df.query(f'{qqf} & {qct} & {qac}')[allvars]
df.query(qqf).loc[:, allvars]

# Unspecific Impact

In [33]:
# A row counts as unspecific when either activity slot holds "Processing" or either
# attribute slot holds "Unspecific"; df_spec is the exact complement of df_unspec.
is_unspecific = (
    df['Activity 1'].eq('Processing')
    | df['Activity 2'].eq('Processing')
    | df['Attribute 1'].eq('Unspecific')
    | df['Attribute 2'].eq('Unspecific')
)
df_unspec = df.loc[is_unspecific]
df_spec = df.loc[~is_unspecific]

In [48]:
# Report how many rows fall into the unspecific subset.
print('{} reported relationships either state an unspecific activity or an unspecific attribute.'.format(len(df_unspec)))

34 reported relationships either state an unspecific activity or an unspecific attribute.


In [57]:
# Count the occurrences of the placeholder activity "Processing" across both activity columns.
unspecified_activities = sum(
    int(df_unspec[f'Activity {activityid}'].eq('Processing').sum())
    for activityid in ('1', '2')
)

print(f'A total of {unspecified_activities} activities were unspecified ("Processing").')

A total of 14 activities were unspecified.


In [58]:
# Count the occurrences of the placeholder attribute "Unspecific" across both attribute columns.
unspecified_attributes = 0
for attributeid in ['1', '2']:
    # comparing and summing avoids building a value_counts dict just to look up one key
    unspecified_attributes += int((df_unspec[f'Attribute {attributeid}'] == 'Unspecific').sum())

# The label in the message names the value that is actually counted ("Unspecific").
print(f'A total of {unspecified_attributes} attributes were unspecified ("Unspecific").')

A total of 29 attributes were unspecified.


## Sole Quality Factors

Filtering for all reported relationships that involve exactly one quality factor and no context factor, and keeping only those quality factors with a support of at least 2.

In [38]:
# Rows with exactly one quality factor: the second quality-factor slot and all three
# context-factor slots hold the 'na' placeholder.
single_impact = df_spec.query('`Quality Factor 2` == "na" & `Context Factor 1` == "na" & `Context Factor 2` == "na" & `Context Factor 3` == "na"')

# instances maps "<quality factor>-<entity-fact>" to its support (number of rows) and to
# a nested mapping activity -> attribute -> list of reported impacts.
instances = {}
for _, row in single_impact.iterrows():
    entityfact = '{}-{}'.format(row["Quality Factor 1"], row["Entity-Fact 1"])

    entry = instances.setdefault(entityfact, {'support': 0, 'activities': {}})
    entry['support'] += 1

    for actid in ('1', '2'):
        activity = row[f'Activity {actid}']
        if activity == "na":
            # empty activity slot: nothing to record
            continue
        per_attribute = entry['activities'].setdefault(activity, {})
        per_attribute.setdefault(row[f'Attribute {actid}'], []).append(row[f'Impact {actid}'])

# display only the quality factors that were reported at least twice
{key: entry for key, entry in instances.items() if entry['support'] >= 2}

{'orientation-solution': {'support': 7,
  'activities': {'Understanding': {'Uniqueness': [-3.0, -2.0]},
   'Verifying': {'Completeness': [-2.0, -2.0]},
   'Estimating Effort': {'Traceability': [2.0]},
   'Translating': {'Stability': [2.0]},
   'Planning': {'Stability': [2.0]}}},
 'atomic-false': {'support': 2,
  'activities': {'Translating': {'Duration': [-2.0, -2.0]},
   'Planning': {'Stability': [-2.0]}}},
 'concise-false': {'support': 2,
  'activities': {'Understanding': {'Uniqueness': [-1.0], 'Duration': [-2.0]}}},
 'density-too high': {'support': 3,
  'activities': {'Understanding': {'Duration': [-2.0]},
   'Verifying': {'Duration': [-2.0]},
   'Interpreting': {'Uniqueness': [-2.0]}}}}

## Quality Factor Interaction

In [42]:
# Rows with two quality factors and no context factor.
interaction = df_spec.query('`Quality Factor 2` != "na" & `Context Factor 1` == "na" & `Context Factor 2` == "na" & `Context Factor 3` == "na"')

# instances maps "<qf1>-<ef1> & <qf2>-<ef2>" to its support (number of rows) and to a
# nested mapping activity -> attribute -> list of reported impacts.
# NOTE(review): the key depends on slot order, so the same pair entered in swapped
# slots would be counted as a separate instance; confirm whether that is intended.
instances = {}
for _, row in interaction.iterrows():
    first = '{}-{}'.format(row["Quality Factor 1"], row["Entity-Fact 1"])
    second = '{}-{}'.format(row["Quality Factor 2"], row["Entity-Fact 2"])
    entityfact = first + ' & ' + second

    entry = instances.setdefault(entityfact, {'support': 0, 'activities': {}})
    entry['support'] += 1

    for actid in ('1', '2'):
        activity = row[f'Activity {actid}']
        if activity == "na":
            # empty activity slot: nothing to record
            continue
        per_attribute = entry['activities'].setdefault(activity, {})
        per_attribute.setdefault(row[f'Attribute {actid}'], []).append(row[f'Impact {actid}'])

instances

{'semantically redundant-true & horizontal traces-missing': {'support': 1,
  'activities': {'Implementing': {'Coherence': [-2.0]}}},
 'level of detail-too little & type-non-functional': {'support': 1,
  'activities': {'Understanding': {'Uniqueness': [-2.0]}}},
 'jargonic-true & density-too high': {'support': 1,
  'activities': {'Assessing Feasibility': {'Precision': [-2.0]}}}}

## Context Factor Interaction

In [46]:
# Rows that report a context factor in the first context-factor slot.
# NOTE(review): a row with an empty first slot but a filled second or third slot would
# be excluded here; presumably the slots are always filled in order, confirm against the data.
interaction = df_spec.query('`Context Factor 1` != "na"')

# instances maps "<quality factor(s)> & <context factor(s)>" to its support (number of
# rows) and to a nested mapping activity -> attribute -> list of reported impacts.
instances = {}
for _, row in interaction.iterrows():
    # assemble the key parts in column order: quality factors first, then context factors
    parts = ['{}-{}'.format(row["Quality Factor 1"], row["Entity-Fact 1"])]
    if row["Quality Factor 2"] != 'na':
        parts.append('{}-{}'.format(row["Quality Factor 2"], row["Entity-Fact 2"]))
    parts.append(str(row["Context Factor 1"]))
    for cid in ('2', '3'):
        context = row[f'Context Factor {cid}']
        if context != "na":
            parts.append(str(context))
    entityfact = ' & '.join(parts)

    entry = instances.setdefault(entityfact, {'support': 0, 'activities': {}})
    entry['support'] += 1

    for actid in ('1', '2'):
        activity = row[f'Activity {actid}']
        if activity == "na":
            # empty activity slot: nothing to record
            continue
        per_attribute = entry['activities'].setdefault(activity, {})
        per_attribute.setdefault(row[f'Attribute {actid}'], []).append(row[f'Impact {actid}'])

instances

{'orientation-solution & Involvement': {'support': 1,
  'activities': {'Understanding': {'Uniqueness': [0.0]}}},
 'orientation-solution & Novelty': {'support': 2,
  'activities': {'Assessing Feasibility': {'Precision': [2.0]},
   'Estimating Effort': {'Precision': [2.0]},
   'Translating': {'Stability': [1.0]}}},
 'atomic-true & Experience': {'support': 1,
  'activities': {'Understanding': {'Uniqueness': [3.0]}}},
 'orientation-solution & Peer Review': {'support': 1,
  'activities': {'Understanding': {'Uniqueness': [0.0]}}},
 'overloaded term-true & Involvement': {'support': 1,
  'activities': {'Understanding': {'Uniqueness': [2.0]}}},
 'density-too high & Experience': {'support': 1,
  'activities': {'Understanding': {'Duration': [0.0]}}},
 'density-too high & Supplementary Communication': {'support': 1,
  'activities': {'Verifying': {'Duration': [2.0]}}}}