In [42]:
import pandas as pd

df = pd.read_csv("bond_issue_log.csv")

df['severity'] = df['severity'].replace({'tiny': 1, 'low': 2, 'normal': 3, 'high': 4, 'critical': 5})


In [25]:
# 1.	What is the least reliable component of the system?

# Reliability Score = (Bug Frequency * 0.4) + (Severity Index * 0.3) + (Reopened rate * 0.3)

# Bug frequency per module = the total number of bugs reported
# Severity Index per module = the average severity of the bugs reported 
# Reopned Rate per module = divide the number of reopened bugs by the total number of bugs

def calc_reliability_score(df):  
    """
    Calculate reliability score for module
    """
    
    # Calculate Bug Counts Trend per sprint
    frequency_index= df.groupby('module')['title'].count()

    # Calculate Severity Index Trend per sprint
    severity_index = df.groupby('module')['severity'].mean()

    # Calculate Reopened Rate Trend per sprint
    reopened_index = df.groupby('module')['reopened #'].mean()

    # Normalize the values
    frequency_index = frequency_index / frequency_index.max()
    severity_index = severity_index / severity_index.max()
    reopened_index = reopened_index / reopened_index.max()

    # Calculate Quality Trend Score as per given formula
    module_reliability_score = (frequency_index * 0.4) + (severity_index * 0.3) + (reopened_index * 0.3)

    return module_reliability_score


modules_scores = calc_reliability_score(df)
modules_scores





module
Core        0.785377
Module A    0.803663
Module B    0.688554
Module C    0.930874
Module D    0.624570
Module E    0.543377
Module F    0.657705
dtype: float64


In [23]:
def calc_quality_trend_score(df):
    """
    Calculate Quality Trend Score of sprints based on bug_count_trend, severity_index_trend, reopened_rate_trend.
    """
    
    # Calculate Bug Counts Trend per sprint
    bug_count_trend = df.groupby('sprint / week #')['title'].count()

    # Calculate Severity Index Trend per sprint
    severity_index_trend = df.groupby('sprint / week #')['severity'].mean()

    # Calculate Reopened Rate Trend per sprint
    reopened_rate_trend = df.groupby('sprint / week #')['reopened #'].mean()

    # Normalize the values
    bug_count_trend = bug_count_trend / bug_count_trend.max()
    severity_index_trend = severity_index_trend / severity_index_trend.max()
    reopened_rate_trend = reopened_rate_trend / reopened_rate_trend.max()

    # Calculate Quality Trend Score as per given formula
    quality_trend_score = (bug_count_trend * 0.4) + (severity_index_trend * 0.3) + (reopened_rate_trend * 0.3)

    return quality_trend_score


# Call the function
quality_trend_scores = calc_quality_trend_score(df)

# Displaying results
quality_trend_scores

sprint / week #
12    0.796319
13    0.805782
14    0.877506
15    0.718109
16    0.890319
17    0.875314
18    0.792239
19    0.879391
20    0.810436
21    0.831408
22    0.883529
23    0.860205
24    0.925689
25    0.782092
26    0.828935
27    0.660564
28    0.749139
29    0.691371
30    0.737104
31    0.532043
32    0.750979
33    0.709427
34    0.742228
35    0.707636
36    0.782004
37    0.688201
38    0.822475
39    0.670494
40    0.639713
dtype: float64

In [30]:
# 3.	What weeks were the most dynamic in testing/development?

# Bugs Found: Count the number of bugs identified each week. A high number of reported bugs could indicate a week of intensive testing.

# Bugs Resolved: Count the number of bugs resolved each week. A high number of resolved bugs could point to the productive and dynamic development period.

def count_bugs_dynamic(df):
    """
    Counts the number of bugs found and resolved in each sprint / week. orders descending
    """
    # Calculate bugs found in each sprint / week
    bugs_found = df[df['status'].isin(['verified', 'opened'])].groupby('sprint / week #').size().sort_values(ascending=False)

    # Calculate bugs resolved in each sprint / week
    bugs_resolved = df[df['status'] == 'fixed'].groupby('sprint / week #').size().sort_values(ascending=False)

    return bugs_found, bugs_resolved


# Call the function
dynamic_trend = count_bugs_dynamic(df)

# Displaying results
dynamic_trend

(sprint / week #
 16    40
 17    38
 14    37
 19    36
 23    36
 24    36
 13    36
 15    33
 20    31
 22    31
 21    29
 25    29
 12    28
 18    28
 28    27
 26    27
 27    26
 29    19
 37    12
 38     8
 36     7
 34     6
 33     5
 30     4
 32     3
 35     3
 40     3
 39     2
 dtype: int64,
 sprint / week #
 22    16
 40    14
 38    13
 39    11
 24    11
 18    10
 23     9
 16     9
 35     9
 34     8
 32     8
 12     8
 26     8
 17     8
 30     7
 37     7
 19     7
 21     7
 25     6
 13     6
 29     5
 27     5
 31     4
 28     4
 36     4
 20     4
 33     3
 15     2
 14     1
 dtype: int64)

In [31]:
# 4.	What weeks were the most silent?
# Bugs Found: Weeks with a lower number of discovered bugs could indicate quieter testing periods.

# Bugs Resolved: Weeks with a less number of resolved bugs could point towards a less active development period.

def count_bugs_silent(df):
    """
    Counts the number of bugs found and resolved in each sprint / week. orders ascending
    """
    # Calculate bugs found in each sprint / week
    bugs_found = df[df['status'].isin(['verified', 'opened'])].groupby('sprint / week #').size().sort_values(ascending=True)

    # Calculate bugs resolved in each sprint / week
    bugs_resolved = df[df['status'] == 'fixed'].groupby('sprint / week #').size().sort_values(ascending=True)

    return bugs_found, bugs_resolved


# Call the function
silent_trend = count_bugs_silent(df)

# Displaying results
silent_trend

(sprint / week #
 39     2
 40     3
 35     3
 32     3
 30     4
 33     5
 34     6
 36     7
 38     8
 37    12
 29    19
 27    26
 28    27
 26    27
 12    28
 18    28
 21    29
 25    29
 22    31
 20    31
 15    33
 23    36
 19    36
 13    36
 24    36
 14    37
 17    38
 16    40
 dtype: int64,
 sprint / week #
 14     1
 15     2
 33     3
 20     4
 31     4
 36     4
 28     4
 27     5
 29     5
 25     6
 13     6
 21     7
 37     7
 30     7
 19     7
 34     8
 32     8
 12     8
 26     8
 17     8
 23     9
 35     9
 16     9
 18    10
 24    11
 39    11
 38    13
 40    14
 22    16
 dtype: int64)

In [41]:
# 5.	Suggest a threshold for bug quantity per week (take into consideration their severity)

# To calculate a threshold, we could assign points to bugs based on their severity level. 
# For example I would suggest to set threshold to 200 points, this could be reached with ten tiny-severity bugs, or mix of severity of bugs.


def check_threshold(df, threshold):
    """
    Check if the total severity points exceed the threshold in each sprint / week.
    """

    # Calculate total severity points for each sprint / week
    total_severity_points = df.groupby('sprint / week #')['severity'].sum()

    # Check if total severity points exceed the threshold
    exceeds_threshold = total_severity_points > threshold

    return exceeds_threshold


threshold_check = check_threshold(df, 200)

threshold_check

sprint / week #
12     True
13     True
14     True
15    False
16     True
17     True
18     True
19     True
20     True
21     True
22     True
23     True
24     True
25     True
26     True
27    False
28    False
29    False
30    False
31    False
32    False
33    False
34    False
35    False
36    False
37    False
38     True
39     True
40     True
Name: severity, dtype: bool