In [1]:
# Import Dependencies
import pandas as pd

In [2]:
# Load the Eclipse bug export from the Resources folder and preview the first rows
csv_path = "../Resources/EclipseBugs.csv"
eclipse_df = pd.read_csv(filepath_or_buffer=csv_path)
eclipse_df.head(n=5)

Unnamed: 0,Bug ID,Product,Component,Assignee,Status,Resolution,Summary,Changed,Assignee Real Name,Classification,...,Number of Comments,Opened,OS,Priority,Reporter,Reporter Real Name,Severity,Target Milestone,Version,Votes
0,3638,JDT,UI,aeschli,VERIFIED,FIXED,Package Viewer: order resource folders before ...,1/17/2002 7:28,Martin Aeschlimann,Eclipse,...,3,10/10/2001 22:58,Windows NT,P1,aeschli,Martin Aeschlimann,major,---,2,0
1,3854,JDT,UI,aeschli,VERIFIED,FIXED,Wrong execution's classpath. (1GEY0W0),1/18/2002 4:02,Martin Aeschlimann,Eclipse,...,5,10/10/2001 23:01,Windows NT,P1,david_audel,David Audel,normal,---,2,0
2,4188,JDT,UI,aeschli,VERIFIED,FIXED,type hierachy - typo (1GJW2XJ),1/28/2002 3:12,Martin Aeschlimann,Eclipse,...,3,10/10/2001 23:07,Windows 2000,P1,erich_gamma,Erich Gamma,normal,---,2,0
3,5115,JDT,Debug,aeschli,VERIFIED,FIXED,Workspace source locator fails with mulitple p...,11/13/2001 10:11,Martin Aeschlimann,Eclipse,...,10,10/19/2001 13:41,Windows 2000,P1,darin.eclipse,Darin Wright,normal,---,2,0
4,5820,JDT,UI,aeschli,VERIFIED,FIXED,Close all editors brings up hierarchy of object,11/20/2001 16:22,Martin Aeschlimann,Eclipse,...,5,11/12/2001 18:18,Windows 2000,P1,jed.anderson,Jed Anderson,normal,---,2,0


In [3]:
# Get a reference to the column names.
# The Index repr prints every label as a quoted string, so characters such as
# an embedded newline show up as "\n" here.
eclipse_df.columns

Index(['Bug\nID', 'Product', 'Component', 'Assignee', 'Status', 'Resolution',
       'Summary', 'Changed', 'Assignee\nReal\nName', 'Classification',
       'Hardware', 'Number of\nComments', 'Opened', 'OS', 'Priority',
       'Reporter', 'Reporter\nReal\nName', 'Severity', 'Target\nMilestone',
       'Version', 'Votes'],
      dtype='object')

In [4]:
# Replace the newline embedded in any column header with a single space.
# A callable mapper is applied to every label, so headers do not have to be
# listed one by one and a header missing from a hand-written mapping cannot
# silently keep its newline. Non-string labels are passed through unchanged.
eclipse_df = eclipse_df.rename(
    columns=lambda label: label.replace("\n", " ") if isinstance(label, str) else label
)
eclipse_df.columns

Index(['Bug ID', 'Product', 'Component', 'Assignee', 'Status', 'Resolution',
       'Summary', 'Changed', 'Assignee Real Name', 'Classification',
       'Hardware', 'Number of Comments', 'Opened', 'OS', 'Priority',
       'Reporter', 'Reporter Real Name', 'Severity', 'Target Milestone',
       'Version', 'Votes'],
      dtype='object')

In [5]:
# Mean of the per-bug comment counts across the whole data set
comment_counts = eclipse_df["Number of Comments"]
average_comments = comment_counts.mean()
average_comments

8.75

In [6]:
# Group the bugs by the person they are assigned to
assignee_group = eclipse_df.groupby("Assignee")

# Count the bugs per (Assignee, Component) pair and store them in a one-column
# DataFrame. The column is named explicitly: the name pandas gives the
# value_counts() result differs between versions ("Component" before 2.0,
# "count" from 2.0 on), and later cells look the column up as "Component".
assignee_work = assignee_group["Component"].value_counts().to_frame(name="Component")
assignee_work.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Component
Assignee,Component,Unnamed: 2_level_1
Aaron_Ferguson,UI,10
Adam_Schlegel,UI,7
ChrisAustin,User Assistance,3
Claude_Knaus,UI,31
Claude_Knaus,Text,7


In [7]:
# Give the count column a descriptive label: "Component" -> "Component Bug Count"
count_column_labels = {"Component": "Component Bug Count"}
assignee_work = assignee_work.rename(count_column_labels, axis="columns")
assignee_work.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Component Bug Count
Assignee,Component,Unnamed: 2_level_1
Aaron_Ferguson,UI,10
Adam_Schlegel,UI,7
ChrisAustin,User Assistance,3
Claude_Knaus,UI,31
Claude_Knaus,Text,7


In [8]:
# Share of all assigned bugs, as a percentage, that belongs to each Assignee
bugs_per_user = assignee_group["Assignee"].count()
total_bugs = eclipse_df["Assignee"].count()

user_bug_percent = bugs_per_user.div(total_bugs).mul(100).to_frame()
user_bug_percent.head()

Unnamed: 0_level_0,Assignee
Assignee,Unnamed: 1_level_1
Aaron_Ferguson,0.1
Adam_Schlegel,0.07
ChrisAustin,0.03
Claude_Knaus,0.38
Curtis_Windatt,0.06


In [9]:
# Label the percentage column, then move "Assignee" out of the index into a
# regular column, in a single chained expression
user_bug_percent = (
    user_bug_percent
    .rename(columns={"Assignee": "Percent of Total Bugs Assigned"})
    .reset_index()
)
user_bug_percent.head()

Unnamed: 0,Assignee,Percent of Total Bugs Assigned
0,Aaron_Ferguson,0.1
1,Adam_Schlegel,0.07
2,ChrisAustin,0.03
3,Claude_Knaus,0.38
4,Curtis_Windatt,0.06


In [10]:
# Turn the ("Assignee", "Component") index of "assignee_work" into regular
# columns, attach each assignee's overall percentage, and put the columns in
# display order.
# - validate="many_to_one" makes the merge fail loudly if "user_bug_percent"
#   ever holds more than one row per assignee, which would duplicate rows.
# - how="inner" spells out the default join that was already in effect.
assignee_work = (
    assignee_work
    .reset_index()
    .merge(user_bug_percent, on="Assignee", how="inner", validate="many_to_one")
    .loc[:, ["Assignee", "Percent of Total Bugs Assigned",
             "Component", "Component Bug Count"]]
)
assignee_work.head()

Unnamed: 0,Assignee,Percent of Total Bugs Assigned,Component,Component Bug Count
0,Aaron_Ferguson,0.1,UI,10
1,Adam_Schlegel,0.07,UI,7
2,ChrisAustin,0.03,User Assistance,3
3,Claude_Knaus,0.38,UI,31
4,Claude_Knaus,0.38,Text,7
