In [9]:
import pandas as pd
# Create a reference to the CSV and import it into a Pandas DataFrame
csv_path = "Resources/EclipseBugs.csv"
eclipse_df = pd.read_csv(csv_path)

## Bugfixing Bonanza!

### Instructions

* Dig through the Jupyter Notebook provided and attempt to fix as many bugs as possible. There are a lot of them and the bugs get harder to deal with as the code progresses.

* Once you have finished bugfixing, perform some additional analysis on the dataset provided. See what interesting trends are buried deep within these bug logs for the Eclipse IDE. So long as you challenge yourself, bugs will pop up and you will get even more bugfixing practice.

### Hints

* After fixing the bugs in each block of code, be sure to run the cell below for an updated error.

* There are a few new concepts being covered within this Jupyter Notebook. The most complex of these concepts is that of multi-indexing and it is very likely that this is where many will get held up. Do not worry though, multi-indexing is not in the homework and is not required outside of this activity. It is simply an interesting/powerful feature of Pandas.


In [12]:
eclipse_df = eclipse_df.rename(columns={"Bug\nID": "Bug ID",
                                        "Assignee\nReal\nName": "Assignee Real Name",
                                        "Number of\nComments": "Number of Comments",
                                        "Reporter\nReal\nName": "Reporter Real Name",
                                        "Target\nMilestone": "Target Milestone"})

# Finding the average number of comments per bug
average_comments = eclipse_df["Number of Comments"].mean()
average_comments

8.75

Index(['Bug\nID', 'Product', 'Component', 'Assignee', 'Status', 'Resolution',
       'Summary', 'Changed', 'Assignee\nReal\nName', 'Classification',
       'Hardware', 'Number of\nComments', 'Opened', 'OS', 'Priority',
       'Reporter', 'Reporter\nReal\nName', 'Severity', 'Target\nMilestone',
       'Version', 'Votes'],
      dtype='object')

In [13]:
# Grouping the DataFrame by "Assignee"
assignee_group = eclipse_df.groupby("Assignee")

# Count how many of each component Assignees worked on and create DataFrame
assignee_work = pd.DataFrame(assignee_group["Component"].value_counts())
assignee_work.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Component
Assignee,Component,Unnamed: 2_level_1
Aaron_Ferguson,UI,10
Adam_Schlegel,UI,7
ChrisAustin,User Assistance,3
Claude_Knaus,UI,31
Claude_Knaus,Text,7


In [15]:
# Rename the "Component" column to "Component Bug Count"
assignee_work = assignee_work.rename(columns={"Component": "Component Bug Count"})
assignee_work.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Component Bug Count
Assignee,Component,Unnamed: 2_level_1
Aaron_Ferguson,UI,10
Adam_Schlegel,UI,7
ChrisAustin,User Assistance,3
Claude_Knaus,UI,31
Claude_Knaus,Text,7


In [18]:
# Find the percentage of bugs overall fixed by each Assignee
total_bugs = eclipse_df["Assignee"].count()
bugs_per_user = assignee_group["Assignee"].count()

user_bug_percent = pd.DataFrame((bugs_per_user/total_bugs)*100)
user_bug_percent.head()

Unnamed: 0_level_0,Assignee
Assignee,Unnamed: 1_level_1
Aaron_Ferguson,0.1
Adam_Schlegel,0.07
ChrisAustin,0.03
Claude_Knaus,0.38
Curtis_Windatt,0.06


In [20]:
user_bug_percent = user_bug_percent.rename(columns={"Assignee": "Percent of Total Bugs Assigned"})
user_bug_percent = user_bug_percent.reset_index()
user_bug_percent.head()

Unnamed: 0,Assignee,Percent of Total Bugs Assigned
0,Aaron_Ferguson,0.1
1,Adam_Schlegel,0.07
2,ChrisAustin,0.03
3,Claude_Knaus,0.38
4,Curtis_Windatt,0.06


In [23]:
assignee_work = assignee_work.reset_index()
assignee_work.head()

Unnamed: 0,index,Assignee,Component Bug Count,Percent of Total Bugs Assigned
0,0,Aaron_Ferguson,10,0.1
1,1,Adam_Schlegel,7,0.07
2,2,ChrisAustin,3,0.03
3,3,Claude_Knaus,31,0.38
4,4,Claude_Knaus,7,0.38


In [29]:
# Merge the "Percent of Total Bugs Assigned" into the DataFrame
assignee_work = assignee_work.merge(user_bug_percent, on="Assignee")

# Remove the extra columns
# assignee_work = assignee_work[["Assignee", "Percent of Total Bugs Assigned", "Component", "Component Bug Count"]]
assignee_work.head()

Unnamed: 0,index,Assignee,Component Bug Count,Percent of Total Bugs Assigned_x,Percent of Total Bugs Assigned_y,Percent of Total Bugs Assigned_x.1,Percent of Total Bugs Assigned_y.1,Percent of Total Bugs Assigned_x.2,Percent of Total Bugs Assigned_y.2,Percent of Total Bugs Assigned
0,0,Aaron_Ferguson,10,0.1,0.1,0.1,0.1,0.1,0.1,0.1
1,1,Adam_Schlegel,7,0.07,0.07,0.07,0.07,0.07,0.07,0.07
2,2,ChrisAustin,3,0.03,0.03,0.03,0.03,0.03,0.03,0.03
3,3,Claude_Knaus,31,0.38,0.38,0.38,0.38,0.38,0.38,0.38
4,4,Claude_Knaus,7,0.38,0.38,0.38,0.38,0.38,0.38,0.38
