In [14]:
%pip install plotly
%pip install -U kaleido

import os
import shutil
import sys
import tempfile

# --- Kaleido workaround: run the launcher from a copy in the temp directory ---
# The launcher bundled with the kaleido package (and its "bin" directory, when
# present) is copied to <tmp>/kaleido_exec and KALEIDO_EXECUTABLE_PATH is set
# to that copy. Intended to dodge install paths containing spaces; this is
# best-effort, so any failure is reported and the cell keeps running.
try:
    import kaleido

    kaleido_dir = os.path.dirname(kaleido.__file__)
    kaleido_exec_dir = os.path.join(kaleido_dir, "executable")
    kaleido_exec = os.path.join(kaleido_exec_dir, "kaleido")

    if os.path.exists(kaleido_exec):
        # Stage the launcher; copy2 refreshes the copy on every run.
        temp_exec_dir = os.path.join(tempfile.gettempdir(), "kaleido_exec")
        temp_exec = os.path.join(temp_exec_dir, "kaleido")
        os.makedirs(temp_exec_dir, exist_ok=True)
        shutil.copy2(kaleido_exec, temp_exec)

        # The companion "bin" directory is copied only once; an existing
        # copy from a previous run is reused untouched.
        bin_dir = os.path.join(kaleido_exec_dir, "bin")
        temp_bin_dir = os.path.join(temp_exec_dir, "bin")
        if os.path.exists(bin_dir) and not os.path.exists(temp_bin_dir):
            shutil.copytree(bin_dir, temp_bin_dir)

        os.environ["KALEIDO_EXECUTABLE_PATH"] = temp_exec
except Exception as exc:
    print("Kaleido workaround failed:", exc)

import matplotlib.pyplot as plt
import pandas as pd
from jira import JIRA
from IPython.display import display, Markdown, HTML
import io
import base64
import seaborn as sns
import plotly.graph_objects as go
from jira import JIRA




data = pd.read_csv('../data/processed/DemoData.csv')

# Parse 'Created' as UTC, then drop the timezone so the month arithmetic
# below works on naive timestamps. Unparseable values become NaT.
if 'Created' in data.columns:
    data['Created'] = pd.to_datetime(
        data['Created'], errors='coerce', utc=True
    ).dt.tz_localize(None)

if (
    'Created' in data.columns
    and 'Components' in data.columns
    and not data['Created'].isnull().all()
):
    # Window = the month of the newest 'Created' value plus the 5 months
    # before it, compared as 'YYYY-MM' strings.
    last_month = data['Created'].max().to_period('M')
    last_6_months = pd.period_range(end=last_month, periods=6, freq='M').astype(str)
    data['Created_Month'] = data['Created'].dt.to_period('M').astype(str)
    data_last6 = data[data['Created_Month'].isin(last_6_months)]
else:
    # Without usable dates the window cannot be computed. Fall back to all
    # rows so the code below never hits an undefined `data_last6`; this
    # mirrors the fallback inside component_risk_table().
    data_last6 = data.copy()

  

# Count rows per raw 'Components' value (cells are NOT split here, so a cell
# listing several components counts as its own label). Keep the `top_n` most
# frequent labels and fold everything else into a single 'Other' slice.
component_counts = data_last6['Components'].value_counts()
top_n = 10
# Explicit positional slice + copy: adding the 'Other' bucket below must
# never write back into `component_counts`.
top_components = component_counts.iloc[:top_n].copy()
other_count = component_counts.iloc[top_n:].sum()
if other_count > 0:
    # Add to, rather than overwrite, a real component literally named 'Other'.
    top_components['Other'] = top_components.get('Other', 0) + other_count

labels = top_components.index.tolist()
values = top_components.values.tolist()



# --- Report title banner ---
# Static HTML header (title, one-line description, data window, intended use)
# rendered at the top of the cell output. The f-string has no placeholders,
# so the project name "DemoData (APP)" is hard-coded in the markup.

report_title_html = f"""
<div style="background: #111; border-radius: 12px; padding: 18px 28px; margin-bottom: 24px; box-shadow: 0 2px 8px #222; text-align:center;">
    <h2 style="margin-top:0; color:#ffe066;">🔎 <b>Bug Risk Analysis: DemoData (APP)</b></h2>
    <p style="font-size: 1.5em; color: #fff; margin: 0 auto; display: inline-block; text-align: center;">
        Analyzing bug distribution and risk scores to spotlight the most vulnerable components.<br>
        <b>Data window:</b> Last 6 months<br>
        <b>Use:</b> Guide testing, triage, and resource allocation.
    </p>
</div>
"""
display(HTML(report_title_html))


# Interactive donut chart of bug counts per component.
# Every slice is pulled out slightly and outlined in black; slice text is
# large and white so it stays readable on the dark background.
pie_trace = go.Pie(
    labels=labels,
    values=values,
    hole=0.3,
    pull=[0.08] * len(labels),
    marker={'line': {'color': '#000', 'width': 2}},
    hoverinfo='label+percent',
    textinfo='label+percent',
    textfont_size=20,
    textfont_color='white',
)
fig = go.Figure(data=[pie_trace])

# Dark theme, global white font, and an enlarged 900x700 canvas.
layout_options = {
    'title_text': 'DemoData (APP) Bugs Distribution by Component (Last 6 Months)',
    'title_font_size': 20,
    'showlegend': True,
    'paper_bgcolor': '#222',
    'plot_bgcolor': '#222',
    'font': {'color': 'white', 'size': 18},
    'width': 900,
    'height': 700,
}
fig.update_layout(**layout_options)
fig.show()

# --- Fix for Kaleido error due to spaces in path ---
from IPython.display import Image

import plotly.io as pio

# Note: Do not set pio.kaleido.scope attributes; use pio.write_image(..., format="png", width=900, height=700) when exporting images.



# One row per (bug, component): split each comma-separated 'Components' cell,
# give every entry its own row, and trim surrounding whitespace.
component_lists = data_last6['Components'].str.split(',')
exploded = data_last6.assign(Component=component_lists).explode('Component')
exploded['Component'] = exploded['Component'].str.strip()

# Risk rank grows with frequency: the most frequent component gets the
# largest rank; ties share the lowest rank of their group (method='min').
component_counts = exploded['Component'].value_counts()
component_risk_rank = component_counts.rank(method='min', ascending=True).astype(int)
component_risk_map = component_risk_rank.to_dict()
max_rank = component_risk_rank.max()

# Per-component summary: number of distinct bug keys plus the mapped rank.
bugs_by_component = exploded.groupby('Component').agg(Bug_Count=('key', 'nunique'))
component_summary = bugs_by_component.reset_index()
component_summary['Risk_Score'] = component_summary['Component'].map(component_risk_map)

def get_explanation(component, risk_score, max_rank):
    """Return a one-sentence risk explanation for a component.

    Args:
        component: Component name, inserted verbatim (quoted) into the text.
        risk_score: The component's risk rank.
        max_rank: The highest rank among all components.

    Returns:
        The "high-risk" sentence when ``risk_score`` equals ``max_rank``
        (checked first, so it wins even when ``max_rank`` is 2 or less),
        the "low-risk" sentence when ``risk_score`` is 2 or less, and the
        "moderate risk" sentence otherwise.
    """
    if risk_score == max_rank:
        return f'The "{component}" component has a very high history of bugs, making it high-risk.'
    if risk_score <= 2:
        return f'The "{component}" component has a low history of bugs, making it low-risk.'
    return f'The "{component}" component has a moderate risk based on recent bug frequency.'

# Attach the human-readable explanation for each component's risk score.
component_summary['Explanation'] = [
    get_explanation(name, score, max_rank)
    for name, score in zip(component_summary['Component'], component_summary['Risk_Score'])
]

# ...rest of your table code...
def component_risk_table(data, project_name):
    """Compute per-bug risk scores from component frequency in the last 6 months.

    The window is the six calendar months ending at the latest 'Created' or
    'Resolved' date; a bug is in the window when either date falls inside it.
    If either date column is missing, or every 'Created' value is NaT, all
    rows are used instead.

    Risk rank grows with frequency, consistent with the rest of this file:
    the most frequent 'Components' value receives the highest rank.

    Args:
        data: DataFrame with at least 'key' and 'Components' columns, and
            optionally 'Created' / 'Resolved'.
            NOTE(review): this frame is modified in place - the date columns
            are re-parsed and 'Created_Month' / 'Resolved_Month' are added.
            Kept as-is because other code may rely on it.
        project_name: Currently unused.

    Returns:
        None. NOTE(review): the summary frame, the cell styler and the top-N
        counts built here are not rendered or returned by the visible code.
    """
    # Normalise both date columns to naive timestamps (parsed as UTC;
    # unparseable values become NaT).
    for date_col in ('Created', 'Resolved'):
        if date_col in data.columns:
            data[date_col] = pd.to_datetime(
                data[date_col], errors='coerce', utc=True
            ).dt.tz_localize(None)

    if (
        'Created' in data.columns
        and 'Resolved' in data.columns
        and not data['Created'].isnull().all()
    ):
        # max(a, NaT) returns `a`, so an all-NaT 'Resolved' column is harmless.
        last_month = max(data['Created'].max(), data['Resolved'].max())
        last_month = last_month.to_period('M')
        last_6_months = pd.period_range(end=last_month, periods=6, freq='M').astype(str)
        data['Created_Month'] = data['Created'].dt.to_period('M').astype(str)
        data['Resolved_Month'] = data['Resolved'].dt.to_period('M').astype(str)
        in_window = (
            data['Created_Month'].isin(last_6_months)
            | data['Resolved_Month'].isin(last_6_months)
        )
        data_last6 = data[in_window]
    else:
        data_last6 = data.copy()

    # Fix: rank ascending so that a higher bug count yields a higher risk
    # score. The previous ascending=False gave the busiest component rank 1,
    # which get_explanation() then described as "low-risk".
    component_risk_rank = (
        data_last6['Components']
        .value_counts()
        .rank(ascending=True, method='min')
        .astype(int)
    )
    component_risk_map = component_risk_rank.to_dict()
    max_rank = component_risk_rank.max()

    summary_df = data[['key', 'Components']].copy()
    # Components not seen inside the window default to the lowest score (1).
    summary_df['Risk Score'] = data['Components'].apply(
        lambda x: component_risk_map.get(x, 1)
    )
    summary_df['Explanation'] = [
        get_explanation(components, score, max_rank)
        for components, score in zip(summary_df['Components'], summary_df['Risk Score'])
    ]
    summary_df = summary_df.rename(columns={'key': '🐞 Bug ID', 'Components': '🧩 Components'})

    def color_risk(val):
        """Return the CSS for a Risk Score cell: red = max, green <= 2, else yellow."""
        try:
            score = int(str(val).split()[0])
        except (ValueError, IndexError):
            # Empty or non-numeric cell: leave it unstyled.
            return ''
        if score == max_rank:
            color = '#ff4d4d'  # Red
            text_color = 'white'
        elif score <= 2:
            color = '#85e085'  # Green
            text_color = 'black'
        else:
            color = '#ffd966'  # Yellow
            text_color = 'black'
        return f'background-color: {color}; color: {text_color}; font-weight: bold;'

    # Per-component counts for the window: split multi-component cells first.
    all_components = (
        data_last6['Components']
        .dropna()
        .astype(str)
        .str.split(',')
        .explode()
        .str.strip()
    )
    component_counts = all_components.value_counts()
    top_n = 10
    # Copy the slice so adding 'Other' never writes into `component_counts`.
    top_components = component_counts.iloc[:top_n].copy()
    other_count = component_counts.iloc[top_n:].sum()
    if other_count > 0:
        top_components['Other'] = top_components.get('Other', 0) + other_count

    # NOTE(review): the function ends here without returning or displaying
    # summary_df / top_components.

# Component-centric summary, rebuilt from `exploded`: distinct bug keys per
# component, ordered from most to fewest bugs.
bugs_per_component = exploded.groupby('Component').agg(Bug_Count=('key', 'nunique'))
component_summary = bugs_per_component.sort_values('Bug_Count', ascending=False).reset_index()

# --- Summary values ---
# `component_counts` here is the module-level per-component occurrence count
# built from `exploded`; its largest entry is reported as the top component.
project_name = "DemoData (APP)"
total_bugs = data_last6['key'].nunique()
top_component = component_counts.idxmax()
top_count = component_counts.max()
is_top_component = exploded['Component'] == top_component
top_bug_count = exploded.loc[is_top_component, 'key'].nunique()

summary_html = f"""
<div style="background: #111; border-radius: 12px; padding: 18px 28px; margin-bottom: 24px; box-shadow: 0 2px 8px #222;">
    <h2 style="margin-top:0; color:#ffe066;">✨ <b>Risk Scoring Summary for {project_name}</b> ✨</h2>
    <ul style="font-size: 1.2em; color: #fff;">
        <li><b>Total bugs (last 6 months):</b> {total_bugs}</li>
        <li><b>Top risk component:</b> <span style="color:#ffd166;">{top_component}</span>
            ({top_count} occurrences in {top_bug_count} bugs)
        </li>
        <li><b>Scoring based on:</b> Created date in last 6 months</li>
    </ul>
</div>
"""
display(HTML(summary_html))

# Risk score = ascending rank of the unique-bug count, so the component with
# the most bugs gets the highest score; ties share the lowest rank of the tie.
bug_count_rank = component_summary['Bug_Count'].rank(method='min', ascending=True)
component_summary['Risk_Score'] = bug_count_rank.astype(int)
max_rank = component_summary['Risk_Score'].max()

def get_explanation(component, risk_score, max_rank):
    """Describe a component's risk level in one sentence.

    The top-ranked component (``risk_score == max_rank``) is reported as
    high-risk; this check takes precedence over the low-risk one. Scores of
    2 or less are reported as low-risk, everything else as moderate.

    Args:
        component: Component name shown in quotes inside the sentence.
        risk_score: Rank assigned to the component.
        max_rank: Largest rank among all components.

    Returns:
        The explanation sentence as a string.
    """
    is_top_ranked = risk_score == max_rank
    is_low_ranked = risk_score <= 2

    if is_top_ranked:
        explanation = f'The "{component}" component has a very high history of bugs, making it high-risk.'
    elif is_low_ranked:
        explanation = f'The "{component}" component has a low history of bugs, making it low-risk.'
    else:
        explanation = f'The "{component}" component has a moderate risk based on recent bug frequency.'
    return explanation

# Explanations are computed for every component before trimming to the top
# 10, so each sentence reflects the overall `max_rank`.
component_summary['Explanation'] = [
    get_explanation(name, score, max_rank)
    for name, score in zip(component_summary['Component'], component_summary['Risk_Score'])
]

# Rows shown in the table: the first 10 rows of the summary.
component_summary_top = component_summary.iloc[:10]

# Lowest and highest risk score among the displayed rows (used for colouring).
displayed_scores = component_summary_top['Risk_Score']
min_risk = displayed_scores.min()
max_risk = displayed_scores.max()

# Style table with color for Risk Score
def color_risk(val):
    """Return the CSS declaration used to colour one Risk Score cell.

    Reads the module-level ``max_risk`` and ``min_risk``. A score equal to
    ``max_risk`` is red with white text (checked first), a score equal to
    ``min_risk`` is green, and any other score is yellow.

    Args:
        val: Cell value; converted with ``int()``.

    Returns:
        A CSS string, or ``''`` if anything goes wrong while classifying
        the value (for example, it cannot be converted to an int).
    """
    try:
        score = int(val)
        if score == max_risk:
            background, foreground = '#ff4d4d', 'white'  # red: high risk
        elif score == min_risk:
            background, foreground = '#85e085', 'black'  # green: low risk
        else:
            background, foreground = '#ffd966', 'black'  # yellow: moderate
    except Exception:
        return ''
    return f'background-color: {background}; color: {foreground}; font-weight: bold;'

# Display names for the table columns, and the CSS applied to the wrapped
# 'Explanation' cells and to the header cells.
display_columns = {'Component': '🧩 Component', 'Bug_Count': '🐞 Bug Count', 'Risk_Score': 'Risk Score'}
explanation_css = {'white-space': 'pre-wrap', 'word-break': 'break-word', 'max-width': '400px'}
header_css = [('background-color', "#1a1368"), ('color', 'white'), ('font-size', '1.1em')]

# Build the Styler step by step: wrap explanations, colour each Risk Score
# cell through color_risk, then style the header row.
styled_summary = component_summary_top.rename(columns=display_columns).style
styled_summary = styled_summary.set_properties(subset=['Explanation'], **explanation_css)
styled_summary = styled_summary.map(color_risk, subset=['Risk Score'])
styled_summary = styled_summary.set_table_styles([{'selector': 'th', 'props': header_css}])

# Render the styled table centred under a heading, with a short footnote.
display(HTML(f"""
<div style="display: flex; flex-direction: column; align-items: center;">
    <h3 style="text-align:center; color:#ffd166;">📊 <b>Top Components by Bug Count</b></h3>
    <div style="min-width:450px; max-width:900px;">
        {styled_summary.to_html(escape=False)}
    </div>
    <p style="color:#888; font-size:0.95em; margin-top:18px;">🔎 <i>Table shows top 10 components by unique bug count.</i></p>
</div>
"""))

# Run the per-bug risk computation on the full dataset; it returns None.
component_risk_table(data, "DemoData (APP)")

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


Unnamed: 0,🧩 Component,🐞 Bug Count,Risk Score,Explanation
0,Call,29,21,"The ""Call"" component has a very high history of bugs, making it high-risk."
1,CallHistory,20,20,"The ""CallHistory"" component has a moderate risk based on recent bug frequency."
2,Messages,8,19,"The ""Messages"" component has a moderate risk based on recent bug frequency."
3,Messaging,5,17,"The ""Messaging"" component has a moderate risk based on recent bug frequency."
4,Notifications,5,17,"The ""Notifications"" component has a moderate risk based on recent bug frequency."
5,Call detail,4,15,"The ""Call detail"" component has a moderate risk based on recent bug frequency."
6,Call-action,4,15,"The ""Call-action"" component has a moderate risk based on recent bug frequency."
7,Tech,3,11,"The ""Tech"" component has a moderate risk based on recent bug frequency."
8,Settings,3,11,"The ""Settings"" component has a moderate risk based on recent bug frequency."
9,People/Contacts,3,11,"The ""People/Contacts"" component has a moderate risk based on recent bug frequency."
