In [None]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# Sample DataFrame: six example incidents, one list per column.
# NOTE(review): -1 in key_term_pred looks like "no key term found" (the pie
# chart code further down treats it as absent) - confirm with the data owner.
data = dict(
    MI_Incident=[f"Inc{i}" for i in range(1, 7)],
    Scibert_pred=[0, 1, 0, 1, 0, 1],
    Scibert_confidence=[0.7, 0.8, 0.9, 0.6, 0.85, 0.75],
    key_term_pred=[-1, 0, 1, 1, -1, 0],
    Combined_Output=[0, 1, 1, 1, 0, 0],
)

df = pd.DataFrame(data)

# Task 1: Histogram of Scibert_confidence, with the bars colored by Combined_Output
fig1 = px.histogram(
    df,
    x="Scibert_confidence",
    color="Combined_Output",
    nbins=5,
    title="Distribution of Scibert Confidence",
    labels={'Combined_Output': 'Combined Output'},  # legend title shown instead of the column name
)
# Axis titles are set per axis rather than through the layout shorthand.
fig1.update_xaxes(title_text="Scibert Confidence")
fig1.update_yaxes(title_text="Frequency")
fig1.show()

# Task 2: Count the rows whose key_term_pred is anything other than -1
# (summing the boolean mask counts its True entries; int() keeps a plain Python int).
non_negative_key_term_count = int(df['key_term_pred'].ne(-1).sum())
print(f"Number of non '-1' values in key_term_pred: {non_negative_key_term_count}")

# Task 3: Share of each Combined_Output class, with every class subdivided by
# whether a key term is present (key_term_pred != -1) or absent (== -1).

# Task 3.1: Row count per Combined_Output class
combined_output_counts = df['Combined_Output'].value_counts()

# Task 3.2 and 3.3: The same per-class counts, restricted to rows with / without a key term
key_term_present_in_combined = df[df['key_term_pred'] != -1]['Combined_Output'].value_counts()
key_term_absent_in_combined = df[df['key_term_pred'] == -1]['Combined_Output'].value_counts()

# A two-level sunburst draws the hierarchy directly: the inner ring holds the
# classes and the outer ring splits each class into present/absent. Separate
# Pie traces with independent domains would overlap each other instead of
# subdividing the class slices.
class_colors = {0: 'lightcoral', 1: 'lightblue'}
present_colors = {0: 'darkred', 1: 'darkblue'}

sunburst_ids = []
sunburst_labels = []
sunburst_parents = []
sunburst_values = []
sunburst_colors = []

for class_value in (0, 1):
    # .get() so a class with no rows contributes 0 instead of raising KeyError
    class_total = int(combined_output_counts.get(class_value, 0))
    if class_total == 0:
        continue

    class_id = str(class_value)
    sunburst_ids.append(class_id)
    sunburst_labels.append(class_id)
    sunburst_parents.append('')  # empty parent marks a top-level sector
    sunburst_values.append(class_total)
    sunburst_colors.append(class_colors[class_value])

    presence_split = (
        ('Key Term Present', key_term_present_in_combined.get(class_value, 0), present_colors[class_value]),
        ('Key Term Absent', key_term_absent_in_combined.get(class_value, 0), 'white'),
    )
    for presence_label, presence_count, presence_color in presence_split:
        if presence_count == 0:
            continue  # nothing to draw for an empty sub-sector
        # ids must be unique, whereas the display labels repeat across classes
        sunburst_ids.append(f"{class_id}/{presence_label}")
        sunburst_labels.append(presence_label)
        sunburst_parents.append(class_id)
        sunburst_values.append(int(presence_count))
        sunburst_colors.append(presence_color)

fig2 = go.Figure(go.Sunburst(
    ids=sunburst_ids,
    labels=sunburst_labels,
    parents=sunburst_parents,
    values=sunburst_values,
    branchvalues='total',  # each class value already equals the sum of its children
    textinfo='label+value+percent parent',
    # thin outline keeps the white "absent" sectors visible on a white background
    marker=dict(colors=sunburst_colors, line=dict(color='gray', width=1)),
))

fig2.update_layout(title_text="Pie Chart of Combined_Output with Key Term Presence/Absence Shading")
fig2.show()


In [None]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# Sample DataFrame, written row by row (one tuple per incident) and then
# pivoted into the column -> list-of-values mapping that pd.DataFrame consumes.
column_names = ('MI_Incident', 'Scibert_pred', 'Scibert_confidence', 'key_term_pred', 'Combined_Output')
sample_rows = (
    ('Inc1', 0, 0.7, -1, 0),
    ('Inc2', 1, 0.8, 0, 1),
    ('Inc3', 0, 0.9, 1, 1),
    ('Inc4', 1, 0.6, 1, 1),
    ('Inc5', 0, 0.85, -1, 0),
    ('Inc6', 1, 0.75, 0, 0),
)
# zip(*sample_rows) transposes the rows into per-column tuples
data = {name: list(values) for name, values in zip(column_names, zip(*sample_rows))}

df = pd.DataFrame(data)

# Task 1: One Scibert_confidence histogram per Combined_Output class
class_palette = ['lightcoral', 'lightblue']  # indexed by class value: 0 -> coral, 1 -> blue
for class_value in df['Combined_Output'].unique():
    # Keep only the rows belonging to the class being plotted
    class_df = df.loc[df['Combined_Output'] == class_value]
    fig = px.histogram(
        class_df,
        x="Scibert_confidence",
        nbins=5,
        title=f"Distribution of Scibert Confidence for Combined_Output={class_value}",
        labels={'Scibert_confidence': 'Scibert Confidence'},
        color_discrete_sequence=[class_palette[class_value]],  # single class-specific bar color
    )
    fig.update_xaxes(title_text="Scibert Confidence")
    fig.update_yaxes(title_text="Frequency")
    fig.show()

# Task 2: One pie chart per Combined_Output class showing how key_term_pred
# relates to that class: it matches the class, it is present but disagrees,
# or it is absent (-1).
match_colors = {0: 'darkred', 1: 'darkblue'}
for class_value in df['Combined_Output'].unique():
    # Data split for current class
    class_df = df[df['Combined_Output'] == class_value]
    key_term_values = class_df['key_term_pred']

    is_match = key_term_values == class_value
    is_absent = key_term_values == -1
    key_term_match = int(is_match.sum())  # Matching key_term
    key_term_absent = int(is_absent.sum())  # Absent key_term
    # Present-but-disagreeing rows need their own slice; without it the
    # percentages would not be shares of the whole class.
    key_term_mismatch = int((~is_match & ~is_absent).sum())

    # Class based coloring for the "match" slice; gray fallback for unexpected classes
    match_color = match_colors.get(int(class_value), 'gray')

    # Pie chart
    fig = go.Figure()
    fig.add_trace(go.Pie(
        labels=['Key Term Matches Combined Output', 'Key Term Mismatch', 'Key Term Absent'],
        values=[key_term_match, key_term_mismatch, key_term_absent],
        hole=0.4,
        textinfo='label+percent',
        sort=False,  # keep the slice order identical across the per-class charts
        marker=dict(
            colors=[match_color, 'lightgray', 'white'],
            line=dict(color='gray', width=1),  # outline so the white slice stays visible
        ),
        name=f"Combined_Output={class_value}"
    ))

    fig.update_layout(title_text=f"Pie Chart for Combined_Output={class_value} (Key Term Matches)")
    fig.show()
