In [None]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import sys
import openpyxl

In [None]:
# Load the first worksheet of the source workbook into the working DataFrame.
# The path is relative to the notebook's working directory; the file name is
# reproduced exactly as referenced (including its spelling) because it has to
# match the file being opened.
df = pd.read_excel(
    io="Data for data visuliazation_v4.xlsx",
    sheet_name=0,
)

# NLP Task Distribution by Application Category

This section splits the 'NLP task Category' column into individual tasks and counts how often each task occurs within each application category (the *real* value). A *virtual* value is also derived from each count: it is used only to size the sectors of the sunburst chart for better visualization, while the labels shown on the chart carry the real counts for clarity.

In [None]:
# Build the per-(application category, NLP task) count table that feeds the
# sunburst chart, plus per-category totals and the display labels.

# (Category, task) pairs that occur fewer times than this are left off the chart.
MIN_TASK_COUNT = 3
# Amount added to specific real counts to obtain the value used for sizing
# sectors (real 3 -> 5, real 5 -> 6). Every other count is drawn at its real size.
VIRTUAL_VALUE_BUMP = {3: 2, 5: 1}

# Explode the comma-separated 'NLP task Category' cell into one row per task.
df['NLP_Tasks'] = df['NLP task Category'].str.split(',')
df_exploded = df.explode('NLP_Tasks')
df_exploded['NLP_Tasks'] = df_exploded['NLP_Tasks'].str.strip()

# Blank tokens (e.g. produced by a trailing or doubled comma) are not tasks,
# so keep them out of the counts. Missing values pass this mask and are then
# dropped by groupby, as before.
task_rows = df_exploded[df_exploded['NLP_Tasks'].ne('').to_numpy()]

app_nlp_detail = (
    task_rows
    .groupby(['Application Category', 'NLP_Tasks'])
    .size()
    .reset_index(name='real_value')
)

# Keep only sufficiently frequent pairs. The explicit .copy() makes the result
# an independent frame, so adding columns below does not raise
# SettingWithCopyWarning.
app_nlp_detail = app_nlp_detail[app_nlp_detail['real_value'] >= MIN_TASK_COUNT].copy()

# Virtual values only influence sector sizes; labels always show real counts.
real_counts = app_nlp_detail['real_value']
app_nlp_detail['virtual_value'] = (
    real_counts + real_counts.map(VIRTUAL_VALUE_BUMP).fillna(0).astype(int)
)

# Per-category totals of the real counts that survived the filter.
app_counts = (
    app_nlp_detail
    .groupby('Application Category')['real_value']
    .sum()
    .reset_index(name='app_count')
)

# Labels are built with vectorised string operations, which (unlike a row-wise
# apply) also work when no rows survive the filter.
app_nlp_detail['label'] = (
    app_nlp_detail['NLP_Tasks'] + '(' + app_nlp_detail['real_value'].astype(str) + ')'
)

# '<br>' inserts a line break before "and" so long category names wrap in the chart.
app_counts['label_apps'] = (
    app_counts['Application Category'].str.replace(' and', '<br>and', regex=False)
    + '(' + app_counts['app_count'].astype(str) + ')'
)

# Attach the category label to every task row (each category appears once in app_counts).
app_nlp_detail = app_nlp_detail.merge(app_counts[['Application Category', 'label_apps']], on='Application Category')

print(app_nlp_detail)
print(app_counts)

In [None]:
# List the distinct application categories present in the aggregated table.
print(app_nlp_detail.loc[:, 'Application Category'].unique())

In [None]:
# Sunburst of application categories (inner ring) and their NLP tasks (outer ring).

# Colour sequence passed to Plotly Express for the chart.
SUNBURST_PALETTE = [
    '#264653', '#2a9d8f',
    '#e9c46a', '#f4a261',
    '#e76f51', '#588157',
]

fig = px.sunburst(
    app_nlp_detail,
    path=['label_apps', 'label'],
    # Sector sizes use the adjusted (virtual) values; the real counts are
    # embedded in the 'label_apps' / 'label' strings shown on the chart.
    values='virtual_value',
    title='<b>Sunburst Chart of Applications and NLP Tasks</b>',
    color_discrete_sequence=SUNBURST_PALETTE,
)

fig.update_traces(
    insidetextorientation='radial',
    textfont=dict(size=25),
    # The default Plotly Express hover text prints the virtual value used for
    # sizing, which contradicts the real count in the label. Show the label
    # only, and suppress the secondary hover box with <extra></extra>.
    hovertemplate='%{label}<extra></extra>',
    selector=dict(type='sunburst'),
)

fig.update_layout(
    font=dict(size=20),
    height=1300,
    width=1300,
    margin=dict(l=10, r=10, t=50, b=10),
)

fig.show()