In [None]:
import pandas as pd
import plotly.express as px
import plotly.io as pio
import plotly.graph_objects as go
import sys
import openpyxl

In [None]:
# Load the source workbook into `df`.
workbook_path = "Data for data visuliazation_v4.xlsx"
df = pd.read_excel(workbook_path)

# Trend Chart for Application Category

In [None]:
# Normalise category labels that differ only by a trailing period so they are
# counted as a single category.
df['Application Category'] = df['Application Category'].replace({
    'Clinical decision making support.': 'Clinical decision making support',
    'Patient information support.': 'Patient information support',
})

# Persist the cleaned labels; later cells re-read this same workbook.
# index=False keeps the row index out of the file. Without it every
# read -> write cycle adds another "Unnamed: N" column to the workbook.
df.to_excel("Data for data visuliazation_v4.xlsx", index=False)

In [None]:
# Show how many rows fall under each application category.
category_counts = df['Application Category'].value_counts()
print(category_counts)

In [None]:
# Parse the 'YYYY-Mon' publication dates and keep only the study window.
df['Publication Date'] = pd.to_datetime(df['Publication Date'], format='%Y-%b')
start_date = '2023-01-01'
end_date = '2024-07-31'

# Both bounds are inclusive. The explicit copy makes `filtered_df` an
# independent frame, so adding columns to it later does not trigger pandas'
# SettingWithCopyWarning.
in_window = (df['Publication Date'] >= start_date) & (df['Publication Date'] <= end_date)
filtered_df = df.loc[in_window].copy()

In [None]:
# Monthly publication counts per application category, drawn as one line per
# category.

# Month label ('YYYY-MM') for every publication. `assign` returns a new frame
# instead of setting a column on the filtered selection, which avoids pandas'
# SettingWithCopyWarning.
filtered_df = filtered_df.assign(
    Date=filtered_df['Publication Date'].dt.to_period('M').astype(str)
)

# One row per (month, category) pair holding the number of publications.
monthly_counts = (
    filtered_df
    .assign(Application_Category=filtered_df['Application Category'])
    .groupby(['Date', 'Application_Category'])
    .size()
    .reset_index(name='Count')
)

custom_colors = [
    '#055B69',
    '#852C2C',
    '#A96A8C',
    '#A97B50',
    '#618C95',
    '#4A579B',
    '#7F61B6',
    '#639A5E',
    '#822482',
]

fig = px.line(
    monthly_counts,
    x='Date',
    y='Count',
    color='Application_Category',
    color_discrete_sequence=custom_colors,
    title="<b>Application Category Trend Chart</b>",
    markers=True,
    text='Application_Category',
    symbol='Application_Category',
)

# `text` is passed to px.line only so the hover template can reference it;
# forcing mode to 'lines+markers' keeps the labels from being drawn on the plot.
fig.update_traces(
    mode='lines+markers',
    marker=dict(size=11.5),
    line=dict(width=2.5),
    hovertemplate='Date: %{x}<br>Application Category: %{text}<br>Count: %{y}<extra></extra>',
)

fig.update_layout(
    title_font_size=24,
    legend_itemclick="toggleothers",
    # Legend sits just outside the top-right corner of the plot area, with a
    # transparent background and no title.
    legend=dict(
        yanchor="top",
        y=1,
        xanchor="left",
        x=1,
        orientation="v",
        font=dict(size=13),
        bgcolor="rgba(0, 0, 0, 0)",
    ),
    legend_title=None,
    height=550,
    font=dict(family="Arial, sans-serif", size=12),
    # Axis title fonts are set through title.font; the legacy `titlefont`
    # alias is deprecated.
    xaxis=dict(
        showgrid=True,
        gridcolor='LightGray',
        title=dict(text='<b>Publication Date</b>', font=dict(size=16, weight='bold')),
        tickfont=dict(size=14, color='black'),
    ),
    yaxis=dict(
        showgrid=True,
        gridcolor='LightGray',
        title=dict(text='<b>Count</b>', font=dict(size=16, weight='bold')),
        tickfont=dict(size=14, color='black'),
    ),
    autosize=True,
)

# Pull the right edge of the x-axis back from the end of July to 4 July so the
# axis stops shortly after the last monthly point.
end_date_adjusted = pd.to_datetime(end_date) - pd.Timedelta(days=27)
fig.update_xaxes(range=[start_date, end_date_adjusted], tickangle=-45, dtick='M1', tickformat='%b %Y')

fig.show()

# Number of Publications by Month

In [None]:
# Re-read the workbook so this section starts from a freshly loaded frame.
workbook_path = "Data for data visuliazation_v4.xlsx"
df = pd.read_excel(workbook_path)

In [None]:
# Parse the 'YYYY-Mon' publication dates and keep only the study window.
df['Publication Date'] = pd.to_datetime(df['Publication Date'], format='%Y-%b')
start_date = '2023-01-01'
end_date = '2024-07-31'

# Both bounds are inclusive. The explicit copy makes `filtered_df` an
# independent frame, so writing columns to it later does not trigger pandas'
# SettingWithCopyWarning.
in_window = (df['Publication Date'] >= start_date) & (df['Publication Date'] <= end_date)
filtered_df = df.loc[in_window].copy()

In [None]:
# Total number of publications per month, drawn as a single trend line.
# 'Publication Date' was already converted to datetime when `filtered_df` was
# built, so it is used directly rather than being re-parsed and written back
# into the filtered selection.
monthly_counts = (
    filtered_df
    .assign(Date=filtered_df['Publication Date'].dt.to_period('M').astype(str))
    .groupby('Date')
    .size()
    .reset_index(name='Count')
)

custom_colors = [
    '#055B69',
]

fig = px.line(
    monthly_counts,
    x='Date',
    y='Count',
    color_discrete_sequence=custom_colors,
    title="<b>Number of Publications by Month</b>",
    markers=True,
)

fig.update_traces(
    mode='lines+markers',
    marker=dict(size=11.5),
    line=dict(width=2.5),
    hovertemplate='Date: %{x}<br>Count: %{y}<extra></extra>',
)

fig.update_layout(
    title_font_size=24,
    legend_font_size=12,
    legend_itemclick="toggleothers",
    height=550,
    font=dict(family="Arial, sans-serif", size=12),
    # Axis title fonts are set through title.font; the legacy `titlefont`
    # alias is deprecated.
    xaxis=dict(
        showgrid=True,
        gridcolor='LightGray',
        title=dict(text='<b>Publication Date</b>', font=dict(size=16, weight='bold')),
        tickfont=dict(size=14, color='black'),
    ),
    yaxis=dict(
        showgrid=True,
        gridcolor='LightGray',
        title=dict(text='<b>Count</b>', font=dict(size=16, weight='bold')),
        tickfont=dict(size=14, color='black'),
    ),
    autosize=True,
)

# Pull the right edge of the x-axis back from the end of July to 4 July so the
# axis stops shortly after the last monthly point.
end_date_adjusted = pd.to_datetime(end_date) - pd.Timedelta(days=27)
fig.update_xaxes(range=[start_date, end_date_adjusted], tickangle=-45, dtick='M1', tickformat='%b %Y')

fig.show()

# Trend Chart for NLP Task

In [None]:
# Re-read the workbook so this section starts from a freshly loaded frame.
workbook_path = "Data for data visuliazation_v4.xlsx"
df = pd.read_excel(workbook_path)

In [None]:
# Collect the distinct NLP task labels. Each cell holds a ', '-separated list.
tasks_list = df['NLP task Category'].str.split(', ')

# Cells that are missing (or not text) come back from .str.split as NaN rather
# than a list, so they are skipped instead of raising TypeError. Labels are
# stripped so stray whitespace does not produce duplicate entries, matching
# the clean-up applied before the trend chart is built.
all_tasks = [
    task.strip()
    for sublist in tasks_list
    if isinstance(sublist, list)
    for task in sublist
]
unique_tasks = set(all_tasks)
print(unique_tasks)

In [None]:
# Parse the 'YYYY-Mon' publication dates and keep only the study window.
df['Publication Date'] = pd.to_datetime(df['Publication Date'], format='%Y-%b')
start_date = '2023-01-01'
end_date = '2024-07-31'

# Both bounds are inclusive. The explicit copy makes `filtered_df` an
# independent frame, so adding columns to it later does not trigger pandas'
# SettingWithCopyWarning.
in_window = (df['Publication Date'] >= start_date) & (df['Publication Date'] <= end_date)
filtered_df = df.loc[in_window].copy()

In [None]:
# Monthly publication counts per NLP task, drawn as one line per task.

# Month label ('YYYY-MM') for every publication. `assign` returns a new frame
# instead of setting a column on the filtered selection, which avoids pandas'
# SettingWithCopyWarning.
filtered_df = filtered_df.assign(
    Date=filtered_df['Publication Date'].dt.to_period('M').astype(str)
)

# A publication can list several tasks separated by ', '. Split the list,
# give each task its own row, trim stray whitespace, then count rows per
# (month, task) pair.
monthly_counts = (
    filtered_df
    .assign(NLP_task_Category=filtered_df['NLP task Category'].str.split(', '))
    .explode('NLP_task_Category')
    .assign(NLP_task_Category=lambda frame: frame['NLP_task_Category'].str.strip())
    .groupby(['Date', 'NLP_task_Category'])
    .size()
    .reset_index(name='Count')
)

custom_colors = [
    '#055B69',
    '#852C2C',
    '#618C95',
    '#A97B50',
    '#A96A8C',
    '#4A579B',
    '#7F61B6',
    '#639A5E',
    '#822482',
]

fig = px.line(
    monthly_counts,
    x='Date',
    y='Count',
    color='NLP_task_Category',
    color_discrete_sequence=custom_colors,
    title="<b>NLP Tasks Trend Chart</b>",
    markers=True,
    text='NLP_task_Category',
    symbol='NLP_task_Category',
)

# `text` is passed to px.line only so the hover template can reference it;
# forcing mode to 'lines+markers' keeps the labels from being drawn on the plot.
fig.update_traces(
    mode='lines+markers',
    marker=dict(size=11.5),
    line=dict(width=2.5),
    hovertemplate='Date: %{x}<br>NLP Task: %{text}<br>Count: %{y}<extra></extra>',
)

fig.update_layout(
    title_font_size=24,
    legend_itemclick="toggleothers",
    # Legend sits just outside the top-right corner of the plot area, with a
    # transparent background and no title.
    legend=dict(
        yanchor="top",
        y=1,
        xanchor="left",
        x=1,
        orientation="v",
        font=dict(size=13),
        bgcolor="rgba(0, 0, 0, 0)",
    ),
    legend_title=None,
    height=550,
    font=dict(family="Arial, sans-serif", size=12),
    # Axis title fonts are set through title.font; the legacy `titlefont`
    # alias is deprecated.
    xaxis=dict(
        showgrid=True,
        gridcolor='LightGray',
        title=dict(text='<b>Publication Date</b>', font=dict(size=16, weight='bold')),
        tickfont=dict(size=14, color='black'),
    ),
    yaxis=dict(
        showgrid=True,
        gridcolor='LightGray',
        title=dict(text='<b>Count</b>', font=dict(size=16, weight='bold')),
        tickfont=dict(size=14, color='black'),
    ),
    autosize=True,
)

# Pull the right edge of the x-axis back from the end of July to 4 July so the
# axis stops shortly after the last monthly point.
end_date_adjusted = pd.to_datetime(end_date) - pd.Timedelta(days=27)
fig.update_xaxes(range=[start_date, end_date_adjusted], tickangle=-45, dtick='M1', tickformat='%b %Y')

fig.show()

# Total Count Data Visualization - World Map

In [None]:
# Load the per-country counts used by the world maps below.
country_counts_path = "country_counts.csv"
df = pd.read_csv(country_counts_path)

In [None]:
# World map of 'Total Count', coloured by count range.

# Range edges, their legend labels, and the fill colour for each range.
# pd.cut uses right-closed intervals by default, so a count of exactly 0 falls
# outside the first bin and gets no category (NaN).
range_edges = [0, 1, 3, 10, 30, 100, float('inf')]
range_labels = ["0-1", "1-3", "3-10", "10-30", "30-100", "More than 100"]
range_colors = ["#c4fff0", "#9ae4d4", "#7dd3be", "#51a893", "#10624d", "#033327"]

df['Color Category'] = pd.cut(df['Total Count'], bins=range_edges, labels=range_labels)

fig = px.choropleth(
    df,
    locations='Country',
    locationmode='country names',
    color='Color Category',
    hover_name='Country',
    hover_data={'Total Count': True},
    title='<b>Total</b>',
    color_discrete_map=dict(zip(range_labels, range_colors)),
    # Keep the legend in ascending range order.
    category_orders={"Color Category": range_labels},
)

# Light-gray country and subunit borders, no frame around the map.
fig.update_geos(
    showcountries=True,
    countrycolor="lightgray",
    showsubunits=True,
    subunitcolor="lightgray",
    showframe=False,
)

# Centered title, legend placed inside the map area, fixed figure size.
fig.update_layout(
    title_font=dict(size=24, family='Arial, sans-serif'),
    title_x=0.5,
    legend_title="Number of Articles",
    legend=dict(
        itemsizing="constant",
        orientation="v",
        xanchor="left",
        x=0.1,
        yanchor="bottom",
        y=0.2,
        font=dict(size=18, family='Arial, sans-serif'),
    ),
    height=700,
    width=1300,
    margin=dict(l=5, r=5, t=40, b=10),
)

fig.show()

# First Half of 2023 Data Visualization - World Map

In [None]:
# World map of 'Count 2023 First Half', coloured by count range.

# Range edges, their labels, and the fill colour for each range.
# pd.cut uses right-closed intervals by default, so a count of exactly 0 falls
# outside the first bin and gets no category (NaN).
range_edges = [0, 1, 3, 10, 30, 100, float('inf')]
range_labels = ["0-1", "1-3", "3-10", "10-30", "30-100", "More than 100"]
range_colors = ["#c4fff0", "#9ae4d4", "#7dd3be", "#51a893", "#10624d", "#033327"]

df['Color Category'] = pd.cut(df['Count 2023 First Half'], bins=range_edges, labels=range_labels)

fig = px.choropleth(
    df,
    locations='Country',
    locationmode='country names',
    color='Color Category',
    hover_name='Country',
    hover_data={'Count 2023 First Half': True},
    title='<b>January 2023 - June 2023</b>',
    color_discrete_map=dict(zip(range_labels, range_colors)),
    category_orders={"Color Category": range_labels},
)

# Light-gray country and subunit borders, no frame around the map.
fig.update_geos(
    showcountries=True,
    countrycolor="lightgray",
    showsubunits=True,
    subunitcolor="lightgray",
    showframe=False,
)

# Centered title and fixed figure size. The legend is styled but then hidden
# via showlegend=False.
fig.update_layout(
    title_font=dict(size=24, family="Arial, sans-serif"),
    title_x=0.5,
    legend_title="Number of Articles",
    legend=dict(
        itemsizing="constant",
        orientation="v",
        xanchor="left",
        x=0.1,
        yanchor="bottom",
        y=0.2,
        font=dict(size=18, family="Arial, sans-serif"),
    ),
    height=700,
    width=1300,
    margin=dict(l=5, r=5, t=40, b=10),
    showlegend=False,
)

fig.show()

# Second Half of 2023 Data Visualization - World Map

In [None]:
# World map of 'Count 2023 Second Half', coloured by count range.

# Range edges, their labels, and the fill colour for each range.
# pd.cut uses right-closed intervals by default, so a count of exactly 0 falls
# outside the first bin and gets no category (NaN).
range_edges = [0, 1, 3, 10, 30, 100, float('inf')]
range_labels = ["0-1", "1-3", "3-10", "10-30", "30-100", "More than 100"]
range_colors = ["#c4fff0", "#9ae4d4", "#7dd3be", "#51a893", "#10624d", "#033327"]

df['Color Category'] = pd.cut(df['Count 2023 Second Half'], bins=range_edges, labels=range_labels)

fig = px.choropleth(
    df,
    locations='Country',
    locationmode='country names',
    color='Color Category',
    hover_name='Country',
    hover_data={'Count 2023 Second Half': True},
    title='<b>July 2023 - December 2023</b>',
    color_discrete_map=dict(zip(range_labels, range_colors)),
    category_orders={"Color Category": range_labels},
)

# Light-gray country and subunit borders, no frame around the map.
fig.update_geos(
    showcountries=True,
    countrycolor="lightgray",
    showsubunits=True,
    subunitcolor="lightgray",
    showframe=False,
)

# Centered title and fixed figure size. The legend is styled but then hidden
# via showlegend=False.
fig.update_layout(
    title_font=dict(size=24, family="Arial, sans-serif"),
    title_x=0.5,
    legend_title="Number of Articles",
    legend=dict(
        itemsizing="constant",
        orientation="v",
        xanchor="left",
        x=0.1,
        yanchor="bottom",
        y=0.2,
        font=dict(size=18, family="Arial, sans-serif"),
    ),
    height=700,
    width=1300,
    margin=dict(l=5, r=5, t=40, b=10),
    showlegend=False,
)

fig.show()

# First Half of 2024 Data Visualization - World Map

In [None]:
# World map of 'Count 2024 First Half', coloured by count range.

# Range edges, their labels, and the fill colour for each range.
# pd.cut uses right-closed intervals by default, so a count of exactly 0 falls
# outside the first bin and gets no category (NaN).
range_edges = [0, 1, 3, 10, 30, 100, float('inf')]
range_labels = ["0-1", "1-3", "3-10", "10-30", "30-100", "More than 100"]
range_colors = ["#c4fff0", "#9ae4d4", "#7dd3be", "#51a893", "#10624d", "#033327"]

df['Color Category'] = pd.cut(df['Count 2024 First Half'], bins=range_edges, labels=range_labels)

fig = px.choropleth(
    df,
    locations='Country',
    locationmode='country names',
    color='Color Category',
    hover_name='Country',
    hover_data={'Count 2024 First Half': True},
    title='<b>January 2024 - June 2024</b>',
    color_discrete_map=dict(zip(range_labels, range_colors)),
    category_orders={"Color Category": range_labels},
)

# Light-gray country and subunit borders, no frame around the map.
fig.update_geos(
    showcountries=True,
    countrycolor="lightgray",
    showsubunits=True,
    subunitcolor="lightgray",
    showframe=False,
)

# Centered title and fixed figure size. The legend is styled but then hidden
# via showlegend=False.
fig.update_layout(
    title_font=dict(size=24, family="Arial, sans-serif"),
    title_x=0.5,
    legend_title="Number of Articles",
    legend=dict(
        itemsizing="constant",
        orientation="v",
        xanchor="left",
        x=0.1,
        yanchor="bottom",
        y=0.2,
        font=dict(size=18, family="Arial, sans-serif"),
    ),
    height=700,
    width=1300,
    margin=dict(l=5, r=5, t=40, b=10),
    showlegend=False,
)

fig.show()