<h2>Dashboards </h2>

<h3> Importing Packages </h3>

In [1]:
# Importing Packages
import dash
from dash import dcc
from dash import html
import dash_bootstrap_components as dbc
from dash.dependencies import Input, Output
import plotly.express as px
import pandas as pd
import numpy as np
from langdetect import detect
from langdetect import lang_detect_exception
from dash_bootstrap_templates import ThemeChangerAIO, template_from_url
 

<h3> Data Cleaning </h3>

In [2]:
# Column labels used for the training tweets in the rest of the notebook
headers = ['Tweet ID', 'Entity', 'Sentiment', 'Content']

# Read the training tweets; read_csv already returns a DataFrame
Tweet_Train = pd.read_csv('twitter_training.csv')
# NOTE(review): with default settings read_csv consumes the first line of the file as the
# header, and the relabelling below then discards it — confirm the CSV really starts with
# a header line rather than with a tweet.
Tweet_Train.columns = headers

In [3]:
# Keep only the first occurrence of each tweet text, then discard rows with any missing value.
# The explicit copy makes the cleaned frame independent of Tweet_Train, so adding columns
# to it later does not risk pandas' SettingWithCopyWarning.
Tweet_Train_Clean = (
    Tweet_Train
    .drop_duplicates(subset=['Content'], keep='first')
    .dropna()
    .copy()
)
# Number of rows and columns in the clean dataset (displayed as the cell output)
Tweet_Train_Clean.shape

<h3> Data Transformation </h3>

In [4]:
# Display names for the language codes returned by langdetect's `detect`.
# Built once when the cell runs instead of on every call, because detect_language
# is applied to each tweet individually.
_LANGUAGE_NAMES = {
    "af": "Afrikaans",
    "bg": "Bulgarian",
    "ca": "Catalan",
    "cs": "Czech",
    "cy": "Welsh",
    "da": "Danish",
    "de": "German",
    "en": "English",
    "es": "Spanish",
    "et": "Estonian",
    "fi": "Finnish",
    "fr": "French",
    "hr": "Croatian",
    "hu": "Hungarian",
    "id": "Indonesian",
    "it": "Italian",
    "lt": "Lithuanian",
    "lv": "Latvian",
    "mk": "Macedonian",
    "nl": "Dutch",
    "no": "Norwegian",
    "pl": "Polish",
    "pt": "Portuguese",
    "ro": "Romanian",
    "ru": "Russian",
    "sk": "Slovak",
    "sl": "Slovenian",
    "so": "Somali",
    "sq": "Albanian",
    "sv": "Swedish",
    "sw": "Swahili",
    "th": "Thai",
    "tl": "Tagalog",
    "tr": "Turkish",
    "uk": "Ukrainian",
    "vi": "Vietnamese",
}


def detect_language(tweet):
    """Return the full English name of the language a tweet is written in.

    Parameters
    ----------
    tweet : str
        Text of the tweet. Non-string values (e.g. None or NaN) and blank
        strings are accepted and reported as "Unknown".

    Returns
    -------
    str
        The mapped language name, or "Unknown" when the input is not usable
        text, langdetect raises LangDetectException, or the detected code has
        no entry in the mapping.
    """
    # Guard before calling langdetect: non-text input would raise instead of being labelled
    if not isinstance(tweet, str) or not tweet.strip():
        return "Unknown"
    try:
        # NOTE(review): langdetect results are reportedly not deterministic unless its
        # DetectorFactory seed is fixed — confirm whether reproducibility matters here.
        lang_code = detect(tweet)
    except lang_detect_exception.LangDetectException:
        return "Unknown"
    # Codes without an entry above fall back to "Unknown"
    return _LANGUAGE_NAMES.get(lang_code, "Unknown")

# Label every cleaned tweet with its detected language in a new "Language" column
Tweet_Train_Clean["Language"] = Tweet_Train_Clean["Content"].map(detect_language)

<h3> Entity Dashboard </h3>

In [50]:
# Extra stylesheet from the dash-bootstrap-templates CDN, loaded next to the theme
dbc_css_1 = "https://cdn.jsdelivr.net/gh/AnnMarieW/dash-bootstrap-templates@V1.0.2/dbc.min.css"

# Entity dashboard application, styled with the VAPOR Bootstrap theme
app = dash.Dash(
    __name__,
    external_stylesheets=[dbc.themes.VAPOR, dbc_css_1],
)

# One dropdown option per distinct detected language, in order of first appearance
language_options = [
    dict(label=language_name, value=language_name)
    for language_name in Tweet_Train_Clean['Language'].unique()
]

def create_kpi(value, label):
    """Build a KPI tile.

    Returns an ``html.Div`` with class "kpi" that holds two child divs:
    ``value`` (class "kpi-value") followed by ``label`` (class "kpi-label").
    """
    value_box = html.Div(value, className="kpi-value")
    label_box = html.Div(label, className="kpi-label")
    return html.Div([value_box, label_box], className="kpi")

# Initial KPI values computed on the unfiltered data.
# entity_count is the number of rows in the 'Entity' column (one per tweet).
entity_count = Tweet_Train_Clean['Entity'].size
positive_count = int((Tweet_Train_Clean['Sentiment'] == 'Positive').sum())
negative_count = int((Tweet_Train_Clean['Sentiment'] == 'Negative').sum())

# Entity dashboard layout: language filter on top, then KPI captions, KPI values
# and a 2x2 grid of charts. The KPI columns and graphs carry ids because the
# callback registered on this app writes to them.
app.layout = dbc.Container(
    [
        html.H1("Entity Dashboard", className="mb-4"),
        dbc.Row([
            dbc.Col(
                [
                    dcc.Dropdown(
                        id='language-filter',
                        options=[{'label': 'All Languages', 'value': 'All'}] + language_options,
                        value=['All'],
                        multi=True,
                    ),
                    html.P("Select one or more languages:", className="mb-2 text-muted"),
                ],
                width=3,
            ),
        ]),
        dbc.Col(
            [
                # KPI captions. "mb-1" replaces the truncated class name "mb-", which is
                # not a Bootstrap spacing utility.
                dbc.Row([
                    dbc.Col(html.P("Total Entities:", className="mb-2 text-muted"), width=4),
                    dbc.Col(html.P("Total Positive Sentiments:", className="mb-2 text-muted"), width=4),
                    dbc.Col(html.P("Total Negative Sentiments:", className="mb-2 text-muted"), width=4),
                ], className="mb-1"),
                # KPI values, seeded from the unfiltered counts
                dbc.Row([
                    dbc.Col(create_kpi(f"{entity_count}", ""), width=4, id='entity-kpi'),
                    dbc.Col(create_kpi(f"{positive_count}", ""), width=4, id='positive-kpi'),
                    dbc.Col(create_kpi(f"{negative_count}", ""), width=4, id='negative-kpi'),
                ], className="mb-2"),
                dbc.Row([
                    dbc.Col(dcc.Graph(id='Positive-Entity-Count'), width={"size": 6, "offset": 0}),
                    dbc.Col(dcc.Graph(id='Negative-Entity-Count'), width={"size": 6, "offset": 0}),
                ]),
                dbc.Row([
                    dbc.Col(dcc.Graph(id='Neutral-or-Irrelevant-Entity-Count'), width={"size": 6, "offset": 0}),
                    dbc.Col(dcc.Graph(id='Total-Entity-Count'), width={"size": 6, "offset": 0}),
                ]),
            ],
            width=12,
        ),
    ],
    fluid=True,
    className="dbc dbc-ag-grid",
)

def _entity_sentiment_percentage(data, sentiments, count_column, percentage_column):
    """Return per-entity totals, matching-tweet counts and their percentage share.

    ``sentiments`` lists the 'Sentiment' values that count as a match. The result has
    the columns 'Entity', 'Total Tweets by Entity', ``count_column`` and
    ``percentage_column``.
    """
    total_column = 'Total Tweets by Entity'
    totals = data.groupby('Entity').size().reset_index(name=total_column)
    matching = (
        data[data['Sentiment'].isin(sentiments)]
        .groupby('Entity')
        .size()
        .reset_index(name=count_column)
    )
    # Left join keeps entities that have no matching tweet; they are reported as 0 %
    # instead of silently disappearing from the chart (an inner join would drop them).
    merged = totals.merge(matching, on='Entity', how='left')
    merged[count_column] = merged[count_column].fillna(0).astype(int)
    merged[percentage_column] = (merged[count_column] / merged[total_column]) * 100
    return merged


def _style_entity_figure(fig):
    """Apply the dashboard's dark background and cyan text/line colours to a figure."""
    fig.update_xaxes(showgrid=False)
    fig.update_yaxes(showgrid=False)
    fig.update_layout(
        plot_bgcolor='rgb(26,9,51)',
        paper_bgcolor='rgb(26,9,51)',
        title_font=dict(size=20),
        title_x=0.5,
        font=dict(
            family="Calibri",
            size=12,
            color="rgb(50,251,226)",
        ),
    )
    fig.update_traces(
        line=dict(color='rgb(50,251,226)'),
        marker=dict(color='rgb(50,251,226)'),
    )


# Define a function to update the visualizations and KPIs
def update_visualizations(selected_languages):
    """Build the four entity charts and three KPI counts for a language selection.

    Parameters
    ----------
    selected_languages : list of str, str or None
        Values from the language dropdown. If 'All' is among them the data is not
        filtered. ``None`` is treated as an empty selection and a single string as
        a one-element list.

    Returns
    -------
    tuple
        (positive figure, negative figure, neutral-or-irrelevant figure,
        total figure, entity_count, positive_count, negative_count).
        entity_count is the number of filtered rows (tweets), not the number of
        distinct entities.
    """
    # Normalise the dropdown value so the membership test and isin() below cannot fail
    if selected_languages is None:
        selected_languages = []
    elif isinstance(selected_languages, str):
        selected_languages = [selected_languages]

    # Filter data based on selected languages
    if 'All' in selected_languages:
        filtered_data = Tweet_Train_Clean
    else:
        filtered_data = Tweet_Train_Clean[Tweet_Train_Clean['Language'].isin(selected_languages)]

    # KPI counts for the filtered rows
    entity_count = len(filtered_data)
    positive_count = len(filtered_data[filtered_data['Sentiment'] == 'Positive'])
    negative_count = len(filtered_data[filtered_data['Sentiment'] == 'Negative'])

    # Share of positive tweets per entity
    positive_entity_fig = px.line(
        _entity_sentiment_percentage(
            filtered_data, ['Positive'],
            'Positive Tweets by Entity', 'Positive Sentiment Percentage',
        ),
        x='Entity',
        y='Positive Sentiment Percentage',
        title='Positive Sentiments Percentage by Entity',
        markers=True
    )

    # Share of negative tweets per entity
    negative_entity_fig = px.line(
        _entity_sentiment_percentage(
            filtered_data, ['Negative'],
            'Negative Tweets by Entity', 'Negative Sentiment Percentage',
        ),
        x='Entity',
        y='Negative Sentiment Percentage',
        title='Negative Sentiments Percentage by Entity',
        markers=True
    )

    # Share of neutral or irrelevant tweets per entity
    neutral_irrelevant_entity_fig = px.line(
        _entity_sentiment_percentage(
            filtered_data, ['Neutral', 'Irrelevant'],
            'Neutral or Irrelevant Tweets by Entity', 'Neutral or Irrelevant Sentiment Percentage',
        ),
        x='Entity',
        y='Neutral or Irrelevant Sentiment Percentage',
        title='Neutral or Irrelevant Sentiments Percentage by Entity',
        markers=True
    )

    # Absolute number of tweets per entity
    total_entity_fig = px.line(
        filtered_data.groupby(['Entity']).size().reset_index(name='Overall Sentiments'),
        x='Entity',
        y='Overall Sentiments',
        title='Entity with Overall Sentiments',
        markers=True
    )

    # Adjusting the outlook of the figures
    for fig in [positive_entity_fig, negative_entity_fig, neutral_irrelevant_entity_fig, total_entity_fig]:
        _style_entity_figure(fig)

    return positive_entity_fig, negative_entity_fig, neutral_irrelevant_entity_fig, total_entity_fig, entity_count, positive_count, negative_count

# Refresh the four charts and the three KPI tiles whenever the language filter changes
@app.callback(
    [Output('Positive-Entity-Count', 'figure'),
     Output('Negative-Entity-Count', 'figure'),
     Output('Neutral-or-Irrelevant-Entity-Count', 'figure'),
     Output('Total-Entity-Count', 'figure'),
     Output('entity-kpi', 'children'),
     Output('positive-kpi', 'children'),
     Output('negative-kpi', 'children')],
    [Input('language-filter', 'value')]
)
def update_visualizations_callback(selected_languages):
    """Dash callback: recompute figures and KPIs for the selected languages.

    The three counts returned by update_visualizations are wrapped with create_kpi
    so the KPI columns keep the same markup the layout gives them initially;
    returning the bare numbers would replace that markup.
    """
    *figures, entity_total, positive_total, negative_total = update_visualizations(selected_languages)
    kpi_tiles = [
        create_kpi(f"{count}", "")
        for count in (entity_total, positive_total, negative_total)
    ]
    return (*figures, *kpi_tiles)

# Start the Entity dashboard server when this code runs as the main module
if __name__ == '__main__':
    # Served at http://127.0.0.1:8051/
    app.run_server(host='127.0.0.1', port=8051, debug=False)


<h3> Language Dashboard </h3>

In [52]:
# Extra stylesheet from the dash-bootstrap-templates CDN, loaded next to the theme
dbc_css_2 = "https://cdn.jsdelivr.net/gh/AnnMarieW/dash-bootstrap-templates@V1.0.2/dbc.min.css"

# Language dashboard application, styled with the VAPOR Bootstrap theme
app = dash.Dash(
    __name__,
    external_stylesheets=[dbc.themes.VAPOR, dbc_css_2],
)

# One dropdown option per distinct entity, in order of first appearance
entity_options = [
    dict(label=entity_name, value=entity_name)
    for entity_name in Tweet_Train_Clean['Entity'].unique()
]

# The language filter only offers "All" and "Exclude English"
language_options_2 = [
    dict(label='All', value='All'),
    dict(label='Exclude English', value='ExcludeEnglish'),
]

# Same behaviour as the create_kpi helper defined in the Entity Dashboard cell
def create_kpi(value, label):
    """Build a KPI tile.

    Returns an ``html.Div`` with class "kpi" that holds two child divs:
    ``value`` (class "kpi-value") followed by ``label`` (class "kpi-label").
    """
    tile_children = [
        html.Div(value, className="kpi-value"),
        html.Div(label, className="kpi-label"),
    ]
    return html.Div(tile_children, className="kpi")

# Initial KPI values computed on the unfiltered data.
# language_count is the number of rows in the 'Language' column (one per tweet).
language_count = Tweet_Train_Clean['Language'].size
positive_count_2 = int((Tweet_Train_Clean['Sentiment'] == 'Positive').sum())
negative_count_2 = int((Tweet_Train_Clean['Sentiment'] == 'Negative').sum())

# Language dashboard layout: entity and language filters on top, then KPI captions,
# KPI values and a 2x2 grid of charts. The KPI columns and graphs carry ids because
# the callback registered on this app writes to them.
app.layout = dbc.Container(
    [
        html.H1("Language Dashboard", className="mb-4"),
        dbc.Row([
            dbc.Col(
                [
                    dcc.Dropdown(
                        id='entity-filter',
                        options=[{'label': 'All Entities', 'value': 'All'}] + entity_options,
                        value=['All'],
                        multi=True,
                    ),
                    html.P("Select one or more entities:", className="mb-2 text-muted"),
                ],
                width=3,
            ),
            dbc.Col(
                [
                    dcc.Dropdown(
                        id='language-filter-2',
                        options=language_options_2,
                        # Single-select dropdown: the initial value must be one option
                        # value, not a list, otherwise no option is shown as selected.
                        value='All',
                        multi=False,
                    ),
                    html.P("Exclude English if needed:", className="mb-2 text-muted"),
                ],
                width=3,
            ),
        ]),
        dbc.Col(
            [
                # KPI captions
                dbc.Row([
                    dbc.Col(html.P("Total Languages:", className="mb-2 text-muted"), width=4),
                    dbc.Col(html.P("Total Positive Sentiments:", className="mb-2 text-muted"), width=4),
                    dbc.Col(html.P("Total Negative Sentiments:", className="mb-2 text-muted"), width=4),
                ], className="mb-1"),
                # KPI values, seeded from the unfiltered counts
                dbc.Row([
                    dbc.Col(create_kpi(f"{language_count}", ""), width=4, id='language-kpi'),
                    dbc.Col(create_kpi(f"{positive_count_2}", ""), width=4, id='positive-kpi-2'),
                    dbc.Col(create_kpi(f"{negative_count_2}", ""), width=4, id='negative-kpi-2'),
                ], className="mb-4"),
                dbc.Row([
                    dbc.Col(dcc.Graph(id='Positive-Language-Count'), width={"size": 6, "offset": 0}),
                    dbc.Col(dcc.Graph(id='Negative-Language-Count'), width={"size": 6, "offset": 0}),
                ]),
                dbc.Row([
                    dbc.Col(dcc.Graph(id='Neutral-or-Irrelevant-Language-Count'), width={"size": 6, "offset": 0}),
                    dbc.Col(dcc.Graph(id='Total-Language-Count'), width={"size": 6, "offset": 0}),
                ]),
            ],
            width=12,
        ),
    ],
    fluid=True,
    className="dbc dbc-ag-grid",
)

def _language_sentiment_percentage(data, sentiments, count_column, percentage_column):
    """Return per-language totals, matching-tweet counts and their percentage share.

    ``sentiments`` lists the 'Sentiment' values that count as a match. The result has
    the columns 'Language', 'Total Tweets by Language', ``count_column`` and
    ``percentage_column``.
    """
    total_column = 'Total Tweets by Language'
    totals = data.groupby('Language').size().reset_index(name=total_column)
    matching = (
        data[data['Sentiment'].isin(sentiments)]
        .groupby('Language')
        .size()
        .reset_index(name=count_column)
    )
    # Left join keeps languages that have no matching tweet; they are reported as 0 %
    # instead of silently disappearing from the chart (an inner join would drop them).
    merged = totals.merge(matching, on='Language', how='left')
    merged[count_column] = merged[count_column].fillna(0).astype(int)
    merged[percentage_column] = (merged[count_column] / merged[total_column]) * 100
    return merged


def _style_language_figure(fig):
    """Apply the dashboard's dark background and cyan text/line colours to a figure."""
    fig.update_xaxes(showgrid=False)
    fig.update_yaxes(showgrid=False)
    fig.update_layout(
        plot_bgcolor='rgb(26,9,51)',
        paper_bgcolor='rgb(26,9,51)',
        title_font=dict(size=20),
        title_x=0.5,
        font=dict(
            family="Calibri",
            size=12,
            color="rgb(50,251,226)",
        ),
    )
    fig.update_traces(
        line=dict(color='rgb(50,251,226)'),
        marker=dict(color='rgb(50,251,226)'),
    )


# Define a function to update the visualizations and KPIs
def update_visualizations_2(selected_entities, selected_languages_2):
    """Build the four language charts and three KPI counts for the current filters.

    Parameters
    ----------
    selected_entities : list of str, str or None
        Values from the entity dropdown. If 'All' is among them the data is not
        filtered by entity. ``None`` is treated as an empty selection and a single
        string as a one-element list.
    selected_languages_2 : str or None
        Value of the language dropdown; only 'ExcludeEnglish' has an effect
        (rows whose 'Language' is 'English' are removed).

    Returns
    -------
    tuple
        (positive figure, negative figure, neutral-or-irrelevant figure,
        total figure, language_count, positive_count, negative_count).
        language_count is the number of filtered rows (tweets), not the number of
        distinct languages.
    """
    # Normalise the dropdown value so the membership test and isin() below cannot fail
    if selected_entities is None:
        selected_entities = []
    elif isinstance(selected_entities, str):
        selected_entities = [selected_entities]

    # Filter data based on selected entities and language
    if 'All' in selected_entities:
        filtered_data = Tweet_Train_Clean
    else:
        filtered_data = Tweet_Train_Clean[Tweet_Train_Clean['Entity'].isin(selected_entities)]

    if selected_languages_2 == 'ExcludeEnglish':
        filtered_data = filtered_data[filtered_data['Language'] != 'English']

    # KPI counts for the filtered rows
    language_count = len(filtered_data)
    positive_count = len(filtered_data[filtered_data['Sentiment'] == 'Positive'])
    negative_count = len(filtered_data[filtered_data['Sentiment'] == 'Negative'])

    # Share of positive tweets per language (title typo "Langugage" corrected)
    positive_language_fig = px.line(
        _language_sentiment_percentage(
            filtered_data, ['Positive'],
            'Positive Tweets by Language', 'Positive Sentiment Percentage',
        ),
        x='Language',
        y='Positive Sentiment Percentage',
        title='Positive Sentiments Percentage by Language',
        markers=True
    )

    # Share of negative tweets per language (title typo "Langugage" corrected)
    negative_language_fig = px.line(
        _language_sentiment_percentage(
            filtered_data, ['Negative'],
            'Negative Tweets by Language', 'Negative Sentiment Percentage',
        ),
        x='Language',
        y='Negative Sentiment Percentage',
        title='Negative Sentiments Percentage by Language',
        markers=True
    )

    # Share of neutral or irrelevant tweets per language
    neutral_irrelevant_language_fig = px.line(
        _language_sentiment_percentage(
            filtered_data, ['Neutral', 'Irrelevant'],
            'Neutral or Irrelevant Tweets by Language', 'Neutral or Irrelevant Sentiment Percentage',
        ),
        x='Language',
        y='Neutral or Irrelevant Sentiment Percentage',
        title='Neutral or Irrelevant Sentiments Percentage by Language',
        markers=True
    )

    # Absolute number of tweets per language
    total_language_fig = px.line(
        filtered_data.groupby(['Language']).size().reset_index(name='Overall Sentiments'),
        x='Language',
        y='Overall Sentiments',
        title='Overall Sentiments by Language',
        markers=True
    )

    # Adjusting the outlook of the figures
    for fig in [positive_language_fig, negative_language_fig, neutral_irrelevant_language_fig, total_language_fig]:
        _style_language_figure(fig)

    return positive_language_fig, negative_language_fig, neutral_irrelevant_language_fig, total_language_fig, language_count, positive_count, negative_count

# Refresh the four charts and the three KPI tiles whenever the entity or language filter changes
@app.callback(
    [Output('Positive-Language-Count', 'figure'),
     Output('Negative-Language-Count', 'figure'),
     Output('Neutral-or-Irrelevant-Language-Count', 'figure'),
     Output('Total-Language-Count', 'figure'),
     Output('language-kpi', 'children'),
     Output('positive-kpi-2', 'children'),
     Output('negative-kpi-2', 'children')],
    [Input('entity-filter', 'value'),
     Input('language-filter-2', 'value')]
)
def update_visualizations_callback_2(selected_entities, selected_lanuages_2):
    """Dash callback: recompute figures and KPIs for the selected entities and language option.

    The three counts returned by update_visualizations_2 are wrapped with create_kpi
    so the KPI columns keep the same markup the layout gives them initially;
    returning the bare numbers would replace that markup.
    """
    *figures, language_total, positive_total, negative_total = update_visualizations_2(
        selected_entities, selected_lanuages_2
    )
    kpi_tiles = [
        create_kpi(f"{count}", "")
        for count in (language_total, positive_total, negative_total)
    ]
    return (*figures, *kpi_tiles)

# Start the Language dashboard server when this code runs as the main module
if __name__ == '__main__':
    # Served at http://127.0.0.1:8052/
    app.run_server(host='127.0.0.1', port=8052, debug=False)

<h3> Combined Dashboard </h3>

In [7]:
# Extra stylesheet from the dash-bootstrap-templates CDN, loaded next to the theme
dbc_css_3 = "https://cdn.jsdelivr.net/gh/AnnMarieW/dash-bootstrap-templates@V1.0.2/dbc.min.css"

# Combined dashboard application, styled with the VAPOR Bootstrap theme
app = dash.Dash(
    __name__,
    external_stylesheets=[dbc.themes.VAPOR, dbc_css_3],
)

# Navigation panel with one link per dashboard
sidebar = html.Div(
    children=[
        html.H2("Sidebar", className="display-4"),
        html.Hr(),
        html.P("Choose a Dashboard:", className="lead"),
        dcc.Link(
            "Entity Dashboard",
            href="/entity-dashboard",
            style=dict(color="rgb(50,251,226)"),
        ),
        html.Br(),
        dcc.Link(
            "Language Dashboard",
            href="/language-dashboard",
            style=dict(color="rgb(50,251,226)"),
        ),
    ],
    style={
        # Pinned to the left edge, spanning from the top to the bottom of the viewport
        "position": "fixed",
        "top": 0,
        "left": 0,
        "bottom": 0,
        "width": "16rem",
        "padding": "2rem 1rem",
        # Dark background, cyan text and a cyan border
        "background-color": "rgb(26,9,51)",
        "color": "rgb(50,251,226)",
        "border": "2px solid rgb(50,251,226)",
    },
)

# Page skeleton: URL tracker, the sidebar, and the area that shows the chosen dashboard
app.layout = html.Div(
    children=[
        dcc.Location(id="url"),
        sidebar,
        html.Div(
            id="page-content",
            # Starts 18rem from the left edge and fills the rest of the page
            style=dict(position="absolute", left="18rem", top=0, right=0, bottom=0),
        ),
    ]
)

@app.callback(
    Output("page-content", "children"),
    [Input("url", "pathname")]
)
def display_page(pathname):
    """Return the content for the current URL path.

    The two dashboard paths yield an iframe pointing at the corresponding local
    server (ports 8051 and 8052); any other path yields the prompt text.
    """
    iframe_sources = {
        "/entity-dashboard": "http://127.0.0.1:8051/",
        "/language-dashboard": "http://127.0.0.1:8052/",
    }
    source = iframe_sources.get(pathname)
    if source is None:
        return "Choose a Dashboard"
    return html.Iframe(src=source, style={"width": "100%", "height": "100vh"})

# Start the combined dashboard server when this code runs as the main module
if __name__ == "__main__":
    # Debug tooling stays enabled, but the Werkzeug auto-reloader is switched off:
    # it restarts the main script in a child process, which is not suitable when the
    # server is launched from a notebook kernel. Served at http://127.0.0.1:8053/
    app.run_server(debug=True, use_reloader=False, port=8053)
