In [1]:
import base64
import io
from wordcloud import WordCloud
from jupyter_dash import JupyterDash
from dash import dcc, html, Input, Output, State, callback
import dash_bootstrap_components as dbc
import plotly.express as px
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# Run inference on CUDA when PyTorch reports it as available, otherwise on the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Load the tokenizer and the sequence-classification model from the same checkpoint
_CHECKPOINT = "cardiffnlp/twitter-roberta-base-sentiment"
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
model = AutoModelForSequenceClassification.from_pretrained(_CHECKPOINT)
model = model.to(device)

def predict_sentiment(texts, batch_size=128):
    """Label each text as 'Negative', 'Neutral' or 'Positive'.

    Args:
        texts: Iterable of texts to classify. Every item is converted with
            ``str`` first, so non-string cells (for example NaN floats coming
            out of a pandas column) do not reach the tokenizer as-is.
        batch_size: Number of texts sent through the model per forward pass.

    Returns:
        A list with one label per input text, in input order. An empty input
        yields an empty list without touching the tokenizer or the model.
    """
    # Class index -> label; the order is the mapping this module has always used.
    categories = ['Negative', 'Neutral', 'Positive']

    # Materialise once: accepts any iterable and guarantees str items.
    texts = [str(text) for text in texts]

    labels = []
    for start in range(0, len(texts), batch_size):
        batch_texts = texts[start:start + batch_size]
        inputs = tokenizer(batch_texts, return_tensors="pt", padding=True, truncation=True, max_length=512).to(device)
        with torch.no_grad():
            logits = model(**inputs).logits
        # Softmax is monotonic, so the argmax of the logits is the argmax of
        # the probabilities; tolist() gives plain ints instead of 0-d tensors.
        labels.extend(categories[index] for index in logits.argmax(dim=-1).tolist())
    return labels

def pil_to_b64(img, enc_format='png', **kwargs):
    """Encode an image as a base64 ``data:`` URI.

    Args:
        img: Object exposing ``save(buffer, format=..., **kwargs)``, such as a
            PIL image.
        enc_format: Format name passed to ``img.save`` and used as the
            ``image/<enc_format>`` media subtype in the URI.
        **kwargs: Extra keyword arguments forwarded to ``img.save``.

    Returns:
        A string of the form ``data:image/<enc_format>;base64,<payload>``.
    """
    buff = io.BytesIO()
    img.save(buff, format=enc_format, **kwargs)
    encoded = base64.b64encode(buff.getvalue()).decode("utf-8")
    # No whitespace after the comma: the data-URL grammar does not allow it.
    return f"data:image/{enc_format};base64,{encoded}"

# Create the Dash application, styled with the Bootstrap theme
app = JupyterDash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# Dashed drop zone used for uploading a single file
_upload_style = {
    'width': '100%',
    'height': '60px',
    'lineHeight': '60px',
    'borderWidth': '1px',
    'borderStyle': 'dashed',
    'borderRadius': '5px',
    'textAlign': 'center',
    'margin': '10px',
}
_upload_box = dcc.Upload(
    id='upload-data',
    children=html.Div(['Drag and Drop or ', html.A('Select Files')]),
    style=_upload_style,
    multiple=False,
)

# Left column: title, upload, column picker, analyze button and status text
_controls_column = dbc.Col(
    [
        html.H1("Twitter Sentiment Analysis Dashboard"),
        _upload_box,
        dcc.Dropdown(id='column-dropdown', placeholder='Select column for analysis'),
        dbc.Button("Analyze Sentiment with BERT", id="btn-analyze", color="success", disabled=True),
        dcc.Loading(id="loading-1", type="default", children=html.Div(id="loading-output-1")),
    ],
    width=4,
)

# Right column: preview table of the upload and the analysis output area
_results_column = dbc.Col(
    [
        dbc.Table(id='data-preview', striped=True, bordered=True, hover=True),
        html.Div(id='image-output'),
    ],
    width=8,
)

app.layout = dbc.Container([dbc.Row([_controls_column, _results_column])])

@app.callback(
    [Output('column-dropdown', 'options'),
     Output('btn-analyze', 'disabled')],
    Input('upload-data', 'contents'),
    prevent_initial_call=True
)
def update_column_dropdown(contents):
    """Fill the column dropdown from the uploaded CSV and enable the button.

    Args:
        contents: Value of the upload component's ``contents`` property; the
            code expects ``"<header>,<base64 payload>"`` or a falsy value.

    Returns:
        ``(options, disabled)``: one ``{'label', 'value'}`` dict per CSV column
        and ``False`` when the upload parses; ``([], True)`` when there is no
        upload or it cannot be read as UTF-8 CSV.
    """
    if not contents:
        return [], True

    try:
        # Everything after the first comma is the base64 payload.
        _, _, content_string = contents.partition(',')
        decoded = base64.b64decode(content_string)
        df = pd.read_csv(io.StringIO(decoded.decode('utf-8')))
    except ValueError:
        # Bad base64, non-UTF-8 bytes and pandas parse/empty-data errors are
        # all ValueError subclasses; keep the controls in their reset state
        # rather than raising inside the callback.
        return [], True

    options = [{'label': col, 'value': col} for col in df.columns]
    return options, False

@app.callback(
    Output('data-preview', 'children'),
    Input('upload-data', 'contents'),
    prevent_initial_call=True
)
def display_data_preview(contents):
    """Show the first rows of the uploaded CSV in the preview table.

    Args:
        contents: Value of the upload component's ``contents`` property; the
            code expects ``"<header>,<base64 payload>"`` or a falsy value.

    Returns:
        The header and body sections for the ``data-preview`` table built from
        ``df.head()``, or ``None`` (empty preview) when there is no upload or
        it cannot be read as UTF-8 CSV.
    """
    if not contents:
        return None

    try:
        # Everything after the first comma is the base64 payload.
        _, _, content_string = contents.partition(',')
        decoded = base64.b64decode(content_string)
        df = pd.read_csv(io.StringIO(decoded.decode('utf-8')))
    except ValueError:
        # Bad base64, non-UTF-8 bytes and pandas parse/empty-data errors are
        # all ValueError subclasses; clear the preview instead of raising.
        return None

    # 'data-preview' is already a dbc.Table in the layout (striped, bordered,
    # hover), so return only the generated table's children; returning the
    # whole Table would nest a <table> directly inside a <table>.
    return dbc.Table.from_dataframe(df.head()).children

@app.callback(
    [Output('image-output', 'children'),
     Output('loading-output-1', 'children')],
    [Input('btn-analyze', 'n_clicks')],
    [State('upload-data', 'contents'),
     State('column-dropdown', 'value')],
    prevent_initial_call=True
)
def perform_analysis(n_clicks, contents, selected_column):
    """Classify the selected column and render one word cloud per sentiment.

    Args:
        n_clicks: Click count of the analyze button.
        contents: Value of the upload component's ``contents`` property; the
            code expects ``"<header>,<base64 payload>"`` or a falsy value.
        selected_column: Name of the CSV column whose text is analyzed.

    Returns:
        ``(children, status)``: a row with one panel per sentiment and a
        completion message, or ``(None, message)`` when there is nothing to
        analyze or the upload cannot be read.
    """
    if not (n_clicks and contents and selected_column):
        return None, "No data to analyze or column not selected."

    try:
        # Everything after the first comma is the base64 payload.
        _, _, content_string = contents.partition(',')
        decoded = base64.b64decode(content_string)
        df = pd.read_csv(io.StringIO(decoded.decode('utf-8')))
    except ValueError:
        # Bad base64, non-UTF-8 bytes and pandas parse/empty-data errors are
        # all ValueError subclasses.
        return None, "Could not read the uploaded file as a UTF-8 CSV."

    # The dropdown value can be left over from a previously uploaded file.
    if selected_column not in df.columns:
        return None, "No data to analyze or column not selected."

    # Coerce every cell to str before inference (the word clouds already used
    # astype(str)); keep labels in a separate Series so a column that happens
    # to be called 'Sentiment' is not overwritten.
    texts = df[selected_column].astype(str)
    sentiments = pd.Series(predict_sentiment(texts.tolist()), index=texts.index)

    panels = []
    for label, colormap in (('Positive', 'Greens'), ('Neutral', 'Blues'), ('Negative', 'Reds')):
        corpus = " ".join(texts[sentiments == label])
        caption = html.P(f"{label} Sentiment")
        try:
            cloud = WordCloud(width=800, height=400, background_color='white',
                              colormap=colormap, collocations=False).generate(corpus)
        except ValueError:
            # WordCloud refuses to draw when it is left with no words, e.g.
            # when no text received this label; show a placeholder instead.
            panels.append(dbc.Col([html.P(f"No {label.lower()} texts to display."), caption], width=4))
            continue
        image = html.Img(src=pil_to_b64(cloud.to_image()), style={'width': '100%', 'height': '100%'})
        panels.append(dbc.Col([image, caption], width=4))

    # Bootstrap grid (the theme the app loads): three equal columns per row.
    images_html = dbc.Row(panels)
    return images_html, f"Analysis completed for {len(df)} records."


# Launch the dashboard; 'inline' is the JupyterDash display mode requested here
app.run_server(mode="inline")


The dash_table package is deprecated. Please replace
`import dash_table` with `from dash import dash_table`

Also, if you're using any of the table format helpers (e.g. Group), replace 
`from dash_table.Format import Group` with 
`from dash.dash_table.Format import Group`
  import dash_table


config.json:   0%|          | 0.00/747 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/150 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/499M [00:00<?, ?B/s]

NameError: name 'app' is not defined