In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

# Install: pip install transformers torch scikit-learn
# Optional GPU support (CUDA 11.8 wheels): pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

# Pre-trained checkpoint to fine-tune (DistilBERT: faster, good balance of speed and performance)
model_name = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Four output classes -> 0: Positive, 1: Negative, 2: Neutral, 3: Propaganda
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=4)

# Toy dataset written as (text, class id) pairs so each label sits next to its sentence.
_labelled_samples = [
    ("This product is amazing!", 0),                             # Positive
    ("This is a terrible experience.", 1),                       # Negative
    ("This is just a regular update.", 2),                       # Neutral
    ("Vote for X, they are the only ones who can save us!", 3),  # Propaganda
    ("This is a lie spread by the enemy.", 3),                   # Propaganda
    ("The weather is nice today.", 2),                           # Neutral
    ("I am very disappointed with this service.", 1),            # Negative
    ("This movie is fantastic!", 0),                             # Positive
]
# Parallel lists, in the same order as the pairs above.
texts = [sample_text for sample_text, _class_id in _labelled_samples]
labels = [class_id for _sample_text, class_id in _labelled_samples]

# Tokenization and Data Splitting
encoded_texts = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
# padding=True pads every row to the longest text, so the attention mask has to travel with
# input_ids; without it the model treats padding tokens as real input. Passing all arrays to
# a single train_test_split call applies the same shuffle to each, keeping rows aligned.
(
    train_texts, test_texts,
    train_masks, test_masks,
    train_labels, test_labels,
) = train_test_split(
    encoded_texts["input_ids"],
    encoded_texts["attention_mask"],
    labels,
    test_size=0.2,
    random_state=42,
)

# Training Loop (simplified): one full-batch gradient step per epoch
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)  # Use GPU if available

# The training batch is the same every epoch, so move it to the device once.
input_ids = train_texts.to(device)
attention_mask = train_masks.to(device)
labels_tensor = torch.tensor(train_labels).to(device)

model.train()
for epoch in range(3):
    optimizer.zero_grad()
    # Supplying labels makes the model compute the classification loss itself.
    outputs = model(input_ids, attention_mask=attention_mask, labels=labels_tensor)
    loss = outputs.loss
    loss.backward()
    optimizer.step()

# Evaluation
model.eval()  # disable dropout for inference
with torch.no_grad():
    input_ids = test_texts.to(device)
    attention_mask = test_masks.to(device)
    test_outputs = model(input_ids, attention_mask=attention_mask)
    predictions = torch.argmax(test_outputs.logits, dim=-1).cpu().numpy()

# The held-out split is tiny, so some classes never appear in it. Listing every class id
# keeps the report complete, and zero_division=0 reports undefined precision/recall as 0
# without emitting an UndefinedMetricWarning per metric.
all_class_ids = list(range(model.config.num_labels))
print(classification_report(test_labels, predictions, labels=all_class_ids, zero_division=0))
print("Accuracy:", accuracy_score(test_labels, predictions))

def analyze_text(text):
    """Classify ``text`` with the module-level ``tokenizer`` and ``model``.

    Args:
        text: Input passed straight to the tokenizer; only the first row of the
            resulting batch is reported in ``probabilities``.

    Returns:
        A ``(predicted_class, probabilities)`` tuple: the argmax class index as a
        Python int, and a NumPy array with the softmax of the first row of logits.
    """
    model_inputs = tokenizer(text, padding=True, truncation=True, return_tensors="pt").to(device)

    # Inference only: no autograd graph is needed for the forward pass.
    with torch.no_grad():
        logits = model(**model_inputs).logits

    probabilities = torch.softmax(logits, dim=-1)[0].cpu().numpy()
    predicted_class = logits.argmax(dim=-1).item()
    return predicted_class, probabilities

# Example usage: classify one sentence, then print the class index and the per-class probabilities
text_to_analyze = "This product is a scam and you should avoid it at all costs!"
predicted_class, probabilities = analyze_text(text_to_analyze)
print(
    f"Text: {text_to_analyze}",
    f"Predicted Class: {predicted_class} (0: Pos, 1: Neg, 2: Neu, 3: Prop)",
    f"Probabilities: {probabilities}",
    sep="\n",
)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       0.0
           1       0.00      0.00      0.00       1.0
           2       0.00      0.00      0.00       1.0

    accuracy                           0.00       2.0
   macro avg       0.00      0.00      0.00       2.0
weighted avg       0.00      0.00      0.00       2.0

Accuracy: 0.0
Text: This product is a scam and you should avoid it at all costs!
Predicted Class: 3 (0: Pos, 1: Neg, 2: Neu, 3: Prop)
Probabilities: [0.22659507 0.27351382 0.20873888 0.29115215]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
!pip install dash plotly

Collecting dash
  Downloading dash-2.18.2-py3-none-any.whl.metadata (10 kB)
Collecting Flask<3.1,>=1.0.4 (from dash)
  Downloading flask-3.0.3-py3-none-any.whl.metadata (3.2 kB)
Collecting Werkzeug<3.1 (from dash)
  Downloading werkzeug-3.0.6-py3-none-any.whl.metadata (3.7 kB)
Collecting dash-html-components==2.0.0 (from dash)
  Downloading dash_html_components-2.0.0-py3-none-any.whl.metadata (3.8 kB)
Collecting dash-core-components==2.0.0 (from dash)
  Downloading dash_core_components-2.0.0-py3-none-any.whl.metadata (2.9 kB)
Collecting dash-table==5.0.0 (from dash)
  Downloading dash_table-5.0.0-py3-none-any.whl.metadata (2.4 kB)
Collecting retrying (from dash)
  Downloading retrying-1.3.4-py3-none-any.whl.metadata (6.9 kB)
Downloading dash-2.18.2-py3-none-any.whl (7.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m44.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dash_core_components-2.0.0-py3-none-any.whl (3.8 kB)
Downloading dash_html_compo

In [None]:

! pip install dash plotly
import dash
from dash import dcc, html
import plotly.graph_objects as go
import numpy as np


# Dash application: a heading, a text box, a results area, and a probability graph.
app = dash.Dash(__name__)

app.layout = html.Div(
    children=[
        html.H1("Real-Time Broadcast Analysis"),
        dcc.Input(
            id="input-text",
            type="text",
            placeholder="Enter text to analyze...",
        ),
        html.Div(id="analysis-results"),
        dcc.Graph(id="probabilities-graph"),
    ]
)

@app.callback(
    [
        dash.dependencies.Output("analysis-results", "children"),
        dash.dependencies.Output("probabilities-graph", "figure"),
    ],
    [dash.dependencies.Input("input-text", "value")],
)
def update_analysis(input_text):
    """Refresh the result line and the probability bar chart for the typed text.

    Args:
        input_text: Current value of the ``input-text`` component.

    Returns:
        A ``(result_text, figure)`` pair; an empty string and an empty figure
        when ``input_text`` is falsy.
    """
    # Nothing to analyze: clear both outputs.
    if not input_text:
        return "", go.Figure()

    class_names = ["Positive", "Negative", "Neutral", "Propaganda"]
    predicted_class, probabilities = analyze_text(input_text)
    summary = f"Analysis: {input_text} - Predicted Class: {class_names[predicted_class]}"
    bar_chart = go.Figure(data=[go.Bar(x=class_names, y=probabilities)])
    return summary, bar_chart

if __name__ == "__main__":
    # app.run replaces app.run_server, which was removed in Dash 3; the install above
    # is unpinned, so the newer entry point keeps this cell working on either version.
    app.run(debug=True)

<IPython.core.display.Javascript object>

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
import dash
from dash import dcc, html, Output, Input
import plotly.graph_objects as go
import numpy as np

# NOTE: this cell relies on `tokenizer`, `model`, and `device` created by the model-loading and training cell earlier in the notebook; run that cell first.

def analyze_text(text):
    """Run the classifier on ``text`` and report the winning class and its probabilities.

    NOTE(review): this repeats the ``analyze_text`` defined in the training cell;
    consider keeping a single definition.

    Args:
        text: Input handed to the module-level ``tokenizer``.

    Returns:
        ``(predicted_class, probabilities)``: the argmax class index as an int and a
        NumPy array holding the softmax of the first row of logits.
    """
    # The tokenizer output (input ids plus attention mask) is moved to the model's device
    # and unpacked into the forward call as keyword arguments.
    model_inputs = tokenizer(text, padding=True, truncation=True, return_tensors="pt").to(device)

    with torch.no_grad():
        logits = model(**model_inputs).logits

    probabilities = torch.softmax(logits, dim=-1)[0].cpu().numpy()
    predicted_class = logits.argmax(dim=-1).item()
    return predicted_class, probabilities


# Dash app: heading, text input, analysis summary, probability chart, and a
# separate element that repeats the predicted class.
app = dash.Dash(__name__)

# Dash style dictionaries use camelCased CSS property names (marginTop, not margin-top).
app.layout = html.Div([
    html.H1("Real-Time Text Analysis"),  # Clearer title
    dcc.Input(id="input-text", type="text", placeholder="Enter text here...", style={'width': '80%'}),  # Wider input
    html.Div(id="analysis-results", style={'marginTop': '20px'}),  # Space above the results
    dcc.Graph(id="probabilities-graph", style={'height': '400px'}),  # Fixed height
    html.Div(id='output-prediction'),  # Extra output for the predicted class
])


@app.callback(
    [
        Output("analysis-results", "children"),
        Output("probabilities-graph", "figure"),
        Output('output-prediction', 'children'),
    ],
    Input("input-text", "value"),
)
def update_analysis(input_text):
    """Rebuild the summary, the probability chart, and the class headline for the typed text.

    Args:
        input_text: Current value of the ``input-text`` component.

    Returns:
        A ``(result_text, figure, predicted_class_output)`` triple; an empty string,
        an empty figure, and an empty string when ``input_text`` is falsy.
    """
    # Nothing to analyze: blank out all three outputs.
    if not input_text:
        return "", go.Figure(), ""

    class_names = ["Positive", "Negative", "Neutral", "Propaganda"]
    bar_colors = ['green', 'red', 'blue', 'orange']  # one colour per class, in class order
    predicted_class, probabilities = analyze_text(input_text)

    # The same line of text is shown in the summary block and in the separate headline.
    prediction_line = f"Predicted Class: {class_names[predicted_class]}"

    result_text = html.Div([
        html.H3(f"Analysis of: '{input_text}'"),
        html.P(prediction_line),
    ])

    probability_bars = go.Bar(x=class_names, y=probabilities, marker_color=bar_colors)
    fig = go.Figure(data=[probability_bars])
    fig.update_layout(
        title_text="Class Probabilities",
        xaxis_title="Class",
        yaxis_title="Probability",
    )

    predicted_class_output = html.H4(prediction_line)
    return result_text, fig, predicted_class_output


if __name__ == "__main__":
    # app.run replaces app.run_server, which was removed in Dash 3; using the newer
    # entry point keeps this cell working on both Dash 2.x and Dash 3.
    app.run(debug=True)

<IPython.core.display.Javascript object>