In [0]:
pip install transformers sentencepiece torch --upgrade langchain pydantic==1.8 mlflow

Python interpreter will be restarted.
Collecting transformers
  Downloading transformers-4.33.0-py3-none-any.whl (7.6 MB)
Collecting sentencepiece
  Downloading sentencepiece-0.1.99-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
Collecting torch
  Downloading torch-2.0.1-cp39-cp39-manylinux1_x86_64.whl (619.9 MB)
Collecting langchain
  Downloading langchain-0.0.281-py3-none-any.whl (1.6 MB)
Collecting pyyaml>=5.1
  Using cached PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (738 kB)
Collecting safetensors>=0.3.1
  Using cached safetensors-0.3.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
Collecting regex!=2019.12.17
  Using cached regex-2023.8.8-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (771 kB)
Collecting huggingface-hub<1.0,>=0.15.1
  Using cached huggingface_hub-0.16.4-py3-none-any.whl (268 kB)
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Using cached tokenizers-0.13.3-cp39-cp39-manylinux_2_17_x86_64.ma

In [0]:
import mlflow
import mlflow.pyfunc
import mlflow.pytorch
from transformers import pipeline
import pandas as pd



In [0]:
# Load the sentiment analysis models
model_1 = pipeline("sentiment-analysis", model="cardiffnlp/twitter-xlm-roberta-base-sentiment")
model_2 = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment-latest")

# Load your dataset (replace 'your_dataset.csv' with your actual dataset file)
test_data = pd.read_csv('/dbfs/FileStore/tables/test_df.csv')

# Perform one-hot encoding
df = pd.get_dummies(test_data, columns=['sentiment'], prefix='', prefix_sep='')

# Define a function to evaluate a model on the dataset
def evaluate_model(texts, model):
    sentiment_labels = []

    for text in texts:
        # Ensure that 'text' is a string, or convert it to a string if it's not
        if not isinstance(text, str):
            text = str(text)

        # Get the model's predictions
        prediction = model(text)
        sentiment_label = prediction[0]['label']

        sentiment_labels.append(sentiment_label)

    return sentiment_labels

# Evaluate "cardiffnlp/twitter-xlm-roberta-base-sentiment" alone
sentiment_labels_1 = evaluate_model(df['text'], model_1)

# Evaluate "cardiffnlp/twitter-roberta-base-sentiment-latest" alone
sentiment_labels_2 = evaluate_model(df['text'], model_2)

# Define a function to perform ensemble evaluation
def ensemble_evaluate(texts, model_1, model_2):
    sentiment_labels_ensemble = []

    for text in texts:
        # Ensure that 'text' is a string, or convert it to a string if it's not
        if not isinstance(text, str):
            text = str(text)

        # Get predictions from both models
        prediction_1 = model_1(text)
        prediction_2 = model_2(text)

        # Combine the predictions using a weighted average or majority vote
        sentiment_score_1 = prediction_1[0]['score']
        sentiment_score_2 = prediction_2[0]['score']
        combined_sentiment_score = (sentiment_score_1 + sentiment_score_2) / 2

        # Choose the label based on the combined score
        combined_sentiment_label = prediction_1[0]['label'] if combined_sentiment_score >= 0.5 else prediction_2[0]['label']

        sentiment_labels_ensemble.append(combined_sentiment_label)

    return sentiment_labels_ensemble

# Evaluate both models in an ensemble fashion
sentiment_labels_ensemble = ensemble_evaluate(df['text'], model_1, model_2)

# Add the results as new columns in your DataFrame
df['sentiment_label_1'] = sentiment_labels_1
df['sentiment_label_2'] = sentiment_labels_2
df['sentiment_label_ensemble'] = sentiment_labels_ensemble

Downloading (…)lve/main/config.json:   0%|          | 0.00/841 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/1.11G [00:00<?, ?B/s]

Downloading (…)tencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/150 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/929 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/501M [00:00<?, ?B/s]

Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Downloading (…)olve/main/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

In [0]:
# from sklearn.metrics import classification_report

# # Calculate classification report for Model 1
# report_model_1 = classification_report(test_data['sentiment'], df['sentiment_label_1'])

# # Calculate classification report for Model 2
# report_model_2 = classification_report(test_data['sentiment'], df['sentiment_label_2'])

# # Calculate classification report for Ensemble
# report_ensemble = classification_report(test_data['sentiment'], df['sentiment_label_ensemble'])

# # Print the classification report for each model and ensemble
# print("Classification Report for Model 1:")
# print(report_model_1)

# print("\nClassification Report for Model 2:")
# print(report_model_2)

# print("\nClassification Report for Ensemble:")
# print(report_ensemble)


Classification Report for Model 1:
              precision    recall  f1-score   support

    negative       0.63      0.88      0.73      1546
     neutral       0.76      0.42      0.54      1930
    positive       0.73      0.83      0.78      1730

    accuracy                           0.69      5206
   macro avg       0.70      0.71      0.68      5206
weighted avg       0.71      0.69      0.68      5206


Classification Report for Model 2:
              precision    recall  f1-score   support

    negative       0.69      0.82      0.75      1546
     neutral       0.74      0.46      0.57      1930
    positive       0.70      0.87      0.78      1730

    accuracy                           0.70      5206
   macro avg       0.71      0.72      0.70      5206
weighted avg       0.71      0.70      0.69      5206


Classification Report for Ensemble:
              precision    recall  f1-score   support

    negative       0.63      0.88      0.73      1546
     neutral       0.

In [0]:
# from sklearn.metrics import classification_report

# # Calculate classification report for Model 1
# report_model_1 = classification_report(test_data['sentiment'], df['sentiment_label_1'], output_dict=True)

# # Calculate classification report for Model 2
# report_model_2 = classification_report(test_data['sentiment'], df['sentiment_label_2'], output_dict=True)

# # Calculate classification report for Ensemble
# report_ensemble = classification_report(test_data['sentiment'], df['sentiment_label_ensemble'], output_dict=True)

# # Extract F1-scores for each class and each model
# f1_scores_model_1 = report_model_1['weighted avg']['f1-score']
# f1_scores_model_2 = report_model_2['weighted avg']['f1-score']
# f1_scores_ensemble = report_ensemble['weighted avg']['f1-score']

# # Compare F1-scores and select the best model
# best_model = None

# if f1_scores_model_1 > f1_scores_model_2 and f1_scores_model_1 > f1_scores_ensemble:
#     best_model = "Model 1"
# elif f1_scores_model_2 > f1_scores_ensemble:
#     best_model = "Model 2"
# else:
#     best_model = "Ensemble"

# print("Best Model:", best_model)

Best Model: Model 2


In [0]:
###################- Sentiment Model

In [0]:
import torch
import pandas as pd
from transformers import pipeline
from sklearn.metrics import accuracy_score
import mlflow

In [0]:
import mlflow
import mlflow.pyfunc
import mlflow.pytorch
from transformers import pipeline
import pandas as pd

# Initialize an MLflow experiment
mlflow.set_experiment("/Users/mohamed.zahid@simpleenergy.in/sentiment_analysis")

# Start an MLflow run
with mlflow.start_run():
    # Log parameters
    mlflow.log_params({
        "model_name": "cardiffnlp/twitter-roberta-base-sentiment-latest",
        "dataset_path": "/Users/mohamed.zahid@simpleenergy.in/sentiment_analysis"
    })

    # Load the sentiment analysis model
    model = pipeline("sentiment-analysis", model="cardiffnlp/twitter-xlm-roberta-base-sentiment")

    # Load your dataset
    delta_df = spark.read.format("delta").table("user_comments.comments")
    df = delta_df.toPandas()

    # Define a function to analyze sentiment for a single text
    def analyze_sentiment(text, model):
        # Ensure that 'text' is a string
        if not isinstance(text, str):
            text = str(text)
        
        # Get the sentiment prediction for the text
        prediction = model(text)
        
        # Extract sentiment label and score
        sentiment_label = prediction[0]['label']
        sentiment_score = prediction[0]['score']
        
        return sentiment_label, sentiment_score

    # Create empty lists to store sentiment labels and scores
    sentiment_labels = []
    sentiment_scores = []

    # Apply the sentiment analysis function to each row of the DataFrame
    for comment_text in df['comment_text']:
        label, score = analyze_sentiment(comment_text, model)
        sentiment_labels.append(label)
        sentiment_scores.append(score)

    # Add sentiment labels and scores as new columns to the DataFrame
    df['sentiment_label'] = sentiment_labels
    df['sentiment_score'] = sentiment_scores

    # Log the DataFrame as an artifact
    #mlflow.log_artifact(pd.DataFrame.to_csv(df), "dbfs:/FileStore")

# End the MLflow run
mlflow.end_run()


In [0]:
df

Unnamed: 0,date,comment_text,sentiment_label,sentiment_score
0,2023-05-13,why this looks like ather 450x,negative,0.649711
1,2023-05-12,No competition with ola 😅,neutral,0.514803
2,2023-04-25,If everything goes well this will be a big hit...,positive,0.795603
3,2023-03-31,One of the oldest auto channel in YouTube,positive,0.447022
4,2023-02-23,1and hlf yr bck I was thinking to tk franchise...,negative,0.397321
...,...,...,...,...
628,2023-05-23,Nice🎉,positive,0.644901
629,2023-05-23,Cool,positive,0.491287
630,2023-05-23,Hum first,neutral,0.476607
631,2023-05-23,1st..😂🎉,positive,0.578617
