In [None]:
import os
import re
import nltk
import pickle
import pandas as pd
import numpy as np
from sklearn.pipeline import Pipeline

nltk.download('stopwords')
nltk.download('punkt')

def processText(text):
    """Clean raw text into a space-separated string of content words.

    The input is lower-cased, every character that is not an ASCII letter
    or whitespace is deleted, the remainder is tokenised with NLTK's
    ``word_tokenize`` and English stop words are discarded.

    Args:
        text: Raw input string.

    Returns:
        The surviving tokens joined by single spaces.
    """
    letters_only = re.sub(r'[^a-zA-Z\s]', '', text.lower())
    words = nltk.word_tokenize(letters_only)
    english_stop_words = set(nltk.corpus.stopwords.words('english'))

    kept = []
    for word in words:
        if word in english_stop_words:
            continue
        kept.append(word)
    return ' '.join(kept)

# Locate the pickled model in the ``models`` directory that sits next to the
# current working directory's parent.
models = 'models'
parent = os.path.abspath(os.path.join(os.getcwd(), '..'))
path = os.path.join(parent, models, 'improved_sentiment_model.pkl')

print(f"Model file path: {path}")

# Always bind the name so that a failed load cannot leave it undefined or,
# when the cell is re-run in a live kernel, pointing at a stale model from an
# earlier execution. ``get_sentiment_vector`` treats ``None`` as "not loaded".
sentiment_model = None

if not os.path.exists(path):
    print("Error: Model file does not exist.")
else:
    try:
        # SECURITY: pickle.load can execute arbitrary code embedded in the
        # file; only load model files that come from a trusted source.
        with open(path, 'rb') as sentiment_model_file:
            sentiment_model = pickle.load(sentiment_model_file)
        print("Model loaded successfully!")

    except EOFError:
        print("Error: Ran out of input while loading the model. The model file might be empty or corrupted.")
    except Exception as e:
        # Best-effort cell: report the problem and continue with no model.
        print(f"Error loading model: {e}")

def get_sentiment_vector(sentiment_model, text_input):
    """Return the model's scores for one text, rescaled to sum to 100.

    The text is cleaned with ``processText`` and scored with
    ``sentiment_model.predict_proba``; the scores of the single submitted
    sample are flattened into a 1-D array and normalised to percentages.

    Args:
        sentiment_model: Object exposing ``predict_proba`` on a list of
            strings, or ``None`` when no model could be loaded.
        text_input: Raw text to analyse.

    Returns:
        A 1-D ``numpy.ndarray`` of percentages, or ``None`` when the model is
        missing or any step raises (the error is printed). If every score is
        zero the all-zero vector is returned instead of NaNs.
    """
    # Check the model first so no preprocessing is done for nothing.
    if sentiment_model is None:
        print("Error: Sentiment model is not loaded. Please ensure the model is properly loaded.")
        return None

    try:
        processed_text = processText(text_input)
        sentiment_scores = sentiment_model.predict_proba([processed_text])

        # Exactly one document was submitted, so take its entry.
        # NOTE(review): if predict_proba returns a per-label list (as
        # scikit-learn's MultiOutputClassifier does) this selects only the
        # first label's probabilities - confirm against the pickled model.
        first_sample = sentiment_scores[0]
        if hasattr(first_sample, 'toarray'):
            # Sparse rows must be densified before flattening.
            first_sample = first_sample.toarray()
        sentiment_vector = np.asarray(first_sample).ravel()

        total = sentiment_vector.sum()
        if total == 0:
            # Dividing by zero would silently yield NaNs; keep the zeros.
            return sentiment_vector

        return sentiment_vector / total * 100

    except Exception as e:
        # Best-effort by design: report the failure and signal it with None.
        print(f"Error while predicting sentiment: {e}")
        return None

# Ask for a sentence, then report its sentiment vector if a model is available.
input_text = input("Enter text for sentiment analysis: ")
if 'sentiment_model' not in locals():
    print("Sentiment model is not loaded.")
else:
    sentiment_vector = get_sentiment_vector(sentiment_model, input_text)

    if sentiment_vector is None:
        print("Failed to generate sentiment vector.")
    else:
        print("Sentiment Vector:", sentiment_vector)

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\TTill\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\TTill\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Model file path: c:\Users\TTill\Semiotics_Thesis-1\models\improved_sentiment_model.pkl
Model loaded successfully!
Error while predicting sentiment: unsupported operand type(s) for *: 'csr_matrix' and 'csr_matrix'
Failed to generate sentiment vector.


In [None]:
import pickle
import pandas as pd
import re
import numpy as np

def load_model(filepath):
    """Unpickle and return the object stored in ``filepath``.

    Args:
        filepath: Path of the pickle file to read.

    Returns:
        Whatever object the pickle file contains.
    """
    # pickle can run arbitrary code while loading; only use trusted files.
    with open(filepath, 'rb') as handle:
        loaded = pickle.load(handle)
    return loaded

def preprocess_text(text):
    """Lower-case ``text``, strip non-letters and drop common stop words.

    Every character that is not an ASCII letter or whitespace is removed,
    the result is split on whitespace, and words from a small built-in
    stop-word list are discarded.

    Args:
        text: Raw input string.

    Returns:
        The remaining words joined by single spaces.
    """
    ignored = frozenset((
        'a', 'an', 'and', 'are', 'as', 'at', 'be', 'by', 'for', 'from', 'has',
        'he', 'in', 'is', 'it', 'its', 'of', 'on', 'that', 'the', 'to', 'was',
        'were', 'will', 'with',
    ))
    letters_only = re.sub(r'[^a-zA-Z\s]', '', text.lower())

    kept = []
    for token in letters_only.split():
        if token in ignored:
            continue
        kept.append(token)
    return ' '.join(kept)

def predict_emotions_with_confidence(text, model, emotion_labels, threshold=0.1):
    """Score ``text`` against each emotion and return a confidence vector.

    The text is cleaned with ``preprocess_text`` and passed through the
    pipeline's ``'tfidf'`` and ``'scaler'`` steps; each estimator in
    ``model.named_steps['classifier'].estimators_`` is then queried on the
    resulting features.

    Args:
        text: Raw text to analyse.
        model: Pipeline-like object whose ``named_steps`` mapping contains
            ``'tfidf'``, ``'scaler'`` and ``'classifier'`` entries.
        emotion_labels: Sized sequence of label names; its length fixes the
            length of the result. Estimators are assumed to be ordered like
            the labels - TODO confirm against the training code.
        threshold: A probability is recorded only when it is strictly
            greater than this value. Defaults to 0.1, the previously
            hard-coded cut-off.

    Returns:
        ``numpy.ndarray`` of length ``len(emotion_labels)``. Position ``i``
        holds estimator ``i``'s second-column probability when it exceeds
        ``threshold``, 1.0 when an estimator without usable probabilities
        predicts 1, and 0.0 otherwise.
    """
    processed_text = preprocess_text(text)

    steps = model.named_steps
    base_estimators = steps['classifier'].estimators_

    features = steps['tfidf'].transform([processed_text])
    features = steps['scaler'].transform(features)

    emotion_confidence_vector = np.zeros(len(emotion_labels))

    # zip() stops at the shorter sequence: surplus estimators are ignored and
    # labels without an estimator keep a confidence of 0.
    for idx, (_, estimator) in enumerate(zip(emotion_labels, base_estimators)):
        try:
            # First (only) sample, second probability column.
            proba = estimator.predict_proba(features)[0][1]
            if proba > threshold:
                emotion_confidence_vector[idx] = proba
        except (AttributeError, IndexError):
            # No predict_proba, or no second column: fall back to the hard
            # prediction and treat a predicted 1 as full confidence.
            prediction = estimator.predict(features)[0]
            if prediction == 1:
                emotion_confidence_vector[idx] = 1.0

    return emotion_confidence_vector

def _resolve_model_path(filename='improved_sentiment_model.pkl'):
    """Return the first existing location of the pickled model file.

    The working directory is tried first, then ``../models/``, which is
    where the notebook's other cell loads the same file from. When neither
    exists the working-directory path is returned so that opening it raises
    the usual ``FileNotFoundError``.
    """
    import os  # local import: this cell's import block does not include os

    candidates = [
        filename,
        os.path.join(os.path.abspath(os.path.join(os.getcwd(), '..')), 'models', filename),
    ]
    for candidate in candidates:
        if os.path.exists(candidate):
            return candidate
    return candidates[0]


def main(model_path=None, labels_path='goemotions.csv'):
    """Run an interactive loop that prints an emotion vector per input line.

    Args:
        model_path: Path of the pickled model. When ``None`` the file
            ``improved_sentiment_model.pkl`` is looked up in the working
            directory and then in ``../models/``.
        labels_path: CSV file whose header supplies the emotion label names.

    Raises:
        FileNotFoundError: If the model or the CSV file cannot be found.
    """
    print("Loading the trained model...")
    if model_path is None:
        model_path = _resolve_model_path()
    model = load_model(model_path)

    print("Loading emotion labels...")
    # Only the header row is needed, so nrows=0 avoids parsing the data rows.
    # Label names are taken from column index 7 onwards; presumably the first
    # seven columns are metadata - confirm against the CSV.
    emotion_labels = pd.read_csv(labels_path, nrows=0).columns[7:]

    print("\nType 'exit' to quit the program.")
    while True:
        user_input = input("\nEnter a text to analyze emotions: ")
        # Ignore surrounding whitespace so that "exit " also quits.
        if user_input.strip().lower() == 'exit':
            print("Exiting...")
            break

        emotion_scores_vector = predict_emotions_with_confidence(user_input, model, emotion_labels)

        if np.any(emotion_scores_vector):
            print("\nPredicted emotion vector (confidence scores):")
            print(emotion_scores_vector)
        else:
            print("No significant emotions detected.")


if __name__ == "__main__":
    main()
    


Loading the trained model...


FileNotFoundError: [Errno 2] No such file or directory: 'improved_sentiment_model.pkl'