In [2]:
import pandas as pd
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
import joblib



In [4]:

# Step 1: Load the dataset
dataset_path = 'tweet_emotions.csv'
df = pd.read_csv(dataset_path)

# Fail fast if the columns used by the later steps ('content' for the tweet
# text, 'sentiment' for the label) are missing, instead of hitting a KeyError
# halfway through preprocessing or training.
missing_columns = {'content', 'sentiment'} - set(df.columns)
if missing_columns:
    raise ValueError(
        f"{dataset_path} is missing required columns: {sorted(missing_columns)}"
    )



In [None]:
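# Step 2: Text Preprocessing
# One-time setup: uncomment and run the downloads below if these NLTK resources
# are not installed yet (preprocess_text needs all three).
#nltk.download('punkt')
#nltk.download('stopwords')
#nltk.download('wordnet')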

# Lazily-filled cache so the stop-word list and lemmatizer are built once,
# not once per row when preprocess_text is applied to a whole column.
_PREPROCESS_RESOURCES = {}


def _get_preprocess_resources():
    """Return the (stop_words, lemmatizer) pair, creating it on first use."""
    if not _PREPROCESS_RESOURCES:
        _PREPROCESS_RESOURCES['stop_words'] = frozenset(stopwords.words('english'))
        _PREPROCESS_RESOURCES['lemmatizer'] = WordNetLemmatizer()
    return _PREPROCESS_RESOURCES['stop_words'], _PREPROCESS_RESOURCES['lemmatizer']


def preprocess_text(text):
    """Normalize a raw text string into a space-separated string of lemmas.

    Steps: tokenize with ``word_tokenize``, keep only purely alphabetic
    tokens, lowercase them, drop English stop words, and lemmatize what
    remains with ``WordNetLemmatizer``.

    Parameters
    ----------
    text : str
        Raw text. Any non-string value (for example ``None`` or the float
        NaN that pandas uses for missing cells) is treated as empty text.

    Returns
    -------
    str
        The cleaned tokens joined by single spaces; ``''`` when nothing
        survives the filters or when ``text`` is not a string.
    """
    # word_tokenize raises on non-string input, so guard before touching NLTK.
    if not isinstance(text, str):
        return ''

    stop_words, lemmatizer = _get_preprocess_resources()
    # Filter on isalpha() before lowercasing, then match stop words against
    # the lowercased form (the stop-word list itself is lowercase).
    lowered = (token.lower() for token in word_tokenize(text) if token.isalpha())
    return ' '.join(
        lemmatizer.lemmatize(token) for token in lowered if token not in stop_words
    )

# Apply preprocessing to the 'content' column and store the result in 'cleaned_text'
df['cleaned_text'] = df['content'].apply(preprocess_text)

# Step 3: Feature Extraction (TF-IDF)
# max_features=1000 caps the vocabulary size at 1000 terms.
# NOTE(review): the vectorizer is fitted on every row here, before the
# train/test split further down — confirm that this is intended.
vectorizer = TfidfVectorizer(max_features=1000)
X = vectorizer.fit_transform(df['cleaned_text'])


In [None]:

# Step 4: Model Training (Logistic Regression)
y = df['sentiment']

# Split the cleaned text rather than the pre-computed matrix X, so the TF-IDF
# vocabulary and IDF weights can be learned from the training rows only.
# Fitting the vectorizer on the full dataset leaks test-set statistics into
# the features and makes the evaluation optimistic.
text_train, text_test, y_train, y_test = train_test_split(
    df['cleaned_text'], y, test_size=0.2, random_state=42
)

# Re-fit the existing vectorizer on the training split; fitting again replaces
# the earlier full-data fit, so `vectorizer` now matches the model trained
# below (this is the object that later gets saved and used for prediction).
# NOTE: the matrix X built earlier came from the previous fit and is no longer
# in sync with `vectorizer`; use X_train / X_test from here on.
X_train = vectorizer.fit_transform(text_train)
X_test = vectorizer.transform(text_test)

# Train a Logistic Regression model
model = LogisticRegression(max_iter=1500)
model.fit(X_train, y_train)


# model evaluation

In [1]:

# Step 5: Model Evaluation
# Predict labels for the held-out split, then report overall accuracy
# followed by the per-class precision/recall/F1 table.
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred))
print("Classification Report:")
print(classification_report(y_true=y_test, y_pred=y_pred))

# Step 6: Save the Model (Optional)
# Persist both artifacts: the classifier is only usable together with the
# vectorizer that produced its input features.
model_path, vectorizer_path = 'emotion_detection_model.pkl', 'tfidf_vectorizer.pkl'
joblib.dump(model, model_path)
joblib.dump(vectorizer, vectorizer_path)

# Function to predict sentiment from new text
def predict_sentiment(text):
    """Predict the label for a single piece of raw text.

    The text goes through the same ``preprocess_text`` cleaning used for the
    training data, is turned into a one-row feature matrix by the
    module-level ``vectorizer``, and is classified by the module-level
    ``model``.

    Parameters
    ----------
    text : str
        Raw, uncleaned input text.

    Returns
    -------
    The first (and only) element of ``model.predict`` for that row.
    """
    # The vectorizer expects an iterable of documents, hence the one-item list.
    features = vectorizer.transform([preprocess_text(text)])
    return model.predict(features)[0]

# Example usage of the prediction function:
# classify one hand-written sentence and print the predicted label.
# Requires `model` and `vectorizer` to exist in the kernel (the training cell must have run).
example_tweet = "this old man is really weak I wish I could help him out somehow"
predicted_sentiment = predict_sentiment(example_tweet)
print(f"Predicted Sentiment: {predicted_sentiment}")

NameError: name 'model' is not defined