In [None]:
import joblib
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np

# Step 1: Restore the three persisted artifacts used for inference: the trained
# model, the TF-IDF vectorizer and the label encoder. They are loaded in that
# order and bound to the module-level names the later cells rely on.
# NOTE: joblib files are pickle-based; only load artifacts from a trusted source.
model, tfidf_vectorizer, label_encoder = (
    joblib.load(artifact_path)
    for artifact_path in (
        'stock_sentiment_model.pkl',
        'tfidf_vectorizer.pkl',
        'label_encoder.pkl',
    )
)


In [None]:
# Step 2: Load the new Reddit data (assumed to have been fetched already).
df = pd.read_csv('abc.csv')

# Step 3: Build 'cleaned_text' by joining the 'title' and 'text' columns.
#
# Missing values are replaced *before* concatenation. In pandas, adding a
# string to NaN yields NaN, so concatenating first and filling afterwards would
# blank the whole row whenever only one of the two columns is missing (for
# example a post that has a title but no body) and the title would be lost.
title_part = df['title'].fillna('').astype(str)
text_part = df['text'].fillna('').astype(str)

# Step 4: Join the two parts. The strip removes the dangling separator left
# when one side is empty, so a row with neither title nor text ends up as ''.
df['cleaned_text'] = (title_part + " " + text_part).str.strip()

In [None]:
# Step 5: Assemble the feature matrix passed to the model.

# Numeric columns taken straight from the DataFrame; they form the left-most
# columns of the final matrix. Extend this list if more numeric features exist.
X_numerical = df.loc[:, ['score', 'upvotes', 'comments']]

# Text features: transform (not fit) with the vectorizer loaded earlier, so the
# vocabulary is whatever that vectorizer already holds.
X_tfidf = tfidf_vectorizer.transform(df['cleaned_text'])

# Place the numeric columns and the TF-IDF columns side by side. toarray()
# converts the TF-IDF output to a dense array before joining.
# NOTE(review): nothing here checks that the column count/order matches what
# the model was trained on - confirm against the training notebook.
X_combined = np.concatenate((X_numerical, X_tfidf.toarray()), axis=1)

In [None]:
# Step 6: Run the loaded model on the combined feature matrix.
y_pred = model.predict(X_combined)

# Step 7: Pass the predictions through the label encoder's inverse transform
# to recover the original sentiment category labels.
y_pred_labels = label_encoder.inverse_transform(y_pred)

# Step 8: Store the decoded labels as a new column next to the source rows.
df['predicted_sentiment'] = y_pred_labels

# Step 9: Show the first five rows of title + predicted sentiment.
print(df.loc[:, ['title', 'predicted_sentiment']].head(n=5))

In [None]:
# Step 10 (optional): Write the DataFrame, including the prediction column,
# to a CSV file. index=False leaves the row index out of the file.
df.to_csv(
    path_or_buf='reddit_predictions_with_sentiment.csv',
    index=False,
)