In [1]:
import pandas as pd
from gensim.models import Word2Vec
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import joblib
import numpy as np

# End-to-end training script: learn Word2Vec embeddings on the sentences,
# turn every sentence into a fixed-size block of word vectors, fit a random
# forest on the flattened blocks, report hold-out accuracy and persist both
# models.

# Width of each word embedding. Shared by the Word2Vec model and the feature
# tensor so the two cannot drift apart.
VECTOR_SIZE = 100

# Load the dataset (expects a 'sentence' text column and a 'mood' label column)
df = pd.read_csv('sentiment.csv')

# Preprocess the data if needed (handling missing values, text cleaning, etc.)

# Tokenize text data on whitespace
tokenized_text = [sentence.split() for sentence in df['sentence']]

# Train Word2Vec model; min_count=1 keeps every token in the vocabulary, so
# the lookups below never hit an unknown word.
word2vec_model = Word2Vec(
    sentences=tokenized_text,
    vector_size=VECTOR_SIZE,
    window=5,
    min_count=1,
    workers=4,
)

# Prepare data for model training.
# Reuse the tokens computed above instead of splitting every sentence again.
max_length = max(len(tokens) for tokens in tokenized_text)

# One (max_length, VECTOR_SIZE) slab per sentence; positions past the end of a
# shorter sentence stay zero (right padding). float32 matches the precision of
# the Word2Vec vectors and of scikit-learn's tree input, halving the memory
# that the default float64 buffer would need.
X = np.zeros((len(tokenized_text), max_length, VECTOR_SIZE), dtype=np.float32)
for i, tokens in enumerate(tokenized_text):
    for j, word in enumerate(tokens):
        X[i, j] = word2vec_model.wv[word]  # Word vector at its token position

y = df['mood']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train sentiment analysis model (example: Random Forest).
# The classifier needs 2-D input, so each sentence's slab is flattened into a
# single row of max_length * VECTOR_SIZE features.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train.reshape(X_train.shape[0], -1), y_train)

# Make predictions on the test set
y_pred = model.predict(X_test.reshape(X_test.shape[0], -1))

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Model Accuracy: {accuracy}')

# Save the trained models together as a (word2vec_model, model) tuple.
# NOTE(review): max_length is not stored in the file; inference code has to
# pad/truncate new sentences to the same length used here.
joblib.dump((word2vec_model, model), 'sentiment_model.pkl')


Model Accuracy: 0.8037974683544303


['sentiment_model.pkl']