In [4]:
import gensim
from gensim.models import Word2Vec
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import pickle
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# X is assumed to hold the raw text documents and y the matching labels.
# Tokenize on whitespace only; swap in a richer tokenizer if the data needs one.
X_tokens = [text.split() for text in X]

# Fit Word2Vec on the tokenized corpus: 100-dimensional vectors, a context
# window of 5, every token kept (min_count=1), and 4 worker threads.
word2vec_model = Word2Vec(
    sentences=X_tokens,
    vector_size=100,
    window=5,
    min_count=1,
    workers=4,
)

# Create average Word2Vec embeddings for each document
def get_word2vec_embedding(doc, model):
    """Return the mean Word2Vec vector of the in-vocabulary tokens of ``doc``.

    Args:
        doc: Iterable of tokens for a single document.
        model: Trained model exposing ``wv`` (a token -> vector lookup that
            supports ``in``) and ``vector_size``.

    Returns:
        A 1-D array of length ``model.vector_size``. When ``doc`` is empty or
        none of its tokens are in the vocabulary, a zero vector is returned.
    """
    keyed_vectors = model.wv
    vectors = [keyed_vectors[word] for word in doc if word in keyed_vectors]
    if not vectors:
        # np.mean([]) would give a scalar NaN (plus a RuntimeWarning); a
        # scalar among vectors makes the stacked embedding matrix ragged.
        # float32 matches the dtype gensim uses for its word vectors.
        return np.zeros(model.vector_size, dtype=np.float32)
    return np.mean(vectors, axis=0)

# Embed every document as the mean of its token vectors (one row per document).
X_embeddings = np.array([get_word2vec_embedding(doc, word2vec_model) for doc in X_tokens])

# Defensive: replace any NaN components with zeros so the classifier never
# sees non-finite features.
X_embeddings = np.nan_to_num(X_embeddings)

# Hold out 20% of the documents for evaluation; the fixed seed keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X_embeddings, y, test_size=0.2, random_state=42)

# Initialize and train the SVM model with a linear kernel
svm_model = SVC(kernel='linear')
svm_model.fit(X_train, y_train)

# Make predictions on the held-out documents
y_pred_svm = svm_model.predict(X_test)

# Evaluate the model's performance
print("SVM Accuracy:", accuracy_score(y_test, y_pred_svm))
print(classification_report(y_test, y_pred_svm))

# Save the SVM model and the Word2Vec model for future use
with open('svm_model_word2vec.pkl', 'wb') as f:
    pickle.dump(svm_model, f)

word2vec_model.save('word2vec_model.bin')

# Confusion Matrix to visualize performance.
# Pin the row/column order to the classes the SVM was fitted on. Without
# `labels=`, the matrix only covers labels that occur in y_test/y_pred_svm,
# so a class missing from the test split would shrink the matrix and no
# longer line up with the tick labels below.
class_labels = svm_model.classes_
cm = confusion_matrix(y_test, y_pred_svm, labels=class_labels)

# NOTE(review): 'Machine'/'Human' assumes the sorted label order is
# (machine, human) -- confirm against how y is encoded.
if len(class_labels) == 2:
    tick_labels = ['Machine', 'Human']
else:
    # Not a binary problem: fall back to the raw class values so the number
    # of tick labels always matches the matrix size.
    tick_labels = [str(label) for label in class_labels]

plt.figure(figsize=(6,4))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=tick_labels, yticklabels=tick_labels)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('SVM Confusion Matrix')
plt.show()


ModuleNotFoundError: No module named 'gensim'