<a href="https://colab.research.google.com/github/Jaizxzx/TTS/blob/main/Lab/Lab4-Word2Vec.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Loading the CSV data

In [46]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Location of the labelled dataset (Colab upload path); change it here when
# running somewhere else.
DATA_PATH = '/content/sample.csv'

# An empty CSV cell is read back as NaN. Such rows can neither be tokenized nor
# used as a target, so drop them up front rather than failing in a later cell.
df = pd.read_csv(DATA_PATH).dropna(subset=['text', 'label'])
X = df['text']
y = df['label']

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

### Basic Preprocessing and Word2Vec model training

In [47]:
from gensim.models import Word2Vec
from gensim.utils import simple_preprocess

def preprocess(text):
    """Tokenize one raw document into a list of word tokens.

    Delegates to gensim's ``simple_preprocess`` for the actual tokenization.
    Values that are not text (for example ``None`` or the float ``NaN`` that
    pandas uses for an empty CSV cell) produce an empty token list instead of
    raising inside gensim.

    Args:
        text: The raw document, as ``str`` or ``bytes``.

    Returns:
        list: The tokens returned by ``simple_preprocess``, or ``[]`` when
        ``text`` is not ``str``/``bytes``.
    """
    # Guard first: gensim tries to decode anything that is not already a str,
    # which fails for floats/None.
    if not isinstance(text, (str, bytes)):
        return []
    return simple_preprocess(text)

# Tokenize the training split only; the embeddings are learned from these
# documents and nothing from the test split.
X_train_preprocessed = [preprocess(text) for text in X_train]

# Word2Vec training is stochastic. `seed` fixes its random number generator and
# `workers=1` removes the thread-scheduling jitter that makes multi-worker runs
# differ even with a seed. Per the gensim docs, PYTHONHASHSEED must also be
# fixed for bit-identical results across interpreter launches.
model = Word2Vec(
    sentences=X_train_preprocessed,
    vector_size=100,
    window=5,
    min_count=1,
    workers=1,
    seed=42,
)


### Converting each document into a NumPy vector (mean of its word vectors)

In [48]:
import numpy as np

def get_document_vector(text, model):
    """Embed a document as the average of its in-vocabulary word vectors.

    Args:
        text: Raw document; it is tokenized with ``preprocess``.
        model: Trained model exposing ``wv`` (token -> vector lookup that
            supports ``in``) and ``vector_size``.

    Returns:
        numpy.ndarray: Element-wise mean of the vectors of every token found
        in ``model.wv``. If no token is found, a zero vector of length
        ``model.vector_size``.
    """
    keyed_vectors = model.wv
    known_tokens = [token for token in preprocess(text) if token in keyed_vectors]

    # No known token means there is nothing to average: fall back to zeros.
    if not known_tokens:
        return np.zeros(model.vector_size)

    return np.mean([keyed_vectors[token] for token in known_tokens], axis=0)

# Embed both splits with the same trained model: one row per document,
# training split first, then the test split.
X_train_vectors, X_test_vectors = (
    np.array([get_document_vector(document, model) for document in split])
    for split in (X_train, X_test)
)

### Classifying the documents with an SVM classifier

In [53]:
from sklearn.svm import SVC

# Hyper-parameters of the RBF-kernel SVM, gathered in one place for easy tuning.
svm_params = {'kernel': 'rbf', 'C': 1.0, 'random_state': 42}

svm_classifier = SVC(**svm_params)
# Fit on the averaged Word2Vec document vectors of the training split.
svm_classifier.fit(X_train_vectors, y_train)

### Evaluating the model

In [54]:
from sklearn.metrics import classification_report, accuracy_score

def evaluate_model(model, X_test, y_test):
    """Print the accuracy and per-class report of a fitted classifier.

    Args:
        model: Fitted estimator exposing ``predict``.
        X_test: Feature rows passed to ``model.predict``.
        y_test: True labels the predictions are compared against.

    Returns:
        None. The results are written to standard output only.
    """
    predictions = model.predict(X_test)
    accuracy = accuracy_score(y_test, predictions)
    report = classification_report(y_test, predictions)

    print(f"Accuracy: {accuracy}")
    print(report)

# Report how the fitted SVM performs on the held-out test vectors.
print("SVM Results:")
evaluate_model(model=svm_classifier, X_test=X_test_vectors, y_test=y_test)



SVM Results:
Accuracy: 0.6944444444444444
              precision    recall  f1-score   support

    negative       0.70      0.74      0.72        19
    positive       0.69      0.65      0.67        17

    accuracy                           0.69        36
   macro avg       0.69      0.69      0.69        36
weighted avg       0.69      0.69      0.69        36

