In [None]:
import pandas as pd 
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score


# Load the review dataset from the Excel workbook (path is relative to the
# notebook's working directory) and show it as the cell output.
data = pd.read_excel(io="Book1.xlsx")
data

In [None]:
# Turn the pre-processed review text into TF-IDF features and append them,
# column-wise, to the original table.
tfidf_vectorizer = TfidfVectorizer(
    max_features=1000,  # vocabulary cap; adjust the number of features as needed
    stop_words="english",
)

# Blank spreadsheet cells typically come back from read_excel as NaN, and
# purely numeric cells as numbers. TfidfVectorizer only accepts strings (it
# raises on NaN documents), so normalise every entry to text first.
review_texts = data['processed_reviews'].fillna("").astype(str)

tfidf_matrix = tfidf_vectorizer.fit_transform(review_texts)

# Dense copy: one row per review, one column per vocabulary term.
tfidf_dense_matrix = tfidf_matrix.toarray()

# Reuse data's index so the axis=1 concat below pairs each TF-IDF row with the
# review it was computed from, even if `data` was filtered or re-indexed.
tfidf_df = pd.DataFrame(
    tfidf_dense_matrix,
    columns=tfidf_vectorizer.get_feature_names_out(),
    index=data.index,
)

# NOTE(review): if a vocabulary term equals an existing column name in `data`,
# the result has duplicate column labels and selecting that label returns both
# columns — confirm the workbook's column names cannot collide with terms.
final_data = pd.concat([data, tfidf_df], axis=1)

final_data

In [None]:

# Train a linear-kernel SVM on the TF-IDF columns of `final_data`, evaluate it
# on a held-out 20% split, and report accuracy, per-class metrics and a
# labelled confusion matrix.
#
# NOTE(review): `tfidf_vectorizer` was fitted on the full dataset before this
# split, so the test rows influenced the vocabulary/IDF weights. For a strictly
# leak-free estimate, split first and fit the vectorizer on the training rows only.
svm_model = SVC(kernel="linear")  # kernel and other hyperparameters can be tuned

# Fixed random_state keeps the split reproducible across runs.
train_data, test_data = train_test_split(final_data, test_size=0.2, random_state=42)

target_column = "review_tag"
features_columns = tfidf_vectorizer.get_feature_names_out()

X_train = train_data[features_columns]
y_train = train_data[target_column]

X_test = test_data[features_columns]
y_test = test_data[target_column]

svm_model.fit(X_train, y_train)
predictions = svm_model.predict(X_test)

report = classification_report(y_test, predictions)

# Sorted union of the labels seen in y_test and predictions: the same set and
# order confusion_matrix uses by default, made explicit so the heatmap ticks
# are guaranteed to line up with the matrix rows/columns.
class_labels = sorted(set(y_test) | set(predictions))
conf_matrix = confusion_matrix(y_test, predictions, labels=class_labels)

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt="d",
    cmap="Blues",
    cbar=False,
    xticklabels=class_labels,  # show class names instead of positional indices
    yticklabels=class_labels,
    ax=ax,
)
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
ax.set_title("Confusion Matrix")
plt.show()

accuracy = accuracy_score(y_test, predictions)

print("\nAccuracy:", accuracy)
print("\nClassification Report:\n", report)
