In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
import pickle
import nltk
from nltk.corpus import stopwords
import string
import seaborn as sns
from nltk.stem.porter import PorterStemmer
from wordcloud import WordCloud
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC  # Import SVM
from sklearn.metrics import accuracy_score, precision_score

In [None]:
# Fetch the NLTK resources used further down: the Punkt tokenizer models
# needed by nltk.word_tokenize and the stop-word lists read through
# nltk.corpus.stopwords.
# Recent NLTK releases load the tokenizer from the 'punkt_tab' package instead
# of the pickled 'punkt' one, so both are requested to keep the notebook
# runnable on old and new installations alike.
for nltk_resource in ('punkt', 'punkt_tab', 'stopwords'):
    nltk.download(nltk_resource)

In [None]:
# Read the review corpus from disk and list the columns it provides
dataset_path = 'fake reviews dataset.csv'
df = pd.read_csv(dataset_path)
print(df.columns)

In [None]:
# Discard the 'category' column. The frame is rebound instead of mutated in
# place, and errors='ignore' skips a column that is already gone, so running
# this cell a second time no longer raises KeyError.
df = df.drop(columns=['category'], errors='ignore')
print(df.columns)

In [None]:
# Print a summary of the frame: column names, non-null counts and dtypes
df.info()

In [None]:
# Give the review column a cleaner name ('text_' becomes 'text'), then preview the frame
column_aliases = {'text_': 'text'}
df.rename(columns=column_aliases, inplace=True)
df.head()

In [7]:
# Function to encode the target labels
def encode_label(df):
    """Add a numeric ``target`` column derived from the ``label`` column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a ``label`` column with the class codes.

    Returns
    -------
    pandas.DataFrame
        The same frame object (it is modified in place), with ``target`` set
        to 1 for ``'CG'``, 0 for ``'OR'`` and NaN for any other or missing
        label.
    """
    # Both classes are mapped explicitly so that NaN only marks missing or
    # unexpected labels instead of also standing in for the genuine class.
    # NOTE(review): 'OR' is assumed to be the genuine-review code used by this
    # dataset - confirm with df['label'].unique().
    labels = {
        'CG': 1,  # Fake review
        'OR': 0,  # Genuine review
    }
    df['target'] = df['label'].map(labels)
    return df


In [None]:
# Attach the numeric target column, then coerce it to plain integers:
# missing values and infinities are both replaced by 0 before the cast
df = encode_label(df)
target_filled = df['target'].fillna(0)
target_finite = target_filled.replace([np.inf, -np.inf], 0)
df['target'] = target_finite.astype(int)
df.head()

In [9]:
# Preprocessing: Tokenization, removing stopwords, stemming
# Shared Porter stemmer instance; transform_text calls ps.stem on every kept token
ps = PorterStemmer()

In [None]:
# Build the English stop-word set once. Calling stopwords.words('english')
# for every token re-reads the word list and scans it linearly each time,
# which dominates the runtime when the function is applied to a whole column.
STOP_WORDS = frozenset(stopwords.words('english'))


def transform_text(text):
    """Normalise one review into a space-separated string of stemmed tokens.

    The text is lower-cased and tokenised with ``nltk.word_tokenize``; tokens
    that are not purely alphanumeric or that are English stop words are
    dropped, and the remaining tokens are stemmed with the module-level
    Porter stemmer ``ps``.

    Parameters
    ----------
    text : str
        Raw review text. Non-string values (for example NaN coming from an
        empty CSV cell) are treated as empty text.

    Returns
    -------
    str
        The stemmed tokens joined by single spaces; ``""`` when no token
        survives the filtering.
    """
    # Guard against missing values, which have no .lower() method.
    if not isinstance(text, str):
        return ""

    tokens = nltk.word_tokenize(text.lower())
    return " ".join(
        ps.stem(token)
        for token in tokens
        if token.isalnum() and token not in STOP_WORDS
    )

# Preview the first rows of the frame (transform_text is only defined above, not applied yet)
df.head()

In [None]:
# Clean every review with transform_text and keep the result in its own column
cleaned_reviews = df['text'].apply(transform_text)
df['transformed_text'] = cleaned_reviews

In [None]:
# TF-IDF Vectorization, with the vocabulary capped at 3000 terms.
# The feature matrix is kept in the sparse form returned by fit_transform:
# .toarray() would allocate a dense n_reviews x 3000 float matrix, while
# train_test_split and SVC both accept sparse input directly. A model fitted
# on sparse data still accepts dense arrays at prediction time.
# NOTE(review): the vectorizer is fitted on the full corpus before the
# train/test split, so its vocabulary and IDF weights also see the test rows;
# consider fitting it on the training portion only.
tfidf = TfidfVectorizer(max_features=3000)
X = tfidf.fit_transform(df['transformed_text'])
y = df['target'].values

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
split_parts = train_test_split(X, y, test_size=0.2, random_state=32)
X_train, X_test, y_train, y_test = split_parts

In [None]:
# Train an SVM classifier with a linear kernel on the training split.
# Other kernels ('rbf', 'poly', 'sigmoid') can be selected through the `kernel` argument.
svm_model = SVC(kernel='linear')
svm_model.fit(X_train, y_train)

In [None]:
# Score the held-out split: overall accuracy plus precision averaged over
# the classes, weighted by how often each class occurs in y_test
y_pred = svm_model.predict(X_test)
accuracy, precision = (
    accuracy_score(y_test, y_pred),
    precision_score(y_test, y_pred, average='weighted'),
)

In [None]:
# Report both evaluation metrics, one per line
print(f"Accuracy: {accuracy}", f"Precision: {precision}", sep="\n")

In [None]:
# Persist the fitted classifier and its matching vectorizer as pickle files
for artifact_path, artifact in (
    ('svm_model.pkl', svm_model),
    ('svm_vectorizer.pkl', tfidf),
):
    with open(artifact_path, 'wb') as artifact_file:
        pickle.dump(artifact, artifact_file)