In [5]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split

In [2]:
import pandas as pd

# Replace 'your_file.tsv' with the path to your TSV file
file_path = '/content/sentimentalreview_IBMpro.tsv'

# Use Pandas to read the TSV file into a DataFrame
try:
    df = pd.read_csv(file_path, sep='\t')  # Assuming tab ('\t') is used as the separator
    # You can specify other options like encoding, header, etc., based on your file's characteristics.

    # Now, you can work with the DataFrame 'df' as needed.
    print("File uploaded successfully. DataFrame shape:", df.shape)

    # Example: Display the first few rows of the DataFrame
    print(df.head())

except FileNotFoundError:
    print("File not found. Please check the file path.")
except pd.errors.EmptyDataError:
    print("File is empty. Please provide a valid TSV file.")
except pd.errors.ParserError:
    print("Error parsing the file. Check the file format and delimiter.")


File uploaded successfully. DataFrame shape: (1000, 2)
                                              Review  Liked
0                           Wow... Loved this place.      1
1                                 Crust is not good.      0
2          Not tasty and the texture was just nasty.      0
3  Stopped by during the late May bank holiday of...      1
4  The selection on the menu was great and so wer...      1


In [7]:
df['Review'].fillna('', inplace=True)  # Replace NaN values in the 'text' column with empty strings
df['Liked'].fillna(0, inplace=True)  # Replace NaN values in the 'label' column with 0 (or any appropriate value)
X = df['Review']
y = df['Liked'].astype(int)

In [8]:
tfidf_vectorizer = TfidfVectorizer(max_features=5000)  # You can adjust max_features as needed
X_tfidf = tfidf_vectorizer.fit_transform(X)


In [9]:
X_train, X_test, y_train, y_test = train_test_split(X_tfidf, y, test_size=0.2, random_state=42)

In [10]:
svm_classifier = SVC(kernel='linear', random_state=42)

# Create Naive Bayes classifier (Multinomial Naive Bayes)
nb_classifier = MultinomialNB()

In [11]:
voting_classifier = VotingClassifier(estimators=[('svm', svm_classifier), ('nb', nb_classifier)], voting='hard')

# Train the voting classifier
voting_classifier.fit(X_train, y_train)

# Predict using the voting classifier
y_pred_voting = voting_classifier.predict(X_test)

In [12]:
voting_accuracy = accuracy_score(y_test, y_pred_voting)
print("Voting Classifier Accuracy:", voting_accuracy)


Voting Classifier Accuracy: 0.8


In [13]:
precision = precision_score(y_test, y_pred_voting)
recall = recall_score(y_test, y_pred_voting)
f1 = f1_score(y_test, y_pred_voting)

print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)

Precision: 0.9102564102564102
Recall: 0.6826923076923077
F1-score: 0.7802197802197801


In [14]:
from sklearn.metrics import precision_score, recall_score, f1_score


In [15]:
weighted_precision = precision_score(y_test, y_pred_voting, average='weighted')
macro_precision = precision_score(y_test, y_pred_voting, average='macro')

In [17]:
weighted_recall = recall_score(y_test, y_pred_voting, average='weighted')
macro_recall = recall_score(y_test, y_pred_voting, average='macro')

# Calculate weighted average and macro average F1-score
weighted_f1 = f1_score(y_test, y_pred_voting, average='weighted')
macro_f1 = f1_score(y_test, y_pred_voting, average='macro')


In [18]:
print("Weighted Precision:", weighted_precision)
print("Macro Precision:", macro_precision)
print("Weighted Recall:", weighted_recall)
print("Macro Recall:", macro_recall)
print("Weighted F1-score:", weighted_f1)
print("Macro F1-score:", macro_f1)

Weighted Precision: 0.823497267759563
Macro Precision: 0.8198823034888609
Weighted Recall: 0.8
Macro Recall: 0.8048878205128205
Weighted F1-score: 0.7976408912188728
Macro F1-score: 0.798366770843835


In [19]:
if 0.8 <= voting_accuracy:
    print("Target accuracy (80%) achieved!")
else:
    print("Target accuracy not achieved. You may need to fine-tune the models or gather more data.")

Target accuracy (80%) achieved!
