# Naive Bayes

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, accuracy_score

# Read the labelled dataset from disk into a DataFrame
df = pd.read_csv('final2.csv')

# Dependencies for text cleaning: `re` for character filtering, NLTK for
# tokenisation and the English stop-word list.
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# Download the NLTK resources used below (tokenizer models, then stop words)
for resource in ('punkt', 'stopwords'):
    nltk.download(resource)
from functools import lru_cache


@lru_cache(maxsize=1)
def _english_stop_words():
    """Return NLTK's English stop words as a frozenset, loaded only once.

    Loading is deferred to the first call and cached afterwards, so the
    corpus is not re-read for every document that gets cleaned.
    """
    return frozenset(stopwords.words('english'))


def preprocess_function(text):
    """Clean a single document before it is vectorised.

    Steps, in order: drop every character that is not an ASCII letter or
    whitespace, lowercase, tokenize with NLTK, remove English stop words,
    and join the remaining tokens with single spaces.

    Args:
        text: The raw document. Anything that is not a ``str`` (for example
            the NaN pandas uses for an empty CSV cell, or ``None``) is
            treated as an empty document.

    Returns:
        The cleaned text as a single space-separated string; ``''`` when
        nothing is left after cleaning.
    """
    # Missing values reach us as float NaN / None when used with df.apply();
    # re.sub would raise TypeError on them.
    if not isinstance(text, str):
        return ''

    # Remove special characters and numbers (keep only letters and spaces),
    # then normalise case.
    text = re.sub(r'[^a-zA-Z\s]', '', text).lower()

    # Nothing but whitespace left: the result is empty, skip the tokenizer.
    if not text.strip():
        return ''

    # Tokenize and drop stop words (common words that carry little meaning)
    stop_words = _english_stop_words()
    words = [word for word in word_tokenize(text) if word not in stop_words]

    # Join the words back into a single string
    return ' '.join(words)

# Clean every document in the 'text' column before vectorising
df['text'] = df['text'].apply(preprocess_function)

# Split the data into training and testing sets (80/20, fixed seed)
X = df['text']
y = df['target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create TF-IDF vectors for the text data. The vocabulary is fitted on the
# training split only and reused unchanged for the test split.
vectorizer = TfidfVectorizer(max_features=5000)  # You can adjust the max_features as needed
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Train a Naive Bayes classifier
clf = MultinomialNB()
clf.fit(X_train_tfidf, y_train)

# Get user input for a keyword
keyword = input("Enter a keyword: ")

# Predict sentiment for the user's input. The keyword goes through the same
# cleaning as the training text so that it maps onto the fitted vocabulary.
keyword_tfidf = vectorizer.transform([preprocess_function(keyword)])
prediction = clf.predict(keyword_tfidf)

# Display the sentiment prediction. predict() returns one label per input
# row, so the single label is read out explicitly.
# NOTE(review): the recorded classification report lists three classes
# (0, 1, 2), but this branch only separates 1 from everything else --
# confirm what each label means and whether a third message is needed.
if prediction[0] == 1:
    print("Positive sentiment")
else:
    print("Negative sentiment")

# Evaluate the model on the held-out test split
y_pred = clf.predict(X_test_tfidf)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("\nAccuracy:", accuracy)
print("\nClassification Report:\n", report)


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Enter a keyword: dataset
Positive sentiment

Accuracy: 0.6747095621090259

Classification Report:
               precision    recall  f1-score   support

           0       0.69      0.63      0.66       716
           1       0.64      0.75      0.69       763
           2       0.70      0.64      0.67       759

    accuracy                           0.67      2238
   macro avg       0.68      0.67      0.67      2238
weighted avg       0.68      0.67      0.67      2238



# Random Forest Algorithm


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# Read the dataset from disk
df = pd.read_csv('final.csv')

# NOTE: no custom text cleaning is applied in this cell; the 'text' column
# is handed to the vectorizer as loaded.

# Features come from the 'text' column, labels from the 'target' column
X, y = df['text'], df['target']

# Hold out 20% of the rows for evaluation (fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# TF-IDF features limited to 5000 terms: fit on the training split,
# then apply the same vocabulary to the test split
vectorizer = TfidfVectorizer(max_features=5000)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Fit a seeded random forest of 100 trees on the training vectors
clf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train_tfidf, y_train)

# Ask the user for a keyword and classify it with the trained model
keyword = input("Enter a keyword: ")
keyword_tfidf = vectorizer.transform([keyword])
prediction = clf.predict(keyword_tfidf)

# Label 1 is reported as positive; every other label as negative.
# NOTE(review): the recorded classification report lists classes 0, 1 and 2
# -- confirm that a two-way message is what is intended.
print("Positive sentiment" if prediction == 1 else "Negative sentiment")

# Score the model on the held-out split
y_pred = clf.predict(X_test_tfidf)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("\nAccuracy:", accuracy)
print("\nClassification Report:\n", report)


Enter a keyword: fine
Negative sentiment

Accuracy: 0.6501340482573726

Classification Report:
               precision    recall  f1-score   support

           0       0.70      0.57      0.63       716
           1       0.69      0.61      0.65       763
           2       0.59      0.77      0.67       759

    accuracy                           0.65      2238
   macro avg       0.66      0.65      0.65      2238
weighted avg       0.66      0.65      0.65      2238



# SVM

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score

# Read the dataset from disk
df = pd.read_csv('final.csv')

# NOTE: the 'text' column is used as loaded; no custom cleaning step runs
# in this cell.

# Inputs (documents) and labels
X, y = df['text'], df['target']

# 80/20 train/test split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Build TF-IDF features (at most 5000 terms); the vocabulary is learned from
# the training documents and then applied to the test documents
vectorizer = TfidfVectorizer(max_features=5000)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Linear-kernel SVM with C=1.0, trained on the TF-IDF training vectors
clf = SVC(kernel='linear', C=1.0, random_state=42).fit(X_train_tfidf, y_train)

# Read a keyword from the user and classify it
keyword = input("Enter a keyword: ")
keyword_tfidf = vectorizer.transform([keyword])
prediction = clf.predict(keyword_tfidf)

# Only label 1 produces the positive message; any other label prints negative.
# NOTE(review): the recorded classification report shows classes 0, 1 and 2
# -- confirm the intended meaning of each label.
print("Positive sentiment" if prediction == 1 else "Negative sentiment")

# Measure performance on the held-out test documents
y_pred = clf.predict(X_test_tfidf)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("\nAccuracy:", accuracy)
print("\nClassification Report:\n", report)


Enter a keyword: fine
Positive sentiment

Accuracy: 0.7225201072386059

Classification Report:
               precision    recall  f1-score   support

           0       0.72      0.67      0.69       716
           1       0.76      0.69      0.72       763
           2       0.69      0.81      0.75       759

    accuracy                           0.72      2238
   macro avg       0.73      0.72      0.72      2238
weighted avg       0.73      0.72      0.72      2238

