In [None]:
import pandas as pd
import numpy as np
import nltk
nltk.download('stopwords')
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix,classification_report

#Reading the dataset
df = pd.read_csv('disaster_tweets_data(DS).csv')
#Handle null values(if any)
# Rows missing either the tweet text or the label cannot be used for
# supervised training, and a NaN tweet would break text.split() during
# preprocessing, so drop them before the columns are extracted. When the
# file has no nulls this is a no-op.
df = df.dropna(subset=['tweets', 'target']).reset_index(drop=True)
tweets = df['tweets']
target = df['target']
#Preprocess the disaster tweet data
import functools


@functools.lru_cache(maxsize=None)
def _load_text_resources():
    """Download WordNet once and return (stop_words, lemmatizer).

    The result is cached, so the download check, the stop-word list and the
    lemmatizer are created a single time instead of once per tweet.
    """
    import nltk
    from nltk.corpus import stopwords
    from nltk.stem import WordNetLemmatizer

    nltk.download('wordnet')
    # A frozenset gives constant-time membership tests for every token.
    return frozenset(stopwords.words('english')), WordNetLemmatizer()


def preprocess_text(text):
    """Normalize one tweet into a space-joined string of lemmatized tokens.

    Steps, in order: split on whitespace, lowercase, strip punctuation from
    both ends of each token, drop empty tokens and English stop words, then
    lemmatize what is left.

    Args:
        text: The raw tweet text (a string).

    Returns:
        The cleaned tokens joined by single spaces.
    """
    import string

    stop_words, lemmatizer = _load_text_resources()
    # Strip punctuation at the token edges so "fire!" becomes "fire" and
    # "the," is recognized as a stop word. Interior punctuation (URLs,
    # contractions such as "don't") is left untouched.
    tokens = (word.lower().strip(string.punctuation) for word in text.split())
    kept = [
        lemmatizer.lemmatize(token)
        for token in tokens
        if token and token not in stop_words
    ]
    return " ".join(kept)
processed_tweets = [preprocess_text(tweet) for tweet in tweets]
#Train-Test Split
# Split the cleaned text BEFORE vectorizing so the vocabulary is learned from
# training tweets only; fitting on the full corpus leaks test-set vocabulary
# into the features. The row assignment depends only on the number of
# samples, test_size and random_state, so the same tweets land in each split.
tweets_train, tweets_test, y_train, y_test = train_test_split(
    processed_tweets, target, test_size=0.2, random_state=42
)
#Transforming the words into vectors using CountVectorizer
vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(tweets_train)
X_test = vectorizer.transform(tweets_test)
# Full matrix in original tweet order, kept for any later code that reads X.
# Its columns follow the training-only vocabulary.
X = vectorizer.transform(processed_tweets)
#Model Training and Evaluation
from sklearn.metrics import accuracy_score, f1_score

models = [
    ("Multinomial Naive Bayes", MultinomialNB()),
    ("Logistic Regression", LogisticRegression()),
    ("KNN", KNeighborsClassifier()),
]

# One [name, accuracy, f1] row per model, in the same order as `models`.
results = []
for name, model in models:
    model.fit(X_train, y_train)
    #Predict the target for test data
    y_pred = model.predict(X_test)

    #Calculate confusion matrix and classification report of this model
    # This runs inside the loop on purpose: after the loop, `name`, `model`
    # and `y_pred` only refer to the last model, so per-model reports placed
    # after it would only ever fire for that one.
    cm = confusion_matrix(y_test, y_pred)
    cr = classification_report(y_test, y_pred)
    print(name)
    print("Confusion Matrix:")
    print(cm)
    print("Classification Report:")
    print(cr)

    #Calculate Accuracy(i am doing this as optional)
    # Computed from the predictions already made, so the test set is not
    # predicted a second time.
    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)
    #Calculate F-1 Score(i am using any metrics of my choice)
    f1 = f1_score(y_test, y_pred)
    print("F1 Score:", f1)

    #Store result
    result = [name, accuracy, f1]
    results.append(result)

#Report the best Model
# Pick the (name, model) pair with the highest stored accuracy instead of
# re-scoring every model; on a tie the earliest model in `models` wins.
best_model = max(zip(models, results), key=lambda pair: pair[1][1])[0]
print("Best Model:", best_model[0])




[1;30;43mStreaming output truncated to the last 5000 lines.[0m
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Dow

Multinomial Naive Bayes
[[737 137]
 [163 486]]
              precision    recall  f1-score   support

           0       0.82      0.84      0.83       874
           1       0.78      0.75      0.76       649

    accuracy                           0.80      1523
   macro avg       0.80      0.80      0.80      1523
weighted avg       0.80      0.80      0.80      1523

Accuracy: 0.8030203545633617
F1 Score: 0.7641509433962264
Logistic Regression
[[755 119]
 [189 460]]
              precision    recall  f1-score   support

           0       0.80      0.86      0.83       874
           1       0.79      0.71      0.75       649

    accuracy                           0.80      1523
   macro avg       0.80      0.79      0.79      1523
weighted avg       0.80      0.80      0.80      1523

Accuracy: 0.7977675640183848
F1 Score: 0.749185667752443
KNN
[[852  22]
 [438 211]]
              precision    recall  f1-score   support

           0       0.66      0.97      0.79       874
     