In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report

# Train and evaluate a TF-IDF + Multinomial Naive Bayes classifier that
# predicts the 'region' column from the 'text' column of the tweets CSV.
tweets_df = pd.read_csv("tweets_country_region.csv")

# Drop rows missing either the label or the tweet body: an unlabeled row
# cannot be used for training/evaluation, and TfidfVectorizer raises on
# NaN documents, so a single empty 'text' cell would abort the pipeline.
tweets_df = tweets_df.dropna(subset=['text', 'region'])

# Fixed random_state keeps the 80/20 split reproducible between runs.
train_df, test_df = train_test_split(tweets_df, test_size=0.2, random_state=42)

# Work on an independent copy of the test split so that adding the
# prediction column below does not write into a slice of tweets_df
# (which can emit pandas' SettingWithCopyWarning).
test_df = test_df.copy()

# Fit the vocabulary/IDF weights on the training split only, then reuse
# them for the test split so no test information leaks into the features.
vectorizer = TfidfVectorizer(max_features=5000, stop_words='english')
X_train = vectorizer.fit_transform(train_df['text'])
X_test = vectorizer.transform(test_df['text'])

nb_model = MultinomialNB()
nb_model.fit(X_train, train_df['region'])

# Store predictions next to the true labels for easy side-by-side inspection.
test_df['predicted_region'] = nb_model.predict(X_test)

print("Classification Report:\n", classification_report(test_df['region'], test_df['predicted_region']))

print(test_df[['text', 'region', 'predicted_region']].head())
print(f"Test DataFrame dimensions: {test_df.shape}")
