In [3]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

# Step 1: Load the dataset
data = pd.read_csv('Country_full.csv')

# Step 2: Split the data into features (X) and target variable (y)
X = data['Name']
y = data['Country']

# Replace NaN values with an empty string 
X.fillna('', inplace=True)

# Feature extraction using CountVectorizer
vectorizer = CountVectorizer()
X_vectorized = vectorizer.fit_transform(X)

# Step 3: Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_vectorized, y, test_size=0.7, random_state=95)

# Step 4: Choose a model and train it
model = MultinomialNB()
model.fit(X_train, y_train)

# Step 5: Make predictions on the test set
y_pred = model.predict(X_test)

# Step 6: Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Model Accuracy: {accuracy * 100:.2f}%')

print("\nClassification Report:")
print(classification_report(y_test, y_pred, zero_division=1))


# Step 7: Use the model to make predictions on new data
new_names = ['Jolly Joseph']
new_names_vectorized = vectorizer.transform(new_names)
new_predictions = model.predict(new_names_vectorized)
print(f'New Predictions: {new_names} - {new_predictions}')

Model Accuracy: 67.29%

Classification Report:
                                  precision    recall  f1-score   support

                         Armenia       0.98      0.61      0.76      3425
                       Australia       0.04      0.00      0.01      3065
                         Austria       0.90      0.53      0.66      4195
                      Azerbaijan       0.98      0.77      0.86      3845
                      Bangladesh       0.07      0.02      0.03      3485
                         Belgium       0.04      0.00      0.00      2402
          Bosnia and Herzegovina       0.07      0.02      0.03      3297
                          Brazil       0.70      0.93      0.80      3837
                        Bulgaria       0.93      0.85      0.89      4296
                          Canada       0.35      0.45      0.39      6353
                           China       0.80      0.79      0.80      3197
                        Colombia       0.67      0.87      0.76 