## Gender Prediction from Name

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

In [2]:
from sklearn.naive_bayes import GaussianNB,MultinomialNB,BernoulliNB
from sklearn.metrics import accuracy_score,f1_score
from sklearn.feature_extraction import DictVectorizer


### Predicting Gender from name 

In [3]:
# Load the raw name/gender table.
# NOTE: the file name really does contain a space before ".csv".
data = pd.read_csv("GenderPrediction .csv")

In [4]:
# Inspect the column labels to see what the CSV actually contains before cleaning.
data.columns

Index(['Name', 'Gender', 'LastLetter', ' ', ' .1', ' .2', ' .3', ' .4', ' .5',
       ' .6', ' .7', ' .8', ' .9', ' .10', ' .11', ' .12', ' .13', ' .14',
       ' .15', ' .16', ' .17', ' .18', ' .19', ' .20', ' .21', ' .22'],
      dtype='object')

In [5]:
# Keep only the three informative columns. Selecting them by name (rather than
# dropping the 23 blank-named columns one by one) produces the same frame and
# keeps this cell safe to re-run: calling drop() again on labels that were
# already removed would raise KeyError.
data = data[['Name', 'Gender', 'LastLetter']]

In [6]:
# Preview the first rows to confirm the cleaned frame looks as expected.
data.head()

Unnamed: 0,Name,Gender,LastLetter
0,Ashutosh,Male,h
1,Meghamala,Female,a
2,Sahib,Male,b
3,Pragya,Female,a
4,Kranti,Female,i


### Splitting data into training and validation sets

In [7]:
# Feature matrix: a one-column DataFrame holding each name's last letter.
feature_columns = ['LastLetter']
x = data[feature_columns]

# Target vector: the gender label for each name.
y = data['Gender']

In [8]:
# Hold out 30% of the rows for validation. The split is stratified on the
# label and uses a fixed seed so it is reproducible across runs.
X_train, X_validation, y_train, y_validation = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=40,
    stratify=y,
)

In [9]:
# DictVectorizer consumes a list of {column: value} records, so convert both
# splits from DataFrames to that record form. Dense output is requested.
vectorizer = DictVectorizer(sparse=False)
X_train_dict, X_validation_dict = (
    frame.to_dict(orient='records') for frame in (X_train, X_validation)
)

In [10]:
# Learn the feature mapping from the training records only, then apply that
# same fitted mapping to both splits.
vectorizer.fit(X_train_dict)
X_train_encoded = vectorizer.transform(X_train_dict)
X_validation_encoded = vectorizer.transform(X_validation_dict)

### Model training, prediction, and evaluation (accuracy and F1 score)

In [11]:
# Fit a multinomial Naive Bayes classifier on the encoded training letters.
model = MultinomialNB().fit(X_train_encoded, y_train)

# Predict labels for the held-out validation split.
y_pred = model.predict(X_validation_encoded)

# Score the predictions; 'Female' is treated as the positive class for F1.
accuracy = accuracy_score(y_validation, y_pred)
f1 = f1_score(y_validation, y_pred, pos_label='Female')

print(
    f"Accuracy of MultinomialNB(): {accuracy:.4f}",
    f"F1-score for MultinomialNB(): {f1:.4f}",
    sep="\n",
)

Accuracy of MultinomialNB(): 0.8191
F1-score for MultinomialNB(): 0.8219


### Predicting gender for a new name

In [None]:
def predict_gender(name):
    """Predict the gender label for a name from its final letter.

    Surrounding whitespace is stripped first, so a trailing space or newline
    from interactive input is not mistaken for the last letter. The last
    character is then lower-cased, encoded with the notebook-level
    ``vectorizer`` and classified by the notebook-level ``model``.

    Parameters
    ----------
    name : str
        The name to classify.

    Returns
    -------
    The first (and only) element of ``model.predict`` for the encoded letter.

    Raises
    ------
    ValueError
        If ``name`` is empty or contains only whitespace.
    """
    cleaned_name = name.strip()
    if not cleaned_name:
        # Fail with a clear message instead of an IndexError from name[-1].
        raise ValueError("name must contain at least one non-whitespace character")

    last_letter = cleaned_name[-1].lower()
    # Same {'LastLetter': value} record shape that is passed to the vectorizer here.
    last_letter_dict = {'LastLetter': last_letter}
    # NOTE(review): a character never seen when the vectorizer was fitted is
    # expected to encode as an all-zero row (DictVectorizer ignores unseen
    # features), so the prediction would rest on the class priors alone —
    # confirm against the scikit-learn docs if that case matters.
    last_letter_encoded = vectorizer.transform([last_letter_dict])
    prediction = model.predict(last_letter_encoded)
    return prediction[0]

In [None]:
# Ask for a name interactively, classify it, and report the result.
new_name = input("Enter a name to predict its gender: ")
predicted_gender = predict_gender(new_name)

message = f"The predicted gender for the name {new_name} is: {predicted_gender}"
print(message)