In [2]:
import pandas as pd

In [3]:
df = pd.read_csv('name_gender.csv')
df.shape

(95024, 2)

In [4]:
df.columns=["Names","Gender"]
df.head()

Unnamed: 0,Names,Gender
0,Aabha,F
1,Aabid,M
2,Aabriella,F
3,Aada,F
4,Aadam,M


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 95024 entries, 0 to 95023
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Names   95024 non-null  object
 1   Gender  95024 non-null  object
dtypes: object(2)
memory usage: 1.5+ MB


In [6]:
df.isnull().sum()

Unnamed: 0,0
Names,0
Gender,0


In [7]:
df['Gender'].value_counts()

Unnamed: 0_level_0,count
Gender,Unnamed: 1_level_1
F,60304
M,34720


In [8]:
df['Names'] = df['Names'].str.lower()

df.head()

Unnamed: 0,Names,Gender
0,aabha,F
1,aabid,M
2,aabriella,F
3,aada,F
4,aadam,M


In [9]:
df['Gender'] = df['Gender'].map({'M': 0, 'F': 1})

In [10]:
df.head()

Unnamed: 0,Names,Gender
0,aabha,1
1,aabid,0
2,aabriella,1
3,aada,1
4,aadam,0


In [11]:
df['Gender'].unique()

array([1, 0])

In [12]:
df.dtypes

Unnamed: 0,0
Names,object
Gender,int64


In [13]:
import tensorflow as tf
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

In [14]:
vectorizer = CountVectorizer(analyzer='char', ngram_range=(1, 3))  # You can adjust n-gram range
X = vectorizer.fit_transform(df['Names'])
y = df['Gender']


In [15]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [16]:
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(128, input_dim=X_train.shape[1], activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [17]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [18]:
model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.2)


Epoch 1/20
[1m1901/1901[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 8ms/step - accuracy: 0.8063 - loss: 0.4055 - val_accuracy: 0.8821 - val_loss: 0.2781
Epoch 2/20
[1m1901/1901[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 7ms/step - accuracy: 0.9045 - loss: 0.2256 - val_accuracy: 0.8905 - val_loss: 0.2581
Epoch 3/20
[1m1901/1901[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 7ms/step - accuracy: 0.9367 - loss: 0.1577 - val_accuracy: 0.8935 - val_loss: 0.2708
Epoch 4/20
[1m1901/1901[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 7ms/step - accuracy: 0.9570 - loss: 0.1066 - val_accuracy: 0.8940 - val_loss: 0.3020
Epoch 5/20
[1m1901/1901[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 7ms/step - accuracy: 0.9720 - loss: 0.0723 - val_accuracy: 0.8950 - val_loss: 0.3731
Epoch 6/20
[1m1901/1901[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 7ms/step - accuracy: 0.9795 - loss: 0.0509 - val_accuracy: 0.8964 - val_loss: 0.4132
Epoch 7/20

<keras.src.callbacks.history.History at 0x7bce7f4aeb90>

In [19]:
y_pred = (model.predict(X_test) > 0.5).astype("int32")


[1m594/594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step


In [20]:
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")


Accuracy: 0.8957


In [21]:
print(classification_report(y_test, y_pred, target_names=['Male', 'Female']))


              precision    recall  f1-score   support

        Male       0.84      0.88      0.86      6963
      Female       0.93      0.91      0.92     12042

    accuracy                           0.90     19005
   macro avg       0.89      0.89      0.89     19005
weighted avg       0.90      0.90      0.90     19005



In [27]:
# Load the model
loaded_model = joblib.load('gender_prediction_model.pkl')

# Load the vectorizer
vectorizer = joblib.load('vectorizer.pkl')

# Predict gender from a new name
def predict_gender(name):
    features = vectorizer.transform([name])
    prediction = (loaded_model.predict(features) > 0.5).astype("int32")
    return 'Female' if prediction == 1 else 'Male'

name = input("Enter a name to predict the gender: ")
print(f"The predicted gender is: {predict_gender(name)}")



Enter a name to predict the gender: rama
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step
The predicted gender is: Female
