In [None]:
import pickle
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Load the data
rawdata = pd.read_csv('name_gender.csv')

# Work on an explicit copy so the derived columns below never leak back into
# the raw frame (read_csv already returns a DataFrame).
df = rawdata.copy()

# Encode gender.
# Labels are normalised (trimmed, lower-cased) and both the spelled-out and
# the single-letter forms are accepted.  A map that only knows the exact
# strings 'female'/'male' turns every other spelling into NaN, and filling
# those NaNs with 0 silently relabels the rows as female -- which leaves the
# classifier with a single class to learn.
gender_codes = {'f': 0, 'female': 0, 'm': 1, 'male': 1}
df['gender'] = df['gender'].astype(str).str.strip().str.lower().map(gender_codes)

# Drop rows with a missing name or an unrecognised gender label instead of
# guessing a label for them.
df = df.dropna(subset=['name', 'gender']).copy()
df['gender'] = df['gender'].astype(int)

if df['gender'].nunique() < 2:
    raise ValueError(
        "Expected both female and male labels in the 'gender' column after "
        "normalisation; check how gender is spelled in name_gender.csv"
    )

# Preprocessing
df['name'] = df['name'].astype(str).str.strip().str.lower()  # Normalise names
df['name_length'] = df['name'].str.len()
df['initial'] = df['name'].str[0]   # First character (not used as a model feature)
df['suffix'] = df['name'].str[-1]   # Last character (not used as a model feature)
df['vowel_count'] = df['name'].str.count('[aeiou]')
# Every character that is not one of 'aeiou' counts as a consonant, so the
# count is simply the remainder of the name length.
df['consonant_count'] = df['name_length'] - df['vowel_count']
df['vowel_consonant_ratio'] = df['vowel_count'] / df['consonant_count'].replace(0, 1)  # Avoid division by zero

# Features and target variable (taken after filtering so rows stay aligned)
X = df[['name_length', 'vowel_count', 'consonant_count', 'vowel_consonant_ratio']]
y = df['gender']

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the Random Forest model
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate performance
print(classification_report(y_test, y_pred))

# Function for user interaction
def predict_gender(name):
    """Predict a gender label for a single name with the trained ``model``.

    The same four numeric features used for training are computed from the
    name: its length, its vowel count, its non-vowel character count and
    the vowel/non-vowel ratio.

    Args:
        name: The name to classify.  Surrounding whitespace is ignored and
            the name is lower-cased before the features are computed.

    Returns:
        ``'Male'`` when the model predicts class 1, otherwise ``'Female'``.

    Raises:
        ValueError: If ``name`` is empty or contains only whitespace.
    """
    # Preprocess the name; stray whitespace would otherwise be counted as
    # extra characters in the features below.
    name = name.strip().lower()
    if not name:
        raise ValueError("name must contain at least one character")

    name_length = len(name)
    vowel_count = sum(char in 'aeiou' for char in name)
    # Every character that is not one of 'aeiou' counts as a consonant.
    consonant_count = name_length - vowel_count
    vowel_consonant_ratio = vowel_count / (consonant_count or 1)  # Avoid division by zero

    # Create a single-row DataFrame with the column names used in training
    input_data = pd.DataFrame({
        'name_length': [name_length],
        'vowel_count': [vowel_count],
        'consonant_count': [consonant_count],
        'vowel_consonant_ratio': [vowel_consonant_ratio],
    })

    # Make prediction
    prediction = model.predict(input_data)
    return 'Male' if prediction[0] == 1 else 'Female'

# User interaction loop: keep prompting for names until the user types 'exit'
while True:
    try:
        user_input = input("Enter a name (or type 'exit' to quit): ").strip()
    except EOFError:
        # Input stream closed (e.g. piped input ran out): stop cleanly.
        break
    if user_input.lower() == 'exit':
        break
    if not user_input:
        # Nothing was typed; ask again rather than predicting on an empty name.
        continue
    predicted_gender = predict_gender(user_input)
    print(f"The predicted gender for '{user_input}' is: {predicted_gender}")


              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00     19006

    accuracy                           1.00     19006
   macro avg       1.00      1.00      1.00     19006
weighted avg       1.00      1.00      1.00     19006



Enter a name (or type 'exit' to quit):  Rajesh


The predicted gender for 'Rajesh' is: Female
