In [1]:
import pandas as pd
import pickle
import os
from sklearn.linear_model import LogisticRegression
from sklearn.feature_extraction.text import CountVectorizer
import csv

# Function to load CSV data
def load_data(file_path):
    if os.path.exists(file_path):
        return pd.read_csv(file_path)
    else:
        print(f"CSV file at {file_path} does not exist!")
        return pd.DataFrame(columns=["name", "gender"])

# Function to load model and vectorizer from Pickle
def load_model(model_path, vectorizer_path):
    if os.path.exists(model_path) and os.path.exists(vectorizer_path):
        with open(model_path, 'rb') as model_file, open(vectorizer_path, 'rb') as vectorizer_file:
            model = pickle.load(model_file)
            vectorizer = pickle.load(vectorizer_file)
        print("Model and vectorizer loaded successfully.")
        return model, vectorizer
    else:
        print("No model or vectorizer found, starting with a new model.")
        return None, None

# Function to train the model
def train_model(data):
    # Convert names into features
    vectorizer = CountVectorizer(analyzer='char')
    X = vectorizer.fit_transform(data['name'])
    y = data['gender']
    
    # Train a simple decision tree classifier
    model = DecisionTreeClassifier()
    model.fit(X, y)
    
    return model, vectorizer

# Function to predict gender based on the model
def predict_gender(model, vectorizer, name):
    name_features = vectorizer.transform([name])
    return model.predict(name_features)[0]

# Function to get feedback from the user
def get_feedback(name):
    print(f"Sorry, I couldn't determine the gender for the name '{name}'.")
    gender = input("Please provide the gender (M/F): ").strip().upper()
    while gender not in ['M', 'F']:
        print("Invalid input! Please enter 'M' or 'F'.")
        gender = input("Please provide the gender (M/F): ").strip().upper()
    return gender

# Function to save the feedback to the CSV
def save_feedback(file_path, name, gender):
    with open(file_path, mode='a', newline='') as file:
        writer = csv.writer(file)
        writer.writerow([name, gender])

# Function to save the trained model and vectorizer using Pickle
def save_model(model, vectorizer, model_path, vectorizer_path):
    with open(model_path, 'wb') as model_file, open(vectorizer_path, 'wb') as vectorizer_file:
        pickle.dump(model, model_file)
        pickle.dump(vectorizer, vectorizer_file)
    print("Model and vectorizer saved successfully.")

# Main function
def main():
    file_path = "name_gender.csv"
    model_path = "gender_model.pkl"
    vectorizer_path = "vectorizer.pkl"

    data = load_data(file_path)

    # Load existing model and vectorizer from Pickle, or train a new one if not available
    model, vectorizer = load_model(model_path, vectorizer_path)

    # If model or vectorizer does not exist, train a new model
    if model is None or vectorizer is None:
        if data.empty:
            print("No data available to train the model. Exiting...")
            return
        model, vectorizer = train_model(data)

    while True:
        # User input for name
        user_name = input("Enter a name (or type 'exit' to quit): ").strip()
        
        if user_name.lower() == 'exit':
            break
        
        # Check if the name exists in the CSV data
        if user_name in data['name'].values:
            gender = data[data['name'] == user_name]['gender'].values[0]
            print(f"The gender for '{user_name}' from the memory is: {gender}")
        else:
            # If name is not found in the CSV, predict using the ML model
            predicted_gender = predict_gender(model, vectorizer, user_name)
            print(f"I think the gender for '{user_name}' is: {predicted_gender}")
            
            # Ask for user feedback if unsure
            user_feedback = input("Is this correct? (Y/N): ").strip().lower()
            if user_feedback == 'n':
                gender = get_feedback(user_name)
                # Save the feedback to the CSV for future learning
                save_feedback(file_path, user_name, gender)
                print(f"Thank you for the feedback! Gender for '{user_name}' saved as {gender}.")
                
                # Retrain the model with the new data
                data = load_data(file_path)  # Reload the data with the new feedback
                model, vectorizer = train_model(data)  # Retrain the model
                # Save the updated model and vectorizer
                save_model(model, vectorizer, model_path, vectorizer_path)
            else:
                print("Thanks for confirming!")

if __name__ == "__main__":
    main()


No model or vectorizer found, starting with a new model.


NameError: name 'DecisionTreeClassifier' is not defined