In [37]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.svm import SVC
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import pickle

In [38]:
df = pd.read_csv('Gender.csv')

In [39]:
vec = CountVectorizer()
X = vec.fit_transform(df['Name'])

In [40]:
encoder = LabelEncoder()
Y = encoder.fit_transform(df['Gender'])

In [41]:
X_train, X_test, Y_train, Y_test = train_test_split(X,Y,test_size=0.2)

In [42]:
#svm

svm = SVC()
svm.fit(X_train,Y_train)
svm_pred = svm.predict(X_test)
svm_acc = accuracy_score(Y_test,svm_pred)
print("SVM Accuracy:",svm_acc)

SVM Accuracy: 0.47058823529411764


In [43]:
#naive bayes

nb = MultinomialNB()
nb.fit(X_test,Y_test)
nb_pred = nb.predict(X_test)
nb_acc = accuracy_score(Y_test,nb_pred)
print("Multinomial NB accuracy:",nb_acc)

Multinomial NB accuracy: 1.0


In [44]:
#logistic regression 

lr = LogisticRegression()
lr.fit(X_train,Y_train)
lr_pred = lr.predict(X_test)
lr_acc=accuracy_score(Y_test,lr_pred)
print("Logistic Regression accuracy:",lr_acc)


Logistic Regression accuracy: 0.47058823529411764


In [45]:
names = ['Aurora','Jefrey']
names_t = vec.transform(names)
svmp = svm.predict(names_t)
nbp = nb.predict(names_t)
lrp = lr.predict(names_t)
print("SVM:",encoder.inverse_transform(svmp))
print("NB:",encoder.inverse_transform(nbp))
print("LR:",encoder.inverse_transform(lrp))

SVM: ['Male' 'Male']
NB: ['Female' 'Female']
LR: ['Male' 'Male']


In [26]:
with open('vectorizer.pkl', 'wb') as file:
    pickle.dump(vec, file)

with open('label_encoder.pkl', 'wb') as file:
    pickle.dump(encoder, file)

with open('svm_model.pkl', 'wb') as file:
    pickle.dump(svm, file)

with open('nb_model.pkl', 'wb') as file:
    pickle.dump(nb, file)

with open('log_reg_model.pkl', 'wb') as file:
    pickle.dump(lr, file)

In [29]:
with open('vectorizer.pkl', 'rb') as file:
    loaded_vec = pickle.load(file)

with open('label_encoder.pkl', 'rb') as file:
    loaded_encoder = pickle.load(file)

with open('svm_model.pkl', 'rb') as file:
    loaded_svm = pickle.load(file)

with open('nb_model.pkl', 'rb') as file:
    loaded_nb = pickle.load(file)

with open('log_reg_model.pkl', 'rb') as file:
    loaded_lr = pickle.load(file)


In [30]:
new_names = ['George', 'Hannah']
new_names_transformed = loaded_vec.transform(new_names)


In [33]:
svm_predictions = loaded_svm.predict(new_names_transformed)
nb_predictions = loaded_nb.predict(new_names_transformed)
log_reg_predictions = loaded_lr.predict(new_names_transformed)


svm_predictions_labels = loaded_encoder.inverse_transform(svm_predictions)
nb_predictions_labels = loaded_encoder.inverse_transform(nb_predictions)
log_reg_predictions_labels = loaded_encoder.inverse_transform(log_reg_predictions)

In [34]:
print("SVM Predictions:", svm_predictions_labels)
print("Naive Bayes Predictions:", nb_predictions_labels)
print("Logistic Regression Predictions:", log_reg_predictions_labels)

SVM Predictions: ['Male' 'Male']
Naive Bayes Predictions: ['Female' 'Female']
Logistic Regression Predictions: ['Male' 'Male']
