In [59]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

import chardet

In [56]:
# Path of the CSV file that is first sniffed for its encoding and then loaded
csv_path = '/content/drive/MyDrive/Dataset/Indian-Name.csv'

# Let chardet guess the text encoding from the file's raw bytes
with open(csv_path, 'rb') as f:
    raw_bytes = f.read()
result = chardet.detect(raw_bytes)

# Show which encoding was guessed
detected_encoding = result['encoding']
print(detected_encoding)

# Load the CSV into a DataFrame, decoding it with the guessed encoding
df = pd.read_csv(csv_path, encoding=detected_encoding)

ISO-8859-1


In [60]:
# Pull the input column ('Name') and the label column ('Target') out of df
X, y = df['Name'], df['Target']

In [61]:
# Turn every name string into an integer code: fit() learns the distinct
# names, transform() then replaces each name with its code.
# NOTE(review): scikit-learn documents LabelEncoder as an encoder for target
# labels (y), not input features. The code assigned to a name carries no
# information about its spelling, which may explain near-chance accuracy
# downstream -- consider character n-gram features instead; confirm intent.
label_encoder = LabelEncoder().fit(X)
X_encoded = label_encoder.transform(X)

In [62]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
split_parts = train_test_split(
    X_encoded,
    y,
    test_size=0.2,
    random_state=42,
)
X_train, X_test, y_train, y_test = split_parts

In [71]:
# Decision Tree Classifier
# The encoded names are reshaped into a single-column 2-D array before being
# handed to the estimator.
dt_train_features = X_train.reshape(-1, 1)
dt_test_features = X_test.reshape(-1, 1)

dt_classifier = DecisionTreeClassifier().fit(dt_train_features, y_train)
dt_predictions = dt_classifier.predict(dt_test_features)
dt_accuracy = accuracy_score(y_test, dt_predictions)
print("Decision Tree Accuracy:", dt_accuracy)

# Display predictions with names and targets: the integer codes in X_test are
# mapped back to the original name strings for readability.
dt_results = pd.DataFrame(
    {
        'Name': label_encoder.inverse_transform(X_test),
        'Target': y_test,
        'Prediction': dt_predictions,
    }
)
print("Decision Tree Predictions:")
print(dt_results)

Decision Tree Accuracy: 0.5884615384615385
Decision Tree Predictions:
         Name  Target  Prediction
1231   Mrunal       0           1
578    Muskan       0           0
1164    Bobby       1           1
722    Monali       0           1
561     Hetul       1           0
...       ...     ...         ...
199   Ruchita       0           1
671   Meghana       0           1
1202    Meher       0           1
1049    Avika       0           1
10      Ronak       1           1

[260 rows x 3 columns]


In [67]:
# Support Vector Machine Classifier (default SVC settings)
svm_classifier = SVC()

# Train on the encoded names as a single-column feature matrix
svm_train_features = X_train.reshape(-1, 1)
svm_classifier.fit(svm_train_features, y_train)

# Predict the held-out rows and score them against the true targets
svm_test_features = X_test.reshape(-1, 1)
svm_predictions = svm_classifier.predict(svm_test_features)
svm_accuracy = accuracy_score(y_test, svm_predictions)
print("\nSVM Accuracy:", svm_accuracy)

# Display predictions with names and targets (codes decoded back to names)
svm_test_names = label_encoder.inverse_transform(X_test)
svm_results = pd.DataFrame(
    {'Name': svm_test_names, 'Target': y_test, 'Prediction': svm_predictions}
)
print("SVM Predictions:")
print(svm_results)


SVM Accuracy: 0.5115384615384615
SVM Predictions:
         Name  Target  Prediction
1231   Mrunal       0           0
578    Muskan       0           0
1164    Bobby       1           0
722    Monali       0           0
561     Hetul       1           0
...       ...     ...         ...
199   Ruchita       0           0
671   Meghana       0           0
1202    Meher       0           0
1049    Avika       0           1
10      Ronak       1           0

[260 rows x 3 columns]


In [68]:
# Logistic Regression Classifier (default settings)
# Fit on the training codes, shaped as one feature column per sample
lr_classifier = LogisticRegression().fit(X_train.reshape(-1, 1), y_train)

# Score the model on the held-out codes
lr_test_features = X_test.reshape(-1, 1)
lr_predictions = lr_classifier.predict(lr_test_features)
lr_accuracy = accuracy_score(y_test, lr_predictions)
print("\nLogistic Regression Accuracy:", lr_accuracy)

# Display predictions with names and targets; the dict is built first so the
# column order (Name, Target, Prediction) is explicit.
lr_columns = {
    'Name': label_encoder.inverse_transform(X_test),
    'Target': y_test,
    'Prediction': lr_predictions,
}
lr_results = pd.DataFrame(lr_columns)
print("Logistic Regression Predictions:")
print(lr_results)


Logistic Regression Accuracy: 0.49230769230769234
Logistic Regression Predictions:
         Name  Target  Prediction
1231   Mrunal       0           0
578    Muskan       0           0
1164    Bobby       1           0
722    Monali       0           0
561     Hetul       1           0
...       ...     ...         ...
199   Ruchita       0           0
671   Meghana       0           0
1202    Meher       0           0
1049    Avika       0           0
10      Ronak       1           0

[260 rows x 3 columns]


In [69]:
# Neural Network Classifier
# One hidden layer of 100 units, up to 1000 training iterations.
# random_state is fixed (same seed as the train/test split) because the MLP's
# weight initialisation and batch shuffling are random; without it the accuracy
# printed below changes on every run and cannot be reproduced.
# NOTE(review): the raw integer name codes are fed in unscaled; scikit-learn
# recommends standardising MLP inputs -- consider a StandardScaler step.
nn_classifier = MLPClassifier(hidden_layer_sizes=(100,), max_iter=1000, random_state=42)
nn_classifier.fit(X_train.reshape(-1, 1), y_train)

# Predict the held-out rows and measure the share of correct predictions
nn_predictions = nn_classifier.predict(X_test.reshape(-1, 1))
nn_accuracy = accuracy_score(y_test, nn_predictions)
print("\nNeural Network Accuracy:", nn_accuracy)

# Display predictions with names and targets: the integer codes in X_test are
# mapped back to the original name strings for readability.
nn_results = pd.DataFrame({'Name': label_encoder.inverse_transform(X_test), 'Target': y_test, 'Prediction': nn_predictions})
print("Neural Network Predictions:")
print(nn_results)


Neural Network Accuracy: 0.4846153846153846
Neural Network Predictions:
         Name  Target  Prediction
1231   Mrunal       0           1
578    Muskan       0           1
1164    Bobby       1           1
722    Monali       0           1
561     Hetul       1           1
...       ...     ...         ...
199   Ruchita       0           1
671   Meghana       0           1
1202    Meher       0           1
1049    Avika       0           0
10      Ronak       1           1

[260 rows x 3 columns]
