# Importing necessary modules

In [1]:
import pandas as pd
import numpy as np
import string
from sklearn.preprocessing import LabelEncoder,StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

# Loading the dataset/dataframe

In [2]:
# Load the names dataset from the CSV file expected in the working directory
data = pd.read_csv(filepath_or_buffer="final_all_names_code.csv")
# Preview the first 5 rows as the cell output
data.head(n=5)

Unnamed: 0,Name,Country_code,Country
0,Amy Johnson,ar_AE,AE
1,Thomas Wright,ar_AE,AE
2,Mr. Marco Flores DDS,ar_AE,AE
3,Marcus Robbins,ar_AE,AE
4,Susan Montgomery,ar_AE,AE


# Creating column names for new dataframe

In [3]:
# Layout of the new dataframe: one column per lowercase ASCII letter, followed by the two label columns
columns = [*string.ascii_lowercase, 'Country', 'Country_code']
print(columns)

['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'Country', 'Country_code']


In [4]:
# Start from an empty dataframe that carries only the column layout (no rows yet)
newdata = pd.DataFrame(data=None, columns=columns)
newdata

Unnamed: 0,a,b,c,d,e,f,g,h,i,j,...,s,t,u,v,w,x,y,z,Country,Country_code


# Creating values from existing dataset for the new dataframe

In [5]:
MAX_ROWS=1000                                    #Maximum number of rows of the original dataset that are processed
subset=data.head(MAX_ROWS)                       #head() simply returns fewer rows when the dataset is shorter than MAX_ROWS (no KeyError)
values=[]
for fullname,country,countryCode in zip(subset['Name'],subset['Country'],subset['Country_code']):
    if not isinstance(fullname,str):             #A missing name (e.g. NaN) has no letters to encode, so the row is skipped
        continue
    name=fullname.split()                        #Split the full name into its whitespace-separated words
    if not name:                                 #A blank name has no last word, so the row is skipped
        continue
    lastname=name[-1].lower()                    #Only the last word of the name (lowercased) is used for further processing
    cols=dict.fromkeys(columns,0)                #New dictionary with the columns of the new dataframe as keys, all values start at 0
    for j in lastname:
        cols[j]=1                                #Mark every character that appears in the last word; a non-letter such as '.' becomes an extra key/column here
    cols['Country']=country                      #Country the person belongs to in the original dataset
    cols['Country_code']=countryCode             #Country code the person belongs to in the original dataset
    values.append(cols)                          #Append the dictionary to the values list
valuesFrame=pd.DataFrame(values)                 #Create a dataframe with the values alone

# Concatenating the new dataframe with the values

In [6]:
# Concatenate onto a fresh empty frame with the expected column layout instead of onto
# `newdata` itself, so that re-running this cell does not append the same rows a second time.
newdata=pd.concat([pd.DataFrame(columns=columns),valuesFrame],ignore_index=True)
newdata

Unnamed: 0,a,b,c,d,e,f,g,h,i,j,...,t,u,v,w,x,y,z,Country,Country_code,.
0,0,0,0,0,0,0,0,1,0,1,...,0,0,0,0,0,0,0,AE,ar_AE,
1,0,0,0,0,0,0,1,1,1,0,...,1,0,0,1,0,0,0,AE,ar_AE,
2,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,AE,ar_AE,
3,0,1,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,AE,ar_AE,
4,0,0,0,0,1,0,1,0,0,0,...,1,0,0,0,0,1,0,AE,ar_AE,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,AE,ar_AE,
996,1,0,0,0,0,0,0,0,0,0,...,1,1,0,0,0,0,0,AE,ar_AE,
997,0,0,0,0,1,1,0,1,1,0,...,0,0,0,0,0,0,0,AE,ar_AE,
998,1,0,0,1,0,0,0,0,1,0,...,0,0,1,0,0,0,0,AE,ar_AE,


# Drop any additional columns added

In [7]:
# Keep only the expected columns (the 26 letters plus the two labels). This removes the '.' column
# as well as any other column created by a non-letter character in a name, and it is safe to
# re-run: selecting the same columns again is a no-op, whereas dropping '.' twice raises a KeyError.
newdata=newdata[columns]
newdata

Unnamed: 0,a,b,c,d,e,f,g,h,i,j,...,s,t,u,v,w,x,y,z,Country,Country_code
0,0,0,0,0,0,0,0,1,0,1,...,1,0,0,0,0,0,0,0,AE,ar_AE
1,0,0,0,0,0,0,1,1,1,0,...,0,1,0,0,1,0,0,0,AE,ar_AE
2,0,0,0,1,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,AE,ar_AE
3,0,1,0,0,0,0,0,0,1,0,...,1,0,0,0,0,0,0,0,AE,ar_AE
4,0,0,0,0,1,0,1,0,0,0,...,0,1,0,0,0,0,1,0,AE,ar_AE
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,AE,ar_AE
996,1,0,0,0,0,0,0,0,0,0,...,1,1,1,0,0,0,0,0,AE,ar_AE
997,0,0,0,0,1,1,0,1,1,0,...,1,0,0,0,0,0,0,0,AE,ar_AE
998,1,0,0,1,0,0,0,0,1,0,...,1,0,0,1,0,0,0,0,AE,ar_AE


In [8]:
letterColumns = list(string.ascii_lowercase)   # The 26 letter-indicator columns
X = newdata.loc[:, letterColumns]              # Feature dataset: one 0/1 indicator per letter
Y = newdata.loc[:, 'Country']                  # Target dataset: the non-numeric country label of each row
X

Unnamed: 0,a,b,c,d,e,f,g,h,i,j,...,q,r,s,t,u,v,w,x,y,z
0,0,0,0,0,0,0,0,1,0,1,...,0,0,1,0,0,0,0,0,0,0
1,0,0,0,0,0,0,1,1,1,0,...,0,1,0,1,0,0,1,0,0,0
2,0,0,0,1,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
3,0,1,0,0,0,0,0,0,1,0,...,0,1,1,0,0,0,0,0,0,0
4,0,0,0,0,1,0,1,0,0,0,...,0,1,0,1,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
996,1,0,0,0,0,0,0,0,0,0,...,0,1,1,1,1,0,0,0,0,0
997,0,0,0,0,1,1,0,1,1,0,...,0,1,1,0,0,0,0,0,0,0
998,1,0,0,1,0,0,0,0,1,0,...,0,0,1,0,0,1,0,0,0,0


# Preprocessing for training the model

In [9]:
# Standardize every feature column to mean 0 and standard deviation 1
# (this rescales the values; it does not change the shape of their distribution).
# NOTE(review): the scaler is fitted on all rows before the train/test split below,
# so test rows influence the scaling statistics - consider fitting on the training rows only.
scaler = StandardScaler()
X = scaler.fit_transform(X)
# Convert the non-numeric country labels into integer class labels
encoder = LabelEncoder()
Y = encoder.fit_transform(Y)

# Splitting the dataset into training and testing datasets

In [10]:
# Hold out 20% of the rows for testing and train on the remaining 80%;
# the fixed random_state makes the split reproducible between runs.
xTrain, xTest, yTrain, yTest = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=0,
)

# Creating a MLP Classifier model

In [11]:
# Hyper-parameters of the multi-layer perceptron, gathered in one place so they are easy to tune
mlpParams = {
    'hidden_layer_sizes': (100, 50, 30),   # Three hidden layers with 100, 50 and 30 units
    'solver': 'adam',
    'verbose': True,                       # Print the loss of every iteration while training
    'activation': 'relu',
    'random_state': 42,                    # Fixed seed for reproducible training
    'max_iter': 500,
    'n_iter_no_change': 30,
    'learning_rate_init': 0.0001,
}
model = MLPClassifier(**mlpParams)

# Training and Testing the Model

In [12]:
# fit() returns the fitted estimator itself, so training and prediction can be chained
yPred = model.fit(xTrain, yTrain).predict(xTest)
# Fraction of test rows whose predicted label matches the actual label
accuracy = accuracy_score(y_true=yTest, y_pred=yPred)
print("Accuracy = ", accuracy * 100)

Iteration 1, loss = 0.70405455
Iteration 2, loss = 0.68610987
Iteration 3, loss = 0.66880414
Iteration 4, loss = 0.65198205
Iteration 5, loss = 0.63583216
Iteration 6, loss = 0.62003955
Iteration 7, loss = 0.60465765
Iteration 8, loss = 0.58973961
Iteration 9, loss = 0.57488982
Iteration 10, loss = 0.56031758
Iteration 11, loss = 0.54587288
Iteration 12, loss = 0.53172241
Iteration 13, loss = 0.51770307
Iteration 14, loss = 0.50393970
Iteration 15, loss = 0.49013806
Iteration 16, loss = 0.47645284
Iteration 17, loss = 0.46274579
Iteration 18, loss = 0.44890141
Iteration 19, loss = 0.43515566
Iteration 20, loss = 0.42149487
Iteration 21, loss = 0.40786001
Iteration 22, loss = 0.39401775
Iteration 23, loss = 0.38031630
Iteration 24, loss = 0.36666453
Iteration 25, loss = 0.35326545
Iteration 26, loss = 0.33997543
Iteration 27, loss = 0.32701670
Iteration 28, loss = 0.31420338
Iteration 29, loss = 0.30157630
Iteration 30, loss = 0.28928988
Iteration 31, loss = 0.27725201
Iteration 32, los