# Neural Network

In [15]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt



# Loading Data
data = pd.read_csv("TrainData.csv")
data = data.drop(["Unnamed: 0"],axis=1)

X = data.drop(["salary_label"],axis=1)
y = data["salary_label"]

# Training and validating using GridsearchCV

In [18]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPClassifier



NN = Pipeline([('scaler',  MinMaxScaler()),
            ('NN', MLPClassifier())])


param_grid = [
        {
            'NN__activation' : ['relu'],
            'NN__solver' : [ 'adam'],
            'NN__hidden_layer_sizes': [
             (20,),(30,),(40,),(60),(70,),(80,),(100)
             ]
        }
       ]


grid_search = GridSearchCV(NN, param_grid = param_grid, scoring = 'accuracy', cv=5, return_train_score = True)
grid_search.fit(X, y)

print(grid_search.best_params_)



{'NN__activation': 'relu', 'NN__hidden_layer_sizes': (70,), 'NN__solver': 'adam'}




# General performance on train/validation dataset

In [22]:
print("=========Train Acurracy=========")
print(np.mean(grid_search.cv_results_["split0_train_score"]))
print(np.mean(grid_search.cv_results_["split1_train_score"]))
print(np.mean(grid_search.cv_results_["split2_train_score"]))
print(np.mean(grid_search.cv_results_["split3_train_score"]))
print("mean:")
print(np.mean(
    [
        np.mean(grid_search.cv_results_["split0_train_score"]),
        np.mean(grid_search.cv_results_["split1_train_score"]),
        np.mean(grid_search.cv_results_["split2_train_score"]),
        np.mean(grid_search.cv_results_["split3_train_score"])
    ]
))


print("=========Test Acurracy=========")
print(np.mean(grid_search.cv_results_["split0_test_score"]))
print(np.mean(grid_search.cv_results_["split1_test_score"]))
print(np.mean(grid_search.cv_results_["split2_test_score"]))
print(np.mean(grid_search.cv_results_["split3_test_score"]))
print("mean:")
print(np.mean(
    [
        np.mean(grid_search.cv_results_["split0_test_score"]),
        np.mean(grid_search.cv_results_["split1_test_score"]),
        np.mean(grid_search.cv_results_["split2_test_score"]),
        np.mean(grid_search.cv_results_["split3_test_score"])
    ]
))

0.5863039399624765
0.5853658536585366
0.590368980612883
0.5898718349484214
mean:
0.5879776522955794
0.3775
0.37
0.36875
0.3617021276595745
mean:
0.3694880319148936


# Final test on test data set

In [21]:
from sklearn.metrics import accuracy_score
df = pd.read_csv("TestData.csv")

X_train = X
y_train = y

X_test = df.drop(["salary_label","Unnamed: 0"],axis=1)
y_test = df["salary_label"]


scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

params = {'activation': 'relu', 'hidden_layer_sizes': 70, 'solver': 'adam'}
model = MLPClassifier(**params)

model.fit(X_train,y_train)
y_pred = model.predict(X_test)

test_acc = accuracy_score(y_test,y_pred)

print("=======Accuracy on test data set=======")
print(test_acc)

0.403


