In [None]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
import pandas as pd
import math
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
%matplotlib inline 


In [None]:


# Names applied to the loaded table: the class label first, then 28 feature columns.
column_names = [
    'class label',
    'lepton pT', 'lepton eta', 'lepton phi',
    'missing energy magnitude', 'missing energy phi',
    'jet 1 pt', 'jet 1 eta', 'jet 1 phi', 'jet 1 b-tag',
    'jet 2 pt', 'jet 2 eta', 'jet 2 phi', 'jet 2 b-tag',
    'jet 3 pt', 'jet 3 eta', 'jet 3 phi', 'jet 3 b-tag',
    'jet 4 pt', 'jet 4 eta', 'jet 4 phi', 'jet 4 b-tag',
    'm jj', 'm jjj', 'm lv', 'm jlv', 'm bb', 'm wbb', 'm wwbb',
]

# Read the data from the CSV file. header=0 is pandas' default behaviour spelled
# out: the first line of the file is consumed as column labels.
# NOTE(review): if HIGGS_train.csv has no header line, that first line is a real
# sample and is silently lost here — confirm against the file.
data = pd.read_csv('HIGGS_train.csv', header=0)

# Replace whatever labels were read with the descriptive names above
# (pandas raises if the number of names does not match the number of columns).
data.columns = column_names


In [None]:

# Print, for every column, how many values are missing and which share of all
# rows that represents.
total_rows = data.shape[0]
missing_per_column = data.isnull().sum()  # one count per column, in column order
for col, n_miss in missing_per_column.items():
    perc = n_miss / total_rows * 100
    print('> %s, Missing: %d (%.1f%%)' % (col, n_miss, perc))


In [None]:
# Import the canonical lowercase `nan` and keep the name NAN bound for any code
# that still refers to it. The uppercase alias is not guaranteed to be exported by
# newer NumPy releases (several such aliases were removed in NumPy 2.0), and
# nothing in this cell uses it.
from numpy import nan as NAN

# Remove every row that contains at least one missing value.
data = data.dropna()

# Repeat the per-column report to confirm that no missing values remain.
n_rows = data.shape[0]
for col, n_miss in data.isnull().sum().items():
    # If dropna() removed every row there is nothing to take a share of;
    # report 0% instead of dividing by zero.
    perc = n_miss / n_rows * 100 if n_rows else 0.0
    print('> %s, Missing: %d (%.1f%%)' % (col, n_miss, perc))

In [None]:
# Force every column to a numeric dtype: entries that cannot be parsed as numbers
# are turned into NaN by errors='coerce', and any row holding such a NaN is then
# discarded. The result is bound back to `data`.
data = data.apply(pd.to_numeric, errors='coerce').dropna()

In [None]:
# The first column is the class label; all remaining columns are model inputs.
features = data.iloc[:, 1:]
labels = data.iloc[:, 0]

# Hold out 20% of the rows for testing; the fixed random_state makes the split
# repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.2,
    random_state=7651,
)


In [None]:
def build(solver, penalty, C):
  """Fit a logistic regression and report its train and test accuracy.

  The model is trained on the notebook-level X_train / y_train and evaluated on
  both X_train / y_train and X_test / y_test.

  Parameters:
    solver: value passed to LogisticRegression(solver=...).
    penalty: value passed to LogisticRegression(penalty=...).
    C: value passed to LogisticRegression(C=...).

  Returns:
    A tuple (train_accuracy, test_accuracy).
  """
  classifier = LogisticRegression(solver=solver, penalty=penalty, C=C)
  classifier.fit(X_train, y_train)

  test_accuracy = accuracy_score(y_test, classifier.predict(X_test))
  train_accuracy = accuracy_score(y_train, classifier.predict(X_train))
  return (train_accuracy, test_accuracy)


# Hyper-parameter values explored for the logistic regression.
solvers = ['newton-cg', 'lbfgs', 'liblinear']
penalties = ['l1', 'l2' , 'none']
C_values = [0.1, 1.0, 3]

# Penalties that are actually evaluated for each solver. This is the same set of
# combinations the previous if/elif chain selected, written as a lookup table.
# NOTE(review): 'lbfgs' is only paired with 'none' here, exactly as before —
# confirm whether 'l2' was meant to be included as well.
# NOTE(review): newer scikit-learn releases expect penalty=None rather than the
# string 'none'; the string is kept so the values passed to build() are unchanged.
penalties_by_solver = {
    'newton-cg': ['l2', 'none'],
    'lbfgs': ['none'],
    'liblinear': ['l1', 'l2'],
}

# Collect one record per fitted model and build the table once at the end.
# DataFrame.append was removed in pandas 2.0, and growing a frame row by row
# copies the whole table on every iteration.
records = []
for solver in solvers:
    allowed_penalties = penalties_by_solver.get(solver, ())
    for penalty in penalties:
        if penalty not in allowed_penalties:
            continue
        for C in C_values:
            train_acc, test_acc = build(solver=solver, penalty=penalty, C=C)
            records.append({
                'Solver': solver,
                'Penalty': penalty,
                'C': C,
                'Train Accuracy': train_acc,
                'Test Accuracy': test_acc,
            })

# Passing `columns` fixes the column order and keeps the header even when no
# record was produced.
results_table = pd.DataFrame(
    records,
    columns=['Solver', 'Penalty', 'C', 'Train Accuracy', 'Test Accuracy'],
)

print(results_table)

In [None]:
# MLPClassifier:
# Baseline neural network. A fixed random_state is set (the same seed as the
# train/test split) so weight initialisation and shuffling, and therefore the
# reported accuracy, are repeatable, like the seeded models elsewhere in this notebook.
# NOTE(review): every entry of hidden_layer_sizes is a hidden layer, so the
# trailing 1 is a one-neuron hidden layer, not the output layer — confirm that
# this bottleneck is intended.
model = MLPClassifier(
    hidden_layer_sizes=(100, 50, 1),
    activation='relu',
    solver='adam',
    random_state=7651,
)
model.fit(X_train, y_train)
model_predict = model.predict(X_test)
print('accuracy of model is:', accuracy_score(y_test, model_predict))

In [None]:
# Five-hidden-layer network with L2 regularisation strength alpha=0.01 and a
# fixed seed. Fit it on the full training split and predict the full test split.
model = MLPClassifier(
    hidden_layer_sizes=(100, 50, 55, 345, 30),
    activation='relu',
    solver='adam',
    alpha=0.01,
    random_state=7651,
)
model.fit(X_train, y_train)
model_predict = model.predict(X_test)


In [None]:
# Score the predictions against the complete test labels. model_predict comes
# from the whole of X_test, so truncating y_test to 20000 rows made
# accuracy_score fail with a length mismatch whenever the test split is larger
# than that; for test splits of up to 20000 rows the result is unchanged.
print('accuracy of model is:', accuracy_score(y_test, model_predict))

accuracy of model is: 0.62595


In [None]:
# Experiment on a subset: fit on the first n_subset training rows and evaluate on
# the first n_subset test rows. The size is named once instead of being repeated
# in every slice.
n_subset = 40000

# A fixed random_state (the same seed as the train/test split) makes the reported
# accuracy repeatable, like the seeded models elsewhere in this notebook.
# NOTE(review): the trailing 1 in hidden_layer_sizes is a one-neuron hidden
# layer, not the output layer — confirm that this bottleneck is intended.
model = MLPClassifier(
    hidden_layer_sizes=(100, 50, 40, 30, 1),
    activation='relu',
    solver='adam',
    random_state=7651,
)
model.fit(X_train[:n_subset], y_train[:n_subset])
model_predict = model.predict(X_test[:n_subset])
print('accuracy of model is:', accuracy_score(y_test[:n_subset], model_predict))

In [None]:
# Five-hidden-layer network (alpha=0.01, seed 9651) trained on the full training
# split. Accuracy is reported on both splits so they can be compared.
model = MLPClassifier(
    hidden_layer_sizes=(100, 50, 55, 45, 10),
    activation='relu',
    solver='adam',
    alpha=.01,
    random_state=9651,
)
model.fit(X_train, y_train)

model_predict = model.predict(X_test)
model_predict_train = model.predict(X_train)

train_accuracy = accuracy_score(y_train, model_predict_train)
test_accuracy = accuracy_score(y_test, model_predict)
print('training accuracy of model is:', train_accuracy)
print('testing accuracy of model is:', test_accuracy)

In [None]:
# Three-hidden-layer network with stronger regularisation (alpha=0.1) and a fixed
# seed, trained on the full training split. Accuracy is reported on both splits.
model = MLPClassifier(
    hidden_layer_sizes=(100, 50, 1),
    activation='relu',
    solver='adam',
    alpha=.1,
    random_state=987218764,
)
model.fit(X_train, y_train)

model_predict = model.predict(X_test)
model_predict_train = model.predict(X_train)

train_accuracy = accuracy_score(y_train, model_predict_train)
test_accuracy = accuracy_score(y_test, model_predict)
print('training accuracy of model is:', train_accuracy)
print('testing accuracy of model is:', test_accuracy)