# In [None]:
def evaluation_ab(position, depth, alpha, beta):
  """Evaluate a position by depth-limited minimax search with alpha-beta pruning.

  White is treated as the maximising side and Black as the minimising side.

  Args:
    position: Position to evaluate. It is handed unchanged to `legal_moves`,
      `white_to_move` and `static_evluation`.
    depth: Number of plies still to search; at 0 the static evaluation is
      returned.
    alpha: Lower bound of the search window (best score found so far for
      the maximising side).
    beta: Upper bound of the search window (best score found so far for
      the minimising side).

  Returns:
    The static evaluation if `depth` is 0 or there are no legal moves,
    otherwise the best child evaluation for the side to move.
  """
  # Search horizon reached: no move generation needed.
  if depth == 0:
    return static_evluation(position)

  # Generate the moves once per node and reuse them below.
  moves = legal_moves(position)
  if len(moves) == 0:
    return static_evluation(position)

  # sorted() returns the ordered list; list.sort() sorts in place and returns
  # None, so it cannot be iterated over directly.
  # NOTE(review): assumes move_order_heuristics is a one-argument key
  # function -- confirm.
  ordered_moves = sorted(moves, key=move_order_heuristics)

  # NOTE(review): each element of legal_moves(position) is passed on as the
  # next position, so legal_moves presumably yields successor positions --
  # confirm.
  if white_to_move(position):
    best_evaluation = -300 #300 is a commonly assigned value to the king by chess engines
    for move in ordered_moves:
      node_evaluation = evaluation_ab(move, depth - 1, alpha, beta)
      best_evaluation = max(best_evaluation, node_evaluation)
      alpha = max(alpha, node_evaluation)
      # The minimising side already has a better alternative: prune.
      if alpha >= beta:
        break
    return best_evaluation

  best_evaluation = 300
  for move in ordered_moves:
    node_evaluation = evaluation_ab(move, depth - 1, alpha, beta)
    best_evaluation = min(best_evaluation, node_evaluation)
    beta = min(beta, node_evaluation)
    # The maximising side already has a better alternative: prune.
    if alpha >= beta:
      break
  return best_evaluation

def evaluation(current_position, depth):
  """Evaluate `current_position` by alpha-beta search to `depth` plies.

  The search starts with the widest window used in this file, [-300, 300].
  """
  lower_bound, upper_bound = -300, 300
  return evaluation_ab(current_position, depth, lower_bound, upper_bound)

def search(s, nnet):
    """Run one tree-search simulation from state `s` and back its value up.

    The function reads and updates the module-level search statistics
    `visited`, `P`, `Q` and `N`, and reads the exploration constant `c`.

    Args:
        s: Current state; used as a key into `P`, `Q` and `N`.
        nnet: Object whose `predict(s)` returns a `(policy, value)` pair.

    Returns:
        `outcome(s)` if the game is over at `s`; otherwise the negation of
        the value obtained in this simulation (the network's prediction on
        the first visit of `s`, the recursive search result afterwards).
        NOTE(review): the negation presumably switches to the other
        player's point of view -- confirm against `outcome`.
    """
    if gameOver(s):
        return outcome(s)

    # First visit: record the state and let the network evaluate it.
    # NOTE(review): Q[s] and N[s] are not initialised here; presumably that
    # happens elsewhere -- confirm.
    if s not in visited:
        visited.append(s)
        P[s], v = nnet.predict(s)
        return -v

    # Generate the legal moves once and reuse them for the default choice
    # and for the selection loop.
    moves = legalMoves(s)
    max_u, best_a = -float("inf"), random.choice(moves)
    for a in moves:
        # Selection score: current value estimate plus an exploration bonus
        # weighted by the prior and shrinking with the visit count.
        u = Q[s][a] + c*P[s][a]*sqrt(sum(N[s]))/(1+N[s][a])
        if u > max_u:
            max_u = u
            best_a = a
    a = best_a

    smove = makeMove(s, a)
    v = search(smove, nnet)

    # Incremental mean of the values seen through (s, a). No `global`
    # statement is needed: N and Q are mutated in place, never rebound.
    Q[s][a] = (N[s][a]*Q[s][a] + v)/(N[s][a]+1)
    N[s][a] += 1
    return -v

def policy(s, nnet):
  """Run the configured number of simulations from `s` and report visit counts.

  Calls `search(s, nnet)` `number_of_simulations` times, then returns the
  entries of `N[s]` for the legal moves of `s`, in `legalMoves(s)` order.
  """
  for _ in range(number_of_simulations):
    search(s, nnet)

  visit_counts = []
  for move in legalMoves(s):
    visit_counts.append(N[s][move])
  return visit_counts

def data(nnet):
  """Generate training examples from self-play games.

  Plays `number_of_selfplay_games` games. In every position the move with
  the highest visit count reported by `policy` is played, and once the game
  is over each visited position is labelled with the game's outcome.

  NOTE(review): the module-level script further down rebinds the name
  `data` to a list of DataFrames, which hides this function from then on.

  Args:
    nnet: Network passed through to `policy`.

  Returns:
    A list of `(position, pi, outcome)` tuples covering all games, where
    `pi` is the list of visit counts returned by `policy` for `position`.
  """
  examples = []
  for _ in range(number_of_selfplay_games):
    game = startingPosition()
    # Collected per game, so that one game's positions are neither added
    # again nor relabelled when a later game finishes.
    positions = []
    while True:
      pi = policy(game, nnet)
      positions.append((game, pi))
      # policy() lists the visit counts in legalMoves() order, so the index
      # of the largest count selects the move to play (the count itself is
      # not a move).
      most_visited = legalMoves(game)[pi.index(max(pi))]
      game = makeMove(game, most_visited)
      if gameOver(game):
        # NOTE(review): every position of the game receives the same
        # outcome regardless of the side to move -- confirm this matches
        # the perspective used by `outcome`.
        result = outcome(game)
        # Tuples are immutable, so the labelled examples are built here
        # rather than patched afterwards.
        examples.extend((position, counts, result)
                        for position, counts in positions)
        break
  return examples

def upgrade(nnet):
  """Train `nnet` on freshly generated self-play examples.

  Returns whatever `nnet.train` returns for the examples produced by
  `data(nnet)`.
  """
  train = nnet.train
  selfplay_examples = data(nnet)
  return train(selfplay_examples)

def finalnet():
  """Iteratively train a network, keeping a candidate only if it wins enough.

  Starts from `nnet.initialise()` and repeats `iterations` times: train a
  candidate with `upgrade`, compare it with the current network through
  `pit` over `number_of_games` games, and adopt it when the reported win
  ratio exceeds `threshold`.

  Returns:
    The network retained after the last iteration.
  """
  # A distinct local name is required: assigning to `nnet` inside this
  # function would make `nnet` local and the lookup of the module-level
  # `nnet` would fail with UnboundLocalError.
  current_net = nnet.initialise()
  for _ in range(iterations):
    candidate_net = upgrade(current_net)
    # NOTE(review): assumes `pit` reports the share of games won by its
    # second argument (the candidate) -- confirm.
    win_ratio = pit(current_net, candidate_net, n = number_of_games)
    if win_ratio > threshold:
      current_net = candidate_net
  return current_net

# Python Project

# Importing libraries
import pandas as pd
import numpy as np
from numpy.core.fromnumeric import mean
# Importing classification algorithms from sklearn
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.cluster import KMeans
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import cross_val_predict

def model_accuracy(x_train, x_test, y_train, y_test, classifier, grid = None,
                   iterations = 1):
  """Tune `classifier` by cross-validated parameter search and score it.

  The search is repeated `iterations` times. The parameter combination that
  wins most often is selected (ties are broken by the higher mean score),
  and its mean best score over the runs it won is reported.

  Args:
    x_train: Training features used for the search.
    x_test: Unused; kept so existing callers keep working.
    y_train: Training labels used for the search.
    y_test: Unused; kept so existing callers keep working.
    classifier: Estimator to tune.
    grid: Parameter grid (dict or list of dicts). `None` means an empty
      grid, i.e. the estimator's current parameters only.
    iterations: Number of times the search is repeated; must be at least 1.

  Returns:
    A two-element list `[estimator, score]`: the best estimator of the first
    run won by the selected parameter combination, and the mean balanced
    accuracy of that combination.

  Raises:
    ValueError: If `iterations` is smaller than 1.
  """
  if iterations < 1:
    raise ValueError("iterations must be at least 1")
  param_grid = {} if grid is None else grid

  # Singling out computationally heavy models; isinstance also recognises
  # instances created with non-default parameters.
  heavy_model = isinstance(classifier, (SVC, KMeans))

  # Results grouped by winning parameter combination. Fitted estimators
  # compare by identity, so they cannot be used to count repeated winners.
  runs = {}
  # Performing parameter search with 5-fold cross-validation a specified
  # number of times; randomised search is performed for computationally heavy
  # models
  for _ in range(iterations):
    # Balanced accuracy takes into account both sensitivity and specificity,
    # and is hence a better accuracy measure than either of the two
    # individually, or than raw accuracy, which can misevaluate models that
    # give unbalanced results
    if heavy_model:
      cv_search = RandomizedSearchCV(estimator = classifier,
                                     param_distributions = param_grid,
                                     n_jobs=-1,
                                     cv = 5,
                                     scoring = "balanced_accuracy").fit(x_train, y_train)
    else:
      cv_search = GridSearchCV(estimator = classifier,
                               param_grid = param_grid,
                               n_jobs=-1,
                               cv = 5,
                               scoring = "balanced_accuracy").fit(x_train, y_train)
    # Recording the best parameter combination and corresponding score for
    # each iteration; repr() keeps the key hashable whatever the values are
    combination = repr(sorted(cv_search.best_params_.items()))
    run = runs.setdefault(combination,
                          {"estimator": cv_search.best_estimator_,
                           "scores": []})
    run["scores"].append(cv_search.best_score_)

  # Finding the parameter combination which produces the best yield most often
  winner = max(runs.values(),
               key=lambda run: (len(run["scores"]), np.mean(run["scores"])))
  # Recording the yield of the best parameter combination
  return [winner["estimator"], np.mean(winner["scores"])]

def accuracies_per_dataset(data):
  """Tune every candidate classifier on one dataset and collect the results.

  Args:
    data: DataFrame whose last column is the dependent variable and whose
      remaining columns are the independent variables.

  Returns:
    A list with one `model_accuracy` result per candidate model, in the
    order of the `models` list below.

  Side effects:
    Rebinds the module-level name `accuracy_per_model` to the returned list.
  """
  # Kept for backward compatibility: module-level code reads this global.
  global accuracy_per_model

  # Separating dependent variable from independent variables
  x = data.iloc[:, :-1].values
  y = data.iloc[:,-1].values
  # Splitting the data into training data and test data
  x_train, x_test, y_train, y_test = train_test_split(
      x, y, test_size = 0.3, random_state = 0)
  # Scaling the data. fit_transform returns the scaled copy, so the result
  # has to be kept; the scaler is fitted on the training data only and then
  # applied unchanged to the test data.
  scaler = StandardScaler()
  x_train = scaler.fit_transform(x_train)
  x_test = scaler.transform(x_test)

  # Generating parameter dictionaries and iteration numbers for all the models
  models = [[LogisticRegression(),
            # NOTE(review): recent scikit-learn releases spell the
            # unpenalised option None instead of "none" -- confirm against
            # the installed version.
            [{"penalty" : ["none", "l2"],
              "max_iter" : [500]},
              # Different solvers are required for different penalties
              {"penalty" : ["l1", "elasticnet"],
              "solver" : ["saga"],
              "l1_ratio" : [0.2, 0.4, 0.6, 0.8],
              # Increased maximum iterations to guarantee convergence
              "max_iter" : [500]}]],
            [MLPClassifier(),
            {"activation" : ["identity", "logistic", "tanh", "relu"],
            "solver" : ["lbfgs", "sgd", "adam"],
            "max_iter" : [500]},
            # Reduced number of iterations due to computational strain
            5],
            [KNeighborsClassifier(),
            {"n_neighbors" : list(range(1, 40))},
            10],
            # Searching over gammas for the "poly" kernel is far too
            # computationally demanding
            [SVC(),
              [{"C" : [1, 10, 100],
                "kernel" : [ "rbf", "sigmoid"],
                "gamma" : [0.1, 1, 10]},
              {"C" : [1, 10, 100],
               "kernel" : ["poly"],
               "degree" : [1, 5, 10]}]],
            [DecisionTreeClassifier(),
            {"criterion" : ["gini", "entropy"],
            "splitter" : ["best", "random"],
            "min_samples_split" : list(range(2, 10))},
            10],
            [RandomForestClassifier(),
            {"criterion" : ["gini", "entropy"],
              # I have run a few tests and found that simply increasing the
              # number of estimators improves the yield more than tinkering with
              # the other parameters
            "n_estimators" : [500]},
            10],
            [KMeans(),
            {"n_clusters" : [2],
            "n_init" : [1, 5, 10, 15, 20]}],
            [GaussianNB()]]

  # Computing the accuracy of each model; each entry of `models` supplies
  # the classifier and, optionally, its grid and iteration count
  accuracy_per_model = [
      model_accuracy(x_train, x_test, y_train, y_test, *model)
      for model in models]
  # Output list of model-model accuracy pairs
  return accuracy_per_model

def _drop_iqr_outliers(frame, columns):
  """Return `frame` without the rows that hold an outlier in `columns`.

  A value counts as an outlier when it lies more than 1.5 interquartile
  ranges below the first quartile or above the third quartile of its column.
  """
  values = frame[columns]
  first_quartile = values.quantile(0.25)
  third_quartile = values.quantile(0.75)
  iqr = third_quartile - first_quartile
  is_outlier = ((values < first_quartile - 1.5*iqr)
                | (values > third_quartile + 1.5*iqr))
  return frame.drop(frame[is_outlier.any(axis = 1)].index)

# Reading the Diabetes dataset
df0 = pd.read_csv('diabetes.csv')

# Initialising a preliminarily data-cleaned version of the dataset
dfnan = df0.copy()
# Replacing zero values, which clearly represent missing values, with np.nan in
# all columns other than "Pregnancies" and "Outcome"
dfnan[dfnan.drop(['Pregnancies', 'Outcome'], axis = 1) == 0] = np.nan

# Initialising the first fully data-cleaned version of the dataset
df1 = dfnan.copy()
# Selecting columns with np.nan
ind = ['Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI']
# Filling missing values with mean column values in the appropriate columns
df1[ind] = df1[ind].fillna(df1.mean())

# Creating the second fully data-cleaned version of the dataset: mean-filled
# values with outlier rows removed
df2 = _drop_iqr_outliers(df1, ind)

# Creating the third fully data-cleaned version of the dataset: rows with any
# missing value removed
df3 = dfnan.dropna()

# Creating the fourth fully data-cleaned version of the dataset: complete rows
# with outlier rows removed
df4 = _drop_iqr_outliers(df3, ind)

# Initialising accuracy lists
accuracy_of_datasets = []
accuracies_by_dataset = []
# Creating a list of the datasets
# NOTE(review): this rebinds the module-level name `data`, which up to this
# point referred to the self-play function `data(nnet)` defined above.
data = [df0, df1, df2, df3, df4]
# Generating accuracy lists
for dataset in data:
  accuracies = accuracies_per_dataset(dataset)
  # A list containing the accuracy per model per dataset
  accuracies_by_dataset.append(accuracies)
  # The returned list is used directly rather than the global that
  # accuracies_per_dataset also sets
  best_model_score = max(pair[1] for pair in accuracies)
  # A list containing the accuracy per dataset, determined by the accuracy of
  # the best model for the dataset
  accuracy_of_datasets.append(best_model_score)
best_dataset_index = accuracy_of_datasets.index(max(accuracy_of_datasets))
best_dataset_accuracies = accuracies_by_dataset[best_dataset_index]

# Displaying model accuracy per dataset
for i, model_accuracies in enumerate(accuracies_by_dataset):
  if i == best_dataset_index:
    caption = "df%s (best dataset)" %i
  else:
    caption = "df%s" %i
  display(pd.DataFrame(model_accuracies,
          columns = ["Model", "Accuracy"]).style.set_caption(caption))
  
# Preprocessing for the best dataset
ds = data[best_dataset_index]
x = ds.iloc[:, :-1].values
y = ds.iloc[:,-1].values
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size = 0.3, random_state = 0)
# fit_transform returns the scaled copy, so the result has to be kept; the
# scaler is fitted on the training data only and then applied unchanged to the
# test data
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

# Initialising dataset of various measures of accuracy for the best dataset
df = pd.DataFrame(best_dataset_accuracies, columns = ["Model", "Balanced accuracy"])

# Initialising variables
confusion = []
norm_confusion = []
sensitivity = []
specificity = []
accuracy = []

# Recording the values of the different measures of accuracy, based on 5-fold
# cross-validated predictions on the training data
for model in df["Model"]:
  y_pred = cross_val_predict(model, x_train, y_train, cv = 5)
  # Computed once and reused for the counts and for the stored matrix
  matrix = confusion_matrix(y_train, y_pred)
  # Unpacking four counts assumes a binary target
  tn, fp, fn, tp = matrix.ravel()
  confusion.append(matrix)
  norm_confusion.append(confusion_matrix(y_train, y_pred,
                                         normalize = "true").round(2))
  sensitivity.append(tp / (tp+fn))
  specificity.append(tn / (tn+fp))
  accuracy.append((tp+tn) / (tp+tn+fp+fn))

# Adding the values to the dataset
df["Confusion matrix"] = confusion
df["Normalised confusion matrix"] = norm_confusion
df["Sensitivity"] = sensitivity
df["Specificity"] = specificity
df["Raw accuracy"] = accuracy

# Displaying the result
display(df.style.set_caption("df%s metadata" % best_dataset_index))

# Displaying the final model which maximises the balanced accuracy among the
# datasets, classification algorithms, and parameter combinations tested.
# The row is copied so that the test-set figures written below go into an
# independent Series and not into a slice of df.
final = df.loc[df["Balanced accuracy"].idxmax()].copy()
print("Final model")
display(final["Model"])

# Applying the final model to the test data to get our final accuracy measures
model = final["Model"]
model.fit(x_train, y_train)
y_pred = model.predict(x_test)
# Computed once and reused for the counts and for the stored matrix
test_confusion = confusion_matrix(y_test, y_pred)
# Unpacking four counts assumes a binary target
tn, fp, fn, tp = test_confusion.ravel()
final["Balanced accuracy"] = balanced_accuracy_score(y_test, y_pred)
final["Confusion matrix"] = test_confusion
final["Normalised confusion matrix"] = confusion_matrix(y_test, y_pred,
                                         normalize = "true").round(2)
final["Sensitivity"] = tp / (tp+fn)
final["Specificity"] = tn / (tn+fp)
final["Raw accuracy"] = (tp+tn) / (tp+tn+fp+fn)
display(final)