In [2]:
# Import Important Libraries
import sklearn
from sklearn.linear_model import LogisticRegression # importing Sklearn's logistic regression's module
from sklearn import preprocessing # preprossing is what we do with the data before we run the learning algorithm
from sklearn.model_selection import train_test_split 
import numpy as np
from scipy.io import arff
import numpy as np
# import math

import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
# Load the .arff file
loaded = arff.loadarff('../connect-4.arff')

# Convert to a numpy array
data = np.asarray(loaded[0].tolist(), dtype=np.float32)

# Print the array
print(data)

[[0. 0. 0. ... 0. 0. 2.]
 [0. 0. 0. ... 0. 0. 2.]
 [0. 0. 0. ... 0. 0. 2.]
 ...
 [2. 2. 0. ... 0. 0. 1.]
 [2. 1. 0. ... 2. 0. 0.]
 [2. 1. 1. ... 0. 0. 0.]]


# Data Pre-Processing
Scale after splitting the data into train and test since we will be using gradient ascent. 
* Use `train_test_split` to split the data (`75% train` and `25% test`) to `X_train`, `X_test`, `y_train`, `y_test` with `random_state` of 42
* Reshape `y_train` into 2D array `y_2d_train` and `y_test` into 2D array `y_2d_test`
* Augment the dataset with a column of ones

In [4]:
X = data[:, :-1]
y = data[:, -1:]

# quick fix to make y be 0 or 1 we do not have an opponent but it is only us and the next optimal move
y = y_2 = np.where(y == 2, 1, 0) # * data for 1 is polluted only 0 is reliable here

# y_1 = np.where(y == 1, 1, 0) # * data for 1 is polluted only 0 is reliable here

# y_0 = np.where(y == 0, 1, 0) # * data for 1 is polluted only 0 is reliable here

# ! Example of behavior
# y = [
#     1
#     2
#     0
# ]

# y_2 = [
#     0
#     1
#     0
# ]

In [5]:

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

y_train = y_train.ravel()
y_test = y_test.ravel()

# ! No need to scale since the data is already the same scale

In [6]:
X_train = np.hstack((np.ones((X_train.shape[0], 1)), X_train))
X_test = np.hstack((np.ones((X_test.shape[0], 1)), X_test))

# Implementing Logistic Regression Using Gradient Ascent


In [7]:
# Create object of logistic regression model. So we don't use any regularization, we can set the penalty to `none` 
# or set C to a very large value (for example, C = 100000000), 
# to make lambda (C = 1/lambda) nearly 0.
from sklearn import linear_model
logreg = linear_model.LogisticRegression(penalty = None)



In [8]:
logreg.fit(X_train, y_train)

In [9]:
w_logreg = logreg.coef_
intercept_logreg = logreg.intercept_

In [10]:
# VERIFY - Compare the parameters computed by logreg model and gradient ascent. They should be nearly same.
print('w_logreg: ', w_logreg)
print('intercept_logreg: ', intercept_logreg)

w_logreg:  [[ 0.16914862 -0.21918161  0.03505925  0.14731665  0.09451187 -0.04665616
   0.11860856 -0.22779288  0.39362839 -0.05678718  0.24188048 -0.13896785
   0.07504892 -0.11348435  0.24692117  0.09445764  0.01085524 -0.06976475
   0.03068709  0.18950411  0.12511787  0.06497909 -0.09595552 -0.01534555
   0.03099926  0.02805868  0.23231159  0.26467891  0.01201904 -0.14466824
  -0.0908245   0.0433155   0.27334405  0.0216795   0.26675131 -0.22120601
   0.06545755 -0.11006232  0.06488916  0.12094161  0.14427742 -0.06898606
   0.21216656]]
intercept_logreg:  [0.16914862]


In [11]:
# TODO Q19
# Find the predicted values on test set (X_test not X_test_1) using logreg.predict
y_hat_logreg = logreg.predict(X_test)

# Find the accuracy achieved on test set using logreg.score and y_test 
acc_logreg = logreg.score(X_test, y_test)

print("Accuracy on training data = %f" % acc_logreg)

Accuracy on training data = 0.663410


In [12]:
from sklearn.metrics import precision_recall_fscore_support

prec, recal, fscore, sup = precision_recall_fscore_support(y_test, y_hat_logreg, average='binary')


In [13]:
# VERIFY
print('prec: ', prec)
print('recal: ', recal)
print('fscore: ', fscore)

prec:  0.6729539641943734
recal:  0.9486255069851285
fscore:  0.7873573966710306


In [14]:
from sklearn.preprocessing import PolynomialFeatures

In [15]:
lambda1 = 0  # We set lambda1 = 0 since we are not using any regularization in part 2.

model_degree = range(1,4) # The different feature transformatons we will perform

for d in model_degree:
    print('Order: ', d)
    poly = PolynomialFeatures(d) # * generate all types of polynomial features up to degree d
    X_tr_poly = poly.fit_transform(X_train) # * transforms the training data to have those polynomial features
    # X_val_poly = poly.transform(X_val) # * transforms the validation data to have those polynomial features
    
    logreg = LogisticRegression()
    logreg.fit(X_tr_poly, y_train)
    
    X_test_poly = poly.transform(X_test)
    y_pred = logreg.predict(X_test_poly)
    
    # Find the accuracy achieved on test set using logreg.score and y_test 
    acc_logreg = logreg.score(X_test_poly, y_test)
    print("Accuracy on training data = %f" % acc_logreg)
    
    prec, recal, fscore, sup = precision_recall_fscore_support(y_test, y_pred, average='binary')
    print('-------------------------')

Order:  1
Accuracy on training data = 0.663484
-------------------------
Order:  2


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Accuracy on training data = 0.800252
-------------------------
Order:  3


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Accuracy on training data = 0.846729
-------------------------
