# Final Project: KMeans Clustering with PCA

## Teammates: Prakhar Saxena, Stephen Hansen, Tharindu Mendis

## Imports

In [12]:
import pandas as pd

import numpy as np
from numpy import dot as dot
from numpy.linalg import svd

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

import matplotlib.pyplot as plt
import matplotlib.cm as cm

import math
import copy
import random
from statistics import mode, StatisticsError
from PIL import Image

### Feature Column Splitting

In [13]:
def modify_data(df):
    """Run every configured feature modification and split the result.

    Each callable in ``feature_modifications`` is applied in list order, each
    one receiving the frame returned by the previous one.

    Returns:
        ``(x, y, df_)`` where ``x`` holds the ``feature_cols`` columns, ``y``
        is the encoded target produced by ``output_col_mod`` and ``df_`` holds
        the feature columns together with the raw ``Category`` column.
    """
    transformed = df
    for apply_step in feature_modifications:
        transformed = apply_step(transformed)

    features = transformed[feature_cols]
    with_category = transformed[[*feature_cols, 'Category']]
    target = output_col_mod(transformed)
    return features, target, with_category

def date_time_split(df):
    """Derive the ``Time`` and ``Time_Class`` columns from ``Dates``.

    ``Time`` is the number of minutes elapsed since midnight.  ``Time_Class``
    is the ``time_bins`` value of the first key (in dict order) that the hour
    is strictly less than.  Both columns are assigned on ``df`` itself, which
    is also returned.
    """
    def minutes_past_midnight(clock):
        return clock.minute + 60 * clock.hour

    def bin_for_hour(hour):
        # First bin whose upper bound exceeds the hour; None if none does.
        return next(
            (label for upper, label in time_bins.items() if hour < upper),
            None,
        )

    # Parse the timestamps once and reuse them for both derived columns.
    stamps = pd.to_datetime(df['Dates'])
    df['Time'] = stamps.dt.time.map(minutes_past_midnight)
    df['Time_Class'] = stamps.dt.hour.map(bin_for_hour)
    return df

def day_map(df):
    """Replace the day names in ``DayOfWeek`` with their ``day_mapping`` numbers."""
    replacements = {"DayOfWeek": day_mapping}
    return df.replace(replacements)

def round_xy(df):
    """Round ``X`` and ``Y`` to two decimals, each only if it is a feature column."""
    for axis in ('X', 'Y'):
        if axis in feature_cols:
            df = df.round({axis: 2})
    return df

def drop_na(df):
    """Discard every row that has a missing value in any of ``feature_cols``."""
    cleaned = df.dropna(subset=feature_cols)
    return cleaned

# Modify Category to only include the top 5 most common crime categories 
# and everything else as other. Then assign values to each
def output_col_mod(df):
    """Encode the ``Category`` column as integer class codes.

    Categories listed in ``output_col_mapping`` get their mapped code; every
    other category falls back to the code of ``"OTHER OFFENSES"``.

    Returns:
        A single-column ``Category`` DataFrame whose index has been reset.
    """
    fallback = output_col_mapping["OTHER OFFENSES"]
    codes = df['Category'].map(output_col_mapping)
    codes = codes.fillna(fallback).astype(int)
    # reset_index() moves the old index into a column; keep only the codes.
    encoded = codes.to_frame().reset_index()
    return encoded[['Category']]

### Definitions

In [14]:
# Directory (relative to the notebook) that holds train.csv
dataset_dir = '../CS383_datasets/'

# Columns handed to the model. Include any column that is produced by one of
# the feature modification functions below (e.g. 'Time' and 'Time_Class' come
# from date_time_split).
# X and Y are rounded by round_xy only if they are listed in feature_cols.
# NOTE(review): the original note says all columns are enforced to be
# numeric; no explicit check is visible in this file -- confirm.
feature_cols = ['Time', 'Time_Class', 'DayOfWeek', 'X', 'Y']

# Applied by modify_data in list order. drop_na should always stay last so
# that it sees the columns created by the earlier steps.
feature_modifications = [
    date_time_split,
    day_map,
    round_xy,
    drop_na
]


output_col = 'Category'

# Integer class code of every crime category that is kept as its own class;
# the codes follow the order of the names below, starting at 0.
# 'OTHER OFFENSES' comes last and its code also stands for every category
# that is not listed here.
output_col_mapping = {
    category: code
    for code, category in enumerate((
        'LARCENY/THEFT',
        'NON-CRIMINAL',
        'ASSAULT',
        'DRUG/NARCOTIC',
        'VEHICLE THEFT',
        'OTHER OFFENSES',
    ))
}

# {'LARCENY/THEFT': 174900, 'OTHER OFFENSES': 126182, 'NON-CRIMINAL': 92304, 'ASSAULT': 76876, 
# 'DRUG/NARCOTIC': 53971, 'VEHICLE THEFT': 53781, 'VANDALISM': 44725, 'WARRANTS': 42214, 
# 'BURGLARY': 36755, 'SUSPICIOUS OCC': 31414, 'MISSING PERSON': 25989, 'ROBBERY': 23000, 
# 'FRAUD': 16679, 'FORGERY/COUNTERFEITING': 10609, 'SECONDARY CODES': 9985, 'WEAPON LAWS': 8555, 
# 'PROSTITUTION': 7484, 'TRESPASS': 7326, 'STOLEN PROPERTY': 4540, 'SEX OFFENSES FORCIBLE': 4388, 
# 'DISORDERLY CONDUCT': 4320, 'DRUNKENNESS': 4280, 'RECOVERED VEHICLE': 3138, 'KIDNAPPING': 2341, 
# 'DRIVING UNDER THE INFLUENCE': 2268, 'RUNAWAY': 1946, 'LIQUOR LAWS': 1903, 'ARSON': 1513, 
# 'LOITERING': 1225, 'EMBEZZLEMENT': 1166, 'SUICIDE': 508, 'FAMILY OFFENSES': 491, 'BAD CHECKS': 406, 
# 'BRIBERY': 289, 'EXTORTION': 256, 'SEX OFFENSES NON FORCIBLE': 148, 'GAMBLING': 146, 
# 'PORNOGRAPHY/OBSCENE MAT': 22, 'TREA': 6}


# Keys are evaluated as: hour is less than key
# Bins are checked in the order written here and the first match wins, so the
# keys must stay in ascending order.
# Cycle runs from midnight 00:00 to 23:59
time_bins = {
    4: 0, # Before 4am is 0
    6: 1, # Before 6am is 1
    12: 2, # Before 12pm is 2
    18: 3, # Before 6pm is 3
    24: 4, # Before midnight is 4
}

# Day names are numbered from Sunday (0) through Saturday (6).
day_mapping = {
    name: number
    for number, name in enumerate((
        'Sunday',
        'Monday',
        'Tuesday',
        'Wednesday',
        'Thursday',
        'Friday',
        'Saturday',
    ))
}

## General Functions

In [15]:
def test_train_split(x, y, shuffle=True, test_size=0.2):
    """Split ``x`` and ``y`` into train and test parts with a fixed seed of 0.

    Thin wrapper around scikit-learn's ``train_test_split``; its result is
    returned unchanged.
    """
    split_options = {
        "test_size": test_size,
        "random_state": 0,
        "shuffle": shuffle,
    }
    return train_test_split(x, y, **split_options)

# Standardize by subtracting column mean and divide by the standard deviation of the column
def standardize_data(training_data, testing_data=None, std_mean=False, numpy_array=False):
    """Standardize columns using statistics computed on ``training_data``.

    Args:
        training_data: data the column mean and standard deviation are
            fitted on.
        testing_data: optional second data set that is scaled with the
            training statistics.
        std_mean: when true, scale with NumPy (sample standard deviation,
            ``ddof=1``) and also return the statistics; when false, scale
            with a ``StandardScaler``.
        numpy_array: only read when ``std_mean`` is false. True if the inputs
            can be passed to the scaler as they are, false if they must first
            be converted with ``to_numpy()``.

    Returns:
        ``(train, test, std, mean)`` when ``std_mean`` is true, where ``test``
        is ``0`` if no testing data was given. Otherwise ``(train, test)``,
        where ``test`` is ``None`` if no testing data was given.
    """
    if std_mean:
        std = np.std(training_data, axis=0, ddof=1)
        mean = np.mean(training_data, axis=0)
        scaled_train = (training_data - mean) / std
        if testing_data is None:
            return scaled_train, 0, std, mean
        return scaled_train, (testing_data - mean) / std, std, mean

    if not numpy_array:
        training_data = training_data.to_numpy()
        if testing_data is not None:
            testing_data = testing_data.to_numpy()

    scaler = StandardScaler()
    scaler.fit(training_data)
    scaled_train = scaler.transform(training_data)
    # testing_data defaults to None; transforming None used to raise here,
    # so the optional argument is now honoured on this path as well.
    scaled_test = None if testing_data is None else scaler.transform(testing_data)
    return scaled_train, scaled_test


# Prepare and Get test train data
def get_data():
    """Load a 1000-row sample of ``train.csv`` and return standardized splits.

    Returns:
        ``(X_train, X_test, y_train, y_test, df)``. The two ``X`` values are
        the standardized feature columns, the two ``y`` values are the encoded
        targets converted with ``to_numpy()``, and ``df`` is the third value
        returned by ``modify_data`` (not standardized).
    """
    # Fixing random state for reproducibility
    np.random.seed(0)

    csv_path = dataset_dir + 'train.csv'
    sampled = pd.read_csv(csv_path).sample(n=1000)

    features, target, df = modify_data(sampled)
    X_train, X_test, y_train, y_test = test_train_split(features, target)

    # Scale the feature columns only; the scaling statistics come from the
    # training part and are reused for the test part.
    X_train, X_test = standardize_data(X_train, X_test)

    # Fix the random state again because the preparation steps above may
    # have consumed random numbers.
    np.random.seed(0)

    return X_train, X_test, y_train.to_numpy(), y_test.to_numpy(), df

# K-Means Clustering with PCA

## Local Functions

In [16]:
# Get face data as per professors code
def fetch_face_data():
    """Fetch the LFW face data, capped at 50 images per person.

    Only people with at least 20 images are requested, and the images are
    resized by a factor of 0.7.

    Returns:
        ``(X_people, y_people)``: the selected rows of ``people.data`` divided
        by 255, and the matching entries of ``people.target``.
    """
    # Imported locally: the notebook's import cell never brings this name in,
    # so the call below used to fail with a NameError.
    from sklearn.datasets import fetch_lfw_people

    people = fetch_lfw_people(min_faces_per_person=20, resize=0.7)
    # The np.bool alias no longer exists in current NumPy; the builtin bool
    # selects the same dtype.
    mask = np.zeros(people.target.shape, dtype=bool)
    for target in np.unique(people.target):
        # Keep the first 50 occurrences of every person.
        mask[np.where(people.target == target)[0][:50]] = True
    X_people = people.data[mask] / 255
    y_people = people.target[mask]
    return X_people, y_people


# Calculate euclidean distance
def euclidean_distance(a, b):
    """Return the Euclidean (L2) distance between ``a`` and ``b``."""
    delta = a - b
    squared_total = np.sum(np.square(delta))
    return np.sqrt(squared_total)


def manhattan_distance(a, b):
    """Return the Manhattan (L1) distance between ``a`` and ``b``."""
    offsets = np.abs(a - b)
    return np.sum(offsets)


# Get Principal Components Analysis Vectors and Vals using eigh.
# I found eigh to have non complex vectors 
def pca_vv(data):
    """Standardize ``data`` and eigen-decompose its covariance matrix.

    Returns:
        ``(std_data, vals, vectors, std, mean)``: the standardized data, the
        eigenvalues and eigenvectors of its covariance matrix, and the
        standard deviation and mean used for the standardization.
    """
    std_data, _unused_test, std, mean = standardize_data(data, std_mean=True)

    # Covariance between the columns (np.cov expects variables in rows).
    covariance = np.cov(std_data.T)

    # eigh rather than eig: it was found to give non-complex vectors here.
    vals, vectors = np.linalg.eigh(covariance)

    return std_data, vals, vectors, std, mean


# Project and Restructure
def pca_dr(std_data, vals, vectors, d, whitening=False):
    """Project ``std_data`` onto ``d`` leading eigenvectors and map it back.

    Args:
        std_data: data to project.
        vals: eigenvalues, aligned with the columns of ``vectors``.
        vectors: eigenvectors stored as columns.
        d: number of components taken from the largest eigenvalues.
        whitening: when true, each projected component is additionally
            multiplied by ``1 / sqrt(eigenvalue)``.

    Returns:
        ``(z, r)``: the projection, and ``z`` multiplied by the transpose of
        the selected eigenvectors.
    """
    # Positions of the d largest eigenvalues, largest first.
    leading = np.argsort(vals)[-d:][::-1]
    top_vals = vals[leading]
    basis = vectors[:, leading]

    z = np.dot(std_data, basis)

    if whitening:
        scale = np.diag(1. / np.sqrt(top_vals))
        z = np.dot(z, scale)

    return z, np.dot(z, basis.T)


def myKMeans(X, Y, k):
    """Cluster the rows of ``X`` with a hand-written k-means and label the clusters.

    ``k`` rows of ``X`` are drawn (seed 0) as the initial reference vectors
    and removed from ``X`` and ``Y``.  Every pass assigns each remaining row
    to its nearest reference (Euclidean distance) and then moves each
    reference to the mean of its rows.  The loop stops once the summed
    Manhattan movement of the references differs from the previous pass's
    sum by less than ``2 ** -23``.

    Args:
        X: 2-D array of samples (rows are indexed and deleted along axis 0).
        Y: labels aligned with ``X``; read as ``Y[i]`` and ``Y[:, 0]`` below.
        k: number of clusters.

    Returns:
        A list with one dict per cluster, holding the keys ``"ref"`` (final
        reference vector), ``"y"`` (label chosen for the cluster) and
        ``"purity"``.

    NOTE(review): the labelling stage hard-codes six classes (keys 0-5) --
    presumably it only works for k == 6 with all six labels present; confirm.
    """
    # Fixing random state for reproducibility
    np.random.seed(0)

    # Choosing k random rows to serve as the initial vectors
    r = np.random.choice(X.shape[0], k, replace=False).tolist()

    plot_data = []
    rows = []

    # Descending order, so that deleting a row further down does not shift
    # the positions of the rows that still have to be deleted
    r = sorted(r, reverse=True)

    for idx, val in enumerate(r):
        rows.append({"ref": X[val], "class_rows": None, "idx": None, "match_count": 0})

    # I decided to remove the reference vectors from the dataset
    for idx, val in enumerate(r):
        X = np.delete(X, val, axis=0)
        Y = np.delete(Y, val, axis=0)

    # plot_data holds the cluster state of the most recent passes
    plot_data.append(rows)
    purity = []

    previous_change_sum = None
    current_change_sum = None
    test_break_condition = False
    # Convergence tolerance
    epsilon = pow(2, -23)
    # Pass counter; incremented below but not read anywhere in this function
    counter = 0
    while True:
        # Start from a copy of the previous pass and clear its members
        vec = copy.deepcopy(plot_data[-1])
        for idx, vector in enumerate(vec):
            vec[idx]["class_rows"] = None
        # NOTE(review): "idx" is not cleared here, so a cluster that receives
        # no rows in this pass would keep the previous pass's index list
        # (or None on the first pass) -- confirm empty clusters cannot occur.
        # Using the euclidean distance to put each person in the proper reference vector set.
        for master_idx, person in enumerate(X):
            smallest = None
            smallest_idx = 0
            for idx, vector in enumerate(vec):
                if smallest is None:
                    smallest = euclidean_distance(person, vector["ref"])
                    smallest_idx = idx
                else:
                    new_small = euclidean_distance(person, vector["ref"])
                    if new_small < smallest:
                        smallest_idx = idx
                        smallest = new_small
            # The first row of a cluster starts its member array and index
            # list; later rows are stacked / appended
            if vec[smallest_idx]["class_rows"] is None:
                vec[smallest_idx]["class_rows"] = person
                vec[smallest_idx]["idx"] = [master_idx]
            else:
                vec[smallest_idx]["class_rows"] = np.vstack((vec[smallest_idx]["class_rows"], person))
                vec[smallest_idx]["idx"].append(master_idx)

        # Calculating purity
        # NOTE(review): only "label == 1" versus "anything else" is counted,
        # so with more than two labels this is not the usual majority-class
        # purity -- confirm this is intended.
        for idx, vector in enumerate(vec):
            pos_one_count = 0
            neg_one_count = 0
            for y_idx in vector["idx"]:
                if Y[y_idx] == 1:
                    pos_one_count += 1
                else:
                    neg_one_count += 1
            if pos_one_count >= neg_one_count:
                vec[idx]["purity"] = pos_one_count / len(vec[idx]["idx"])
            else:
                vec[idx]["purity"] = neg_one_count / len(vec[idx]["idx"])

        # Size-weighted average of the cluster purities for this pass;
        # collected in `purity`, which is not part of the return value
        purity_ = 0
        for idx, vector in enumerate(vec):
            purity_ += vec[idx]["purity"] * len(vec[idx]["idx"])
        purity.append(purity_/len(Y))

        # Move every reference to the mean of its rows.
        # Using the manhattan distance to calculate the change of magnitude summation
        # ("moc" is only recorded from the second pass on)
        for idx, vector_set in enumerate(vec):
            vec[idx]["ref"] = np.mean(vector_set["class_rows"], axis=0)
            if current_change_sum is not None:
                vec[idx]["moc"] = manhattan_distance(plot_data[-1][idx]["ref"], vec[idx]["ref"])

        if current_change_sum is not None:
            current_change_sum = 0
            for idx, vector_set in enumerate(vec):
                current_change_sum += vector_set["moc"]

        # Keep only the two most recent passes
        plot_data.append(vec)
        if len(plot_data) >= 3:
            plot_data = plot_data[-2:]

        # First pass: only initialise the sum. Second pass: arm the break
        # test. Later passes: compare this pass's movement with the previous one.
        if current_change_sum is not None:
            if test_break_condition:
                # If change is less than epsilon, break
                if abs(current_change_sum - previous_change_sum) < epsilon:
                    break
            else:
                test_break_condition = True
        else:
            current_change_sum = 0

        counter += 1
        previous_change_sum = copy.deepcopy(current_change_sum)

    # My custom algorithm to map clusters to class labels.
    # c holds, per cluster, how often each label occurs among its rows;
    # total_count holds the same counts summed over all clusters.
    ret_val = []
    total_count = {0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0}
    c = []
    for row in plot_data[-1]:
        sub = np.take(Y, row["idx"], 0)
        m = np.bincount(sub[:, 0]).tolist()
        for idx, val in enumerate(m):
            total_count[idx] += val
        c.append(m)
        ret_val.append({"ref": row["ref"], "y": None, "purity": row["purity"]})

    # counts is filled below but not read anywhere in this function
    counts = []
    # finalized marks the values that have already been handed to a cluster
    finalized = {
        0: False, 1: False, 2: False, 3: False, 4: False, 5: False
    }
    for q_idx, count in enumerate(c):
        # Percentage of each label's rows that landed in this cluster
        append_count = []
        for w_idx, ro in enumerate(count):
            append_count.append(round((ro/total_count[w_idx])*100, 2))
        # Label indices ordered by ascending percentage
        append_count = sorted(range(len(append_count)), key=lambda k: append_count[k])
        counts.append(append_count)
        # Walk s_idx down from 5 until a value that is not yet taken is found.
        # NOTE(review): append_count.index(s_idx) is the position of label
        # s_idx in the ascending ranking, not a label id itself -- confirm
        # that this is the value meant to be stored in "y".
        s_idx = 5
        while True:
            found = False
            if finalized[append_count.index(s_idx)]:
                s_idx -= 1
            else:
                finalized[append_count.index(s_idx)] = True
                ret_val[q_idx]["y"] = append_count.index(s_idx)
                found = True
            if found:
                break
    
    return ret_val


def test_model(refs, testing_X, testing_Y):
    """Classify every test row by its nearest reference and print the accuracy.

    Args:
        refs: dicts with a ``"ref"`` vector and the ``"y"`` label predicted
            for rows that are closest to that vector.
        testing_X: rows to classify.
        testing_Y: true labels, indexed in step with ``testing_X``.

    A prediction counts as correct only when it is one of the classes 0-5 and
    equals the true label.  Nothing is returned; the accuracy (in percent) is
    printed.
    """
    known_classes = (0, 1, 2, 3, 4, 5)
    correct = 0
    wrong = 0
    for idx, row in enumerate(testing_X):
        # min() keeps the first of several equally close references, which
        # matches a strict "<" scan over refs.
        nearest = min(
            refs,
            key=lambda ref: euclidean_distance(row, ref["ref"]),
            default=None,
        )
        pred = 0 if nearest is None else nearest["y"]
        if pred in known_classes and testing_Y[idx] == pred:
            correct += 1
        else:
            wrong += 1

    accuracy = correct / (correct + wrong)
    print("Accuracy: {}".format(accuracy * 100))

### Using my K-Means Algorithm with PCA d=5

In [17]:
X_train, X_test, y_train, y_test, df = get_data()
# df is the pandas DataFrame before the data is standardized
# X_train and X_test hold the feature columns, in feature_cols order;
# currently: ['Time', 'Time_Class', 'DayOfWeek', 'X', 'Y']
# y_train and y_test hold the encoded Category column (see output_col_mapping)
print("Running PCA")
std_data, vals, vectors, std, mean = pca_vv(X_train)
# Keep all 5 components
z2, r2 = pca_dr(std_data, vals, vectors, 5)

print("Running K-Means Clustering")
# scikit-learn model fitted on the same projection; not used in this cell
kmeans = KMeans(n_clusters=6, random_state=0).fit(z2)
refs = myKMeans(z2, y_train, 6)
print("Clustering has ended!")
print("\nPurity of Refs:")
for ref in refs:
    print(ref)

# Project the test rows with the eigenvectors obtained from the training rows.
# NOTE(review): the test rows are scaled by a StandardScaler fitted on X_train,
# while std_data came from pca_vv's own standardization (ddof=1) -- confirm
# that both scalings are meant to differ.
train_, test_ = standardize_data(X_train, X_test, numpy_array=True)
test_z2, test_r2 = pca_dr(test_, vals, vectors, 5)
test_model(refs, test_z2, y_test)

Running PCA
Running K-Means Clustering
Clustering has ended!

Purity of Refs:
{'ref': array([-0.21189379, -1.94959232,  0.66694336, -0.89039051,  0.02348225]), 'y': 2, 'purity': 0.8658536585365854}
{'ref': array([ 2.71144642,  0.11812763,  0.1433275 ,  0.07438304, -0.13149775]), 'y': 4, 'purity': 0.8990825688073395}
{'ref': array([-0.6977257 ,  0.75953069,  0.82042515,  0.0097729 ,  0.01055863]), 'y': 0, 'purity': 0.8721461187214612}
{'ref': array([ 0.48067296,  0.37453129, -0.71843771, -0.36302706,  0.10020231]), 'y': 3, 'purity': 0.8951048951048951}
{'ref': array([-1.25512808,  0.22794632, -0.85663716, -0.51872942, -0.05145264]), 'y': 1, 'purity': 0.8636363636363636}
{'ref': array([-0.41901133, -0.73939077, -0.36445396,  1.33509654,  0.00833069]), 'y': 5, 'purity': 0.9007633587786259}
Accuracy: 17.0


### Using my K-Means Algorithm with PCA d=4

In [18]:
X_train, X_test, y_train, y_test, df = get_data()
# df is the pandas DataFrame before the data is standardized
# X_train and X_test hold the feature columns, in feature_cols order;
# currently: ['Time', 'Time_Class', 'DayOfWeek', 'X', 'Y']
# y_train and y_test hold the encoded Category column (see output_col_mapping)
print("Running PCA")
std_data, vals, vectors, std, mean = pca_vv(X_train)
# Keep the 4 components with the largest eigenvalues
z2, r2 = pca_dr(std_data, vals, vectors, 4)

print("Running K-Means Clustering")
# scikit-learn model fitted on the same projection; not used in this cell
kmeans = KMeans(n_clusters=6, random_state=0).fit(z2)
refs = myKMeans(z2, y_train, 6)
print("Clustering has ended!")
print("\nPurity of Refs:")
for ref in refs:
    print(ref)

# Project the test rows with the eigenvectors obtained from the training rows.
# NOTE(review): the test rows are scaled by a StandardScaler fitted on X_train,
# while std_data came from pca_vv's own standardization (ddof=1) -- confirm
# that both scalings are meant to differ.
train_, test_ = standardize_data(X_train, X_test, numpy_array=True)
test_z2, test_r2 = pca_dr(test_, vals, vectors, 4)
test_model(refs, test_z2, y_test)

Running PCA
Running K-Means Clustering
Clustering has ended!

Purity of Refs:
{'ref': array([-0.21189379, -1.94959232,  0.66694336, -0.89039051]), 'y': 2, 'purity': 0.8658536585365854}
{'ref': array([2.70635246, 0.12277801, 0.12771246, 0.07207263]), 'y': 4, 'purity': 0.9}
{'ref': array([-0.69940433,  0.75958174,  0.82436274,  0.01283566]), 'y': 0, 'purity': 0.8715596330275229}
{'ref': array([ 0.45714505,  0.37190388, -0.70374815, -0.36247766]), 'y': 3, 'purity': 0.8958333333333334}
{'ref': array([-1.25512808,  0.22794632, -0.85663716, -0.51872942]), 'y': 1, 'purity': 0.8636363636363636}
{'ref': array([-0.41896949, -0.74413571, -0.36618438,  1.3438724 ]), 'y': 5, 'purity': 0.9}
Accuracy: 17.0


### Using my K-Means Algorithm with PCA d=3

In [19]:
X_train, X_test, y_train, y_test, df = get_data()
# df is the pandas DataFrame before the data is standardized
# X_train and X_test hold the feature columns, in feature_cols order;
# currently: ['Time', 'Time_Class', 'DayOfWeek', 'X', 'Y']
# y_train and y_test hold the encoded Category column (see output_col_mapping)
print("Running PCA")
std_data, vals, vectors, std, mean = pca_vv(X_train)
# Keep the 3 components with the largest eigenvalues
z2, r2 = pca_dr(std_data, vals, vectors, 3)

print("Running K-Means Clustering")
# scikit-learn model fitted on the same projection; not used in this cell
kmeans = KMeans(n_clusters=6, random_state=0).fit(z2)
refs = myKMeans(z2, y_train, 6)
print("Clustering has ended!")
print("\nPurity of Refs:")
for ref in refs:
    print(ref)

# Project the test rows with the eigenvectors obtained from the training rows.
# NOTE(review): the test rows are scaled by a StandardScaler fitted on X_train,
# while std_data came from pca_vv's own standardization (ddof=1) -- confirm
# that both scalings are meant to differ.
train_, test_ = standardize_data(X_train, X_test, numpy_array=True)
test_z2, test_r2 = pca_dr(test_, vals, vectors, 3)
test_model(refs, test_z2, y_test)

Running PCA
Running K-Means Clustering
Clustering has ended!

Purity of Refs:
{'ref': array([0.53453715, 0.20274272, 0.76409528]), 'y': 2, 'purity': 0.875}
{'ref': array([2.73874222, 0.08297756, 0.10602244]), 'y': 4, 'purity': 0.8981481481481481}
{'ref': array([-1.02968585,  0.81965482,  0.77538419]), 'y': 1, 'purity': 0.8392857142857143}
{'ref': array([ 0.31117579,  0.1708872 , -0.98717796]), 'y': 0, 'purity': 0.91875}
{'ref': array([-1.31345199,  0.03499478, -0.7710587 ]), 'y': 3, 'purity': 0.8896551724137931}
{'ref': array([-0.40446125, -1.98681706,  0.47260751]), 'y': 5, 'purity': 0.8811881188118812}
Accuracy: 15.0


### Using my K-Means Algorithm with PCA d=2

In [20]:
X_train, X_test, y_train, y_test, df = get_data()
# df is the pandas DataFrame before the data is standardized
# X_train and X_test hold the feature columns, in feature_cols order;
# currently: ['Time', 'Time_Class', 'DayOfWeek', 'X', 'Y']
# y_train and y_test hold the encoded Category column (see output_col_mapping)
print("Running PCA")
std_data, vals, vectors, std, mean = pca_vv(X_train)
# Keep the 2 components with the largest eigenvalues
z2, r2 = pca_dr(std_data, vals, vectors, 2)

print("Running K-Means Clustering")
# scikit-learn model fitted on the same 2-component projection
kmeans = KMeans(n_clusters=6, random_state=0).fit(z2)
refs = myKMeans(z2, y_train, 6)
print("Clustering has ended!")
print("\nPurity of Refs:")
for ref in refs:
    print(ref)

# Project the test rows with the eigenvectors obtained from the training rows.
# NOTE(review): the test rows are scaled by a StandardScaler fitted on X_train,
# while std_data came from pca_vv's own standardization (ddof=1) -- confirm
# that both scalings are meant to differ.
train_, test_ = standardize_data(X_train, X_test, numpy_array=True)
test_z2, test_r2 = pca_dr(test_, vals, vectors, 2)
test_model(refs, test_z2, y_test)

Running PCA
Running K-Means Clustering
Clustering has ended!

Purity of Refs:
{'ref': array([ 0.97889994, -1.37481099]), 'y': 5, 'purity': 0.9036144578313253}
{'ref': array([2.70146454, 0.3091917 ]), 'y': 4, 'purity': 0.8942307692307693}
{'ref': array([-1.32399896,  0.91299239]), 'y': 2, 'purity': 0.8553459119496856}
{'ref': array([0.34867606, 0.64533715]), 'y': 0, 'purity': 0.8829268292682927}
{'ref': array([-0.87402448, -0.28193642]), 'y': 1, 'purity': 0.9058823529411765}
{'ref': array([-1.0058562 , -2.00628626]), 'y': 3, 'purity': 0.8493150684931506}
Accuracy: 13.0


### Using SKLearn K-Means Algorithm

In [21]:
# Relies on X_train, X_test, vals, vectors and kmeans still being defined by
# a previously executed cell; d must match the projection kmeans was fitted on.
train_, test_ = standardize_data(X_train, X_test, numpy_array=True)
test_z2, test_r2 = pca_dr(test_, vals, vectors, 2)
# predict() returns the index of the nearest scikit-learn cluster for every
# test row; these are cluster numbers, not Category codes.
kmeans.predict(test_z2)

array([3, 4, 0, 2, 2, 5, 3, 2, 0, 3, 4, 3, 3, 3, 2, 5, 0, 1, 0, 0, 5, 3,
       2, 3, 2, 3, 1, 4, 5, 3, 5, 3, 5, 2, 4, 0, 0, 5, 3, 0, 5, 2, 5, 3,
       3, 3, 2, 0, 3, 2, 0, 2, 1, 0, 1, 0, 5, 2, 0, 3, 3, 1, 1, 0, 1, 5,
       1, 2, 0, 0, 5, 3, 0, 1, 3, 4, 3, 5, 0, 0, 5, 3, 3, 3, 3, 5, 3, 1,
       0, 3, 0, 2, 3, 2, 2, 2, 3, 0, 1, 0, 3, 3, 3, 3, 3, 3, 1, 0, 0, 3,
       3, 3, 4, 3, 3, 0, 4, 3, 0, 2, 3, 5, 0, 0, 3, 2, 4, 5, 0, 0, 3, 5,
       0, 0, 3, 2, 2, 2, 5, 3, 0, 0, 3, 0, 5, 0, 4, 0, 3, 1, 2, 3, 3, 5,
       4, 0, 3, 3, 4, 3, 3, 0, 0, 0, 3, 1, 0, 5, 3, 3, 1, 4, 3, 5, 0, 2,
       0, 5, 1, 3, 2, 1, 0, 0, 5, 3, 3, 2, 4, 1, 3, 0, 2, 3, 3, 3, 0, 3,
       5, 2], dtype=int32)

In [22]:
# Encoded Category labels of the test rows (first column of y_test)
y_test[:, 0]

array([4, 5, 5, 4, 5, 5, 0, 0, 5, 5, 5, 4, 5, 5, 5, 5, 5, 2, 2, 0, 5, 0,
       5, 2, 5, 5, 2, 1, 5, 5, 5, 2, 0, 4, 1, 2, 5, 5, 2, 5, 5, 5, 5, 0,
       5, 0, 3, 5, 0, 5, 5, 5, 5, 5, 0, 0, 3, 4, 5, 3, 0, 5, 1, 1, 5, 5,
       5, 5, 3, 2, 0, 5, 0, 1, 0, 5, 5, 5, 5, 5, 5, 4, 3, 0, 5, 1, 4, 5,
       5, 5, 5, 5, 2, 5, 5, 1, 5, 5, 5, 1, 5, 5, 0, 5, 5, 5, 5, 1, 1, 5,
       5, 1, 5, 0, 5, 5, 5, 5, 5, 0, 5, 5, 3, 0, 0, 5, 5, 0, 0, 3, 5, 0,
       5, 5, 5, 0, 5, 5, 4, 2, 5, 0, 5, 2, 5, 0, 2, 2, 5, 5, 5, 2, 0, 5,
       1, 3, 0, 0, 0, 0, 1, 5, 2, 0, 1, 1, 3, 5, 5, 4, 1, 5, 5, 5, 1, 5,
       5, 0, 5, 5, 4, 2, 2, 5, 1, 5, 3, 2, 1, 3, 0, 5, 5, 1, 5, 5, 1, 0,
       1, 0])