In [1]:
# We are going to create an MLP that classifies triangles.
# 1. Create data (triples) of side lengths, and also angles.  (Consider differences in data.)
# 2. Label these data with a labeling function.  (Notice differences in labels.)
# 3. Create an MLP model for each.  (What do you expect from learning/performance?)
# 4. Pass respective data and labels into MLP model, and run fit loop.  
# 5. Plot and visualize results.  (Discuss)

In [2]:
# We could create a model which learns to "finish" triangles from two thirds of a triple.
# However, we already know the triangle inequality (for sides) and the 180-degree angle-sum rule (for angles), which we will use for labeling.
# This is a toy model for illustration purposes.

In [3]:
from sklearn.neural_network import MLPClassifier
import numpy as np

In [4]:
# Create data (two different paths).  Stick with integers for simplicity.

In [5]:
def create_data_sides(num_side_triples, low=1, high=1000, rng=None):
    '''
    Generate random integer side-length triples.

    Parameters
    ----------
    num_side_triples : int
        Number of triples (rows) to generate.
    low : int, optional
        Smallest side length that can be drawn (inclusive).  Defaults to 1.
    high : int, optional
        Upper bound on side lengths (exclusive).  Defaults to 1000.
    rng : numpy.random.Generator, optional
        Source of randomness, e.g. np.random.default_rng(seed).  When omitted,
        NumPy's global random state is used, so np.random.seed(...) still
        controls the output exactly as before.

    Returns
    -------
    numpy.ndarray
        Integer array of shape (num_side_triples, 3).
    '''
    # Does it matter the range we choose for side lengths?
    # `low` and `high` are parameters so that this question can be explored.
    shape = (num_side_triples, 3)
    if rng is None:
        # Legacy global-state path: identical to the original hard-coded call
        # when the default bounds are used.
        return np.random.randint(low=low, high=high, size=shape)
    # Generator.integers also treats `high` as exclusive by default.
    return rng.integers(low, high, size=shape)

In [6]:
def create_data_angles(num_angle_triples, low=1, high=179, rng=None):
    '''
    Generate random integer angle triples (in degrees).

    Each of the three angles is drawn independently, so a triple is NOT
    guaranteed to sum to 180.

    Parameters
    ----------
    num_angle_triples : int
        Number of triples (rows) to generate.
    low : int, optional
        Smallest angle that can be drawn (inclusive).  Defaults to 1.
    high : int, optional
        Upper bound on angles (exclusive).  Defaults to 179.
    rng : numpy.random.Generator, optional
        Source of randomness, e.g. np.random.default_rng(seed).  When omitted,
        NumPy's global random state is used, so np.random.seed(...) still
        controls the output exactly as before.

    Returns
    -------
    numpy.ndarray
        Integer array of shape (num_angle_triples, 3).
    '''
    # Some reasonable bounds on (integer) angles are [1,179).  Why?
    # Could we choose other bounds for creating the distribution of integer degrees?
    # `low` and `high` are parameters so that other bounds can be tried.
    shape = (num_angle_triples, 3)
    if rng is None:
        # Legacy global-state path: identical to the original hard-coded call
        # when the default bounds are used.
        return np.random.randint(low=low, high=high, size=shape)
    # Generator.integers also treats `high` as exclusive by default.
    return rng.integers(low, high, size=shape)

In [7]:
# How much data do we want to generate?  (Number of triples requested in the preview cells below.)
num_data = 4

In [8]:
# Preview a small batch of random side-length triples (one row per triple).
create_data_sides(num_data)

array([[563, 357, 694],
       [335,  53, 762],
       [672, 882, 911],
       [969, 480, 436]])

In [9]:
# Preview a small batch of random angle triples (one row per triple).
create_data_angles(num_data)

array([[ 64,  18, 171],
       [ 39,  96, 145],
       [134,   6,  80],
       [168,  35, 110]])

In [10]:
def side_labeler(side_data, allow_degenerate=True):
    '''
    Function takes ONLY side data and returns labels of successful triangles.

    Parameters
    ----------
    side_data : array-like, shape (n, 3)
        One row of side lengths per candidate triangle.
    allow_degenerate : bool, optional
        When True (the default, matching the original behaviour) a triple whose
        longest side EQUALS the sum of the other two is labeled 1.  Pass False
        to require a strict inequality, i.e. a triangle with non-zero area.

    Returns
    -------
    numpy.ndarray
        Integer vector of length n, equal to 1 where the Triangle Inequality
        Theorem holds and 0 otherwise.  Empty input gives an empty integer
        vector.
    '''
    # https://en.wikipedia.org/wiki/Triangle_inequality
    sides = np.asarray(side_data)

    # Nothing to label: return an empty integer vector so the dtype matches
    # the non-empty case.
    if sides.size == 0:
        return np.zeros(0, dtype=int)

    # Sort each row so that the longest side is last, then compare it with the
    # sum of the two shortest sides.  Done on the whole array at once rather
    # than row by row in Python.
    ordered = np.sort(sides, axis=1)
    longest = ordered[:, -1]
    sum_of_shortest_two = ordered[:, 0] + ordered[:, 1]

    if allow_degenerate:
        is_triangle = longest <= sum_of_shortest_two
    else:
        is_triangle = longest < sum_of_shortest_two

    return is_triangle.astype(int)

In [11]:
def angle_labeler(angle_data):
    '''
    Label angle triples: 1 when the three angles add up to 180, else 0.

    Takes ONLY angle data (an iterable of triples) and returns a NumPy array
    holding one label per triple, in the same order as the input.
    '''
    labels = []
    for triple in angle_data:
        # The only check applied is that the first three entries sum to 180 degrees.
        total = triple[0] + triple[1] + triple[2]
        if total == 180:
            labels.append(1)
        else:
            labels.append(0)
    return np.array(labels)

In [25]:
# Test side data creation: draw 100 random side triples.
# `sides` is reused below for labeling and for fitting the classifier.
sides = create_data_sides(100)
sides

array([[434, 516,  73],
       [502, 176, 922],
       [300, 338, 673],
       [466, 218, 629],
       [333, 657, 636],
       [622, 929, 885],
       [991,   5, 712],
       [319, 840,  82],
       [469, 424, 608],
       [560, 405, 197],
       [295, 974, 843],
       [771, 651, 100],
       [207,  17,  80],
       [ 48, 773, 939],
       [422, 935,  22],
       [973, 806, 600],
       [625, 250, 156],
       [943, 610, 774],
       [979,  85, 249],
       [633, 128, 469],
       [222, 910, 945],
       [540,  21,  62],
       [137, 847, 398],
       [600, 418, 252],
       [334, 435, 208],
       [723, 185, 997],
       [120, 729, 660],
       [251, 898, 884],
       [677, 335, 549],
       [742, 801, 606],
       [610,  11, 957],
       [845, 965, 230],
       [ 29, 811, 659],
       [362, 202, 606],
       [248, 736, 158],
       [282, 979, 917],
       [218, 760, 916],
       [333, 321, 243],
       [954, 919, 920],
       [876, 113, 158],
       [689, 748, 143],
       [736, 308

In [26]:
# Test side labeling.  Sanity check by hand: compare a few labels against the
# matching rows of `sides` (one label per row, in the same order).
side_labels = side_labeler(sides)
side_labels

array([0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0,
       0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1,
       0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0,
       1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1,
       0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1])

In [14]:
# Test angle data creation: draw 20 random angle triples.
angles = create_data_angles(20)
angles

array([[ 16, 175, 100],
       [154, 121,  78],
       [170,  77, 154],
       [ 31, 142, 130],
       [ 13,  11,  96],
       [ 15,  76, 177],
       [147,  99,  41],
       [ 11, 167, 149],
       [ 49, 150,  80],
       [177,  75,  13],
       [128,  87,  46],
       [ 60,  91,  90],
       [128,  89,   9],
       [ 47,  11,   5],
       [ 72, 162,  28],
       [  2,  61,  87],
       [ 85, 177,  81],
       [169, 169,  86],
       [ 31,  34,  81],
       [ 95, 105,  99]])

In [15]:
# Test angle labeling. Sanity check: a label is 1 only when the three angles in a row sum to 180.
angle_labels = angle_labeler(angles)
angle_labels

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [16]:
# What is one main difference between angle labels and side labels?

In [27]:
# Fit an MLP on the side triples and their labels.
# random_state fixes the network's random initialisation and batch sampling,
# so re-running this cell on the same data gives the same fitted model.
side_classifier = MLPClassifier(max_iter=500, random_state=0).fit(sides, side_labels)

In [28]:
# Predicted class probabilities, one row per triple; columns follow
# side_classifier.classes_ (label 0 then label 1 when both labels are present).
# NOTE: `sides` is the same data the model was fitted on, so these are
# training-set predictions, not a held-out evaluation.
side_classifier.predict_proba(sides)

array([[9.98564741e-01, 1.43525862e-03],
       [9.99999346e-01, 6.54210987e-07],
       [9.93515976e-01, 6.48402370e-03],
       [1.98328470e-02, 9.80167153e-01],
       [1.60663882e-09, 9.99999998e-01],
       [0.00000000e+00, 1.00000000e+00],
       [1.00000000e+00, 5.83725595e-26],
       [1.00000000e+00, 4.32789533e-28],
       [1.27036913e-08, 9.99999987e-01],
       [2.86595877e-04, 9.99713404e-01],
       [1.42728274e-10, 1.00000000e+00],
       [9.96472786e-01, 3.52721353e-03],
       [9.99953514e-01, 4.64858709e-05],
       [9.27417900e-01, 7.25821002e-02],
       [1.00000000e+00, 1.14073474e-34],
       [0.00000000e+00, 1.00000000e+00],
       [9.99999523e-01, 4.77259350e-07],
       [0.00000000e+00, 1.00000000e+00],
       [1.00000000e+00, 9.50017467e-23],
       [9.99999852e-01, 1.47898356e-07],
       [1.02030559e-05, 9.99989797e-01],
       [1.00000000e+00, 4.20890946e-14],
       [9.99999999e-01, 9.92613179e-10],
       [9.15509164e-06, 9.99990845e-01],
       [1.252707