## Initialize Stuff & Read Data

In [20]:
import matplotlib.pyplot as plt
import numpy as np
import csv

%matplotlib inline

In [21]:
f = open('../data/test.csv', 'r')
csv_reader = csv.DictReader(f)

In [22]:
rows = [row for row in csv_reader]
sequences = {int(row['Id']): [int(s) for s in row['Sequence'].split(',')] for row in rows}
print(len(sequences.keys()))

113845


# Categories 

In [23]:
def all_seqs(A):
    return A

def strictly_increasing(A):
    return all(x < y for x, y in zip(A, A[1:]))
    
def non_decreasing(A):
    return all(x <= y for x, y in zip(A, A[1:]))
    
def strictly_decreasing(A):
    return all(x > y for x, y in zip(A, A[1:]))
    
def non_increasing(A):
    return all(x >= y for x, y in zip(A, A[1:]))

def non_monotonic(A):
    return not any([strictly_increasing(A), strictly_decreasing(A), non_increasing(A), non_decreasing(A)])

seq_types = [ all_seqs, strictly_increasing,non_decreasing,strictly_decreasing,non_increasing,non_monotonic]
seq_types = list(zip(seq_types, map(lambda x: x.__name__, seq_types)))

In [24]:
def things(Y):
    X = np.linspace(1,len(Y),len(Y))
    return X[:-1], Y[:-1], X[-1], Y[-1]

In [25]:
def test(sequences, f):
    for i, (filter_seq, name) in enumerate(seq_types):
        universe = list(filter(filter_seq, sequences))
        num = sum(f(Y) for Y in universe)
        print(name, num, num/float(len(universe)))
        if i % 30000 == 0:
            print(i)

In [26]:
for f, name in seq_types:
    print(name, len(list(filter(f, sequences.values()))))

all_seqs 113845
strictly_increasing 60002
non_decreasing 72585
strictly_decreasing 272
non_increasing 468
non_monotonic 40870


# Polynomial Fits

In [8]:
from random import sample

In [9]:
from scipy.interpolate import interp1d

In [11]:
def test_poly(Y):
    kinds = ['linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'slinear']
    trainx,trainy,x,y = things(Y)
    for kind in kinds:
        try:
            f = interp1d(trainx, trainy,  kind=kind, assume_sorted=True, bounds_error=False, fill_value=1)
            if f(x) == y:
                return 1
        except:
            pass
    return 0

In [12]:
test(list(sequences.values()), test_poly)

all_seqs 3027 0.02658878299442224
0
strictly_increasing 1 1.6666111129629014e-05
non_decreasing 10 0.00013776951160708134
strictly_decreasing 1 0.003676470588235294
non_increasing 13 0.027777777777777776
non_monotonic 3008 0.07359921702960606


# linear

In [31]:
def test_poly(Y):
    trainx,trainy,x,y = things(Y)
    try:
        f = interp1d(trainx, trainy, kind='linear', assume_sorted=True, bounds_error=False, fill_value=1)
        if f(x) == y:
            return 1
    except:
        pass
    return 0

In [32]:
test(list(sequences.values()), test_poly)

all_seqs 3025 0.0265712152488032
0
strictly_increasing 0 0.0
non_decreasing 8 0.00011021560928566508
strictly_decreasing 1 0.003676470588235294
non_increasing 12 0.02564102564102564
non_monotonic 3008 0.07359921702960606


# quadratic

In [33]:
def test_poly(Y):
    trainx,trainy,x,y = things(Y)
    try:
        f = interp1d(trainx, trainy,  kind='quadratic', assume_sorted=True, bounds_error=False, fill_value=1)
        if f(x) == y:
            return 1
    except:
        pass
    return 0

In [34]:
test(list(sequences.values()), test_poly)

all_seqs 3023 0.026553647503184154
0
strictly_increasing 0 0.0
non_decreasing 8 0.00011021560928566508
strictly_decreasing 0 0.0
non_increasing 11 0.023504273504273504
non_monotonic 3007 0.07357474920479569


# cubic

In [11]:
def test_poly(Y):
    trainx,trainy,x,y = things(Y)
    try:
        f = interp1d(trainx, trainy,  kind='cubic', assume_sorted=True, bounds_error=False, fill_value=1)
        if f(x) == y:
            return 1
    except:
        passc
    return 0

In [None]:
test(list(sequences.values()), test_poly)

all_seqs 3027 0.02658878299442224
0
strictly_increasing 1 1.6666111129629014e-05
non_decreasing 10 0.00013776951160708134
strictly_decreasing 1 0.003676470588235294
non_increasing 13 0.027777777777777776
non_monotonic 3008 0.07359921702960606


# last 2 linear fit

In [47]:
def lin_fit(Y):
    trainx,trainy,x,y = things(Y)
    trainx = trainx[-2:]
    trainy = trainy[-2:]
    f = interp1d(trainx, trainy, kind='linear', assume_sorted=True)
    if f(x) == y:
        return 1
    return 0

In [48]:
test(list(sequences.values()), lin_fit)

ValueError: A value in x_new is above the interpolation range.

In [43]:
def lin_fit(Y):
    trainx,trainy,x,y = things(Y)
    trainx = trainx[-2:]
    trainy = trainy[-2:]
    try:
        f = interp1d(trainx, trainy, kind='slinear', assume_sorted=True, bounds_error=False, fill_value=1)
        if f(x) == y:
            return 1
    except:
        pass
    return 0

In [44]:
test(list(sequences.values()), lin_fit)

all_seqs 3025 0.0265712152488032
0
strictly_increasing 0 0.0
non_decreasing 8 0.00011021560928566508
strictly_decreasing 1 0.003676470588235294
non_increasing 12 0.02564102564102564
non_monotonic 3008 0.07359921702960606
