In [1]:
from sklearn.datasets import make_classification
from scipy.stats import pearsonr
from sklearn import svm
from sklearn.preprocessing import StandardScaler
import numpy as np
import random
import pandas as pd
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm

In [46]:
def generate_data(separation, seed=None):
    """Draw a synthetic two-class dataset with the requested class separation.

    Parameters
    ----------
    separation : float
        Forwarded to ``make_classification`` as ``class_sep``.
    seed : int or None, optional
        Forwarded to ``make_classification`` as ``random_state``.

    Returns
    -------
    tuple
        The ``(x, y)`` pair produced by ``make_classification`` for 1000
        samples and 200 features, all of them informative, with no label
        flipping and no shuffling.
    """
    settings = dict(
        n_samples=1000,
        n_features=200,
        n_classes=2,
        n_informative=200,
        n_redundant=0,
        class_sep=separation,
        flip_y=0.,
        shuffle=False,
        random_state=seed,
    )
    features, labels = make_classification(**settings)
    return features, labels

In [61]:
def trainandtest_svm(x, y, shuffle=True, seed=42, n_train=800):
    """Fit a linear SVM on a train split and score the held-out samples.

    The first ``n_train`` samples (after the optional shuffle) are used for
    training and the remainder for testing. For each test sample the signed
    distance to the separating hyperplane is computed as
    ``decision_function / ||w||``; these distances are z-scored, negated for
    samples whose label is 0, and averaged.

    Parameters
    ----------
    x : array-like
        Feature matrix, one row per sample.
    y : array-like
        Labels, one per row of ``x``. The sign flip below tests ``== 0``, so
        labels are presumably 0/1 -- TODO confirm for other callers.
    shuffle : bool, optional
        Permute the samples before splitting.
    seed : int, optional
        Seed of the generator used for the permutation.
    n_train : int, optional
        Number of leading samples used for training (default 800, the value
        that was previously hard-coded).

    Returns
    -------
    float
        Mean of the z-scored distances after negating those of label-0
        test samples.

    Raises
    ------
    ValueError
        If ``x`` and ``y`` differ in length, or if ``n_train`` does not leave
        at least one training and one test sample.
    """
    x = np.asarray(x)
    y = np.asarray(y)
    n_total = len(x)
    if len(y) != n_total:
        raise ValueError(
            f"x and y must have the same number of samples, got {n_total} and {len(y)}"
        )
    # Fail early with a clear message instead of fitting on / predicting from
    # an empty split.
    if not 0 < n_train < n_total:
        raise ValueError(
            f"n_train must be between 1 and {n_total - 1} for {n_total} samples, got {n_train}"
        )

    def zscore(values):
        # Standardise with the mean/std of the whole array (all entries pooled).
        return (values - np.mean(values)) / np.std(values)

    if shuffle:
        rng = np.random.default_rng(seed=seed)
        shuffled_indices = rng.permutation(n_total)
        x = x[shuffled_indices]
        y = y[shuffled_indices]

    train_x, test_x = x[:n_train], x[n_train:]
    train_y, test_y = y[:n_train], y[n_train:]

    # NOTE(review): train and test are each standardised with their own pooled
    # statistics (not per feature, and not with the training statistics).
    # Kept as-is to preserve results -- confirm this is intended.
    train_z = zscore(train_x)
    test_z = zscore(test_x)

    clf = svm.SVC(kernel='linear')
    clf.fit(train_z, train_y)

    # Signed distance from the decision boundary: f(x) / ||w||.
    decision_values = clf.decision_function(test_z)
    w_norm = np.linalg.norm(clf.coef_)
    dist = decision_values / w_norm

    zscoredist = zscore(dist)
    # Flip the sign for label-0 samples before averaging.
    zscoredist[test_y == 0] *= -1

    return np.mean(zscoredist)

In [62]:
import ipdb
def align_and_combine(list_of_x, list_of_y):
    """Average several datasets row-by-row after sorting each one by label.

    Every dataset is reordered with ``np.argsort`` of its labels so that the
    label vectors line up; the reordered feature arrays are then averaged
    element-wise.

    Parameters
    ----------
    list_of_x : sequence of array-like
        Feature arrays, one per dataset.
    list_of_y : sequence of array-like
        Label arrays, one per dataset, matching ``list_of_x``.

    Returns
    -------
    tuple
        ``(x, y)`` where ``y`` is the sorted label vector of the first dataset
        and ``x`` is the mean of the label-sorted feature arrays. With a single
        dataset, its label-sorted features are returned without averaging.

    Raises
    ------
    ValueError
        If no dataset is given, if the two lists differ in length, or if the
        sorted labels of any dataset differ from those of the first one.
    """
    n_datasets = len(list_of_x)
    if n_datasets == 0:
        raise ValueError("align_and_combine needs at least one dataset")
    if len(list_of_y) != n_datasets:
        raise ValueError(
            f"got {n_datasets} feature arrays but {len(list_of_y)} label arrays"
        )

    def _sorted_by_label(features, labels):
        # Integer-array indexing returns new arrays, so inputs are never mutated.
        features = np.asarray(features)
        labels = np.asarray(labels)
        order = np.argsort(labels)
        return features[order], labels[order]

    x, y = _sorted_by_label(list_of_x[0], list_of_y[0])
    if n_datasets == 1:
        return x, y

    # Accumulate in floating point so integer inputs can be averaged too.
    if not np.issubdtype(x.dtype, np.inexact):
        x = x.astype(float)

    for position, (thisx, thisy) in enumerate(zip(list_of_x[1:], list_of_y[1:]), start=1):
        thisx, thisy = _sorted_by_label(thisx, thisy)
        # Averaging only makes sense if every row pairs up with the same label.
        if not np.array_equal(y, thisy):
            raise ValueError(
                f"sorted labels of dataset {position} do not match those of dataset 0"
            )
        x += thisx

    x /= n_datasets

    return x, y

In [77]:
# Generate one dataset per class separation (ten values from 0 to 2, all with
# seed=1), then sort each by label and average them into a single (x, y).
datasets = [generate_data(sep, seed=1) for sep in np.linspace(0., 2., 10)]
x_list = [features for features, _ in datasets]
y_list = [labels for _, labels in datasets]
x, y = align_and_combine(x_list, y_list)

In [78]:
# Score the linear SVM on the current (x, y); the cell output is the mean
# sign-flipped, z-scored distance returned by trainandtest_svm.
trainandtest_svm(x, y)

0.5345988321726217

In [60]:
# Inspect the current label array y.
# NOTE(review): this dumps the whole array; consider y[:20] or np.bincount(y).
y

array([0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1,
       0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1,
       0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1,
       0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0,
       0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1,
       0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0,
       1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1,
       0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0,
       1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0,
       0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1,
       1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1,
       1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0,
       1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1,

In [None]:
# Two datasets with the same class separation (0.8) but different seeds.
# The first one is reordered so that its samples are sorted by label.
x0, y0 = generate_data(0.8, seed=30)
order = np.argsort(y0)
x0, y0 = x0[order], y0[order]

# The second one is left in the order returned by generate_data.
x1, y1 = generate_data(0.8, seed=21)

In [None]:
# Shape of the label-sorted feature array x0.
x0.shape

In [None]:
# Check that y0 and y1 hold the same labels once sorted, i.e. that both
# datasets have identical class counts and can be aligned by label.
np.all(np.sort(y0)==np.sort(y1))

In [None]:
# Shape of the current feature array x.
x.shape

In [None]:
# Inspect the labels of the seed-21 dataset (left in generation order).
y1

In [None]:
# Inspect y1 again.
# NOTE(review): this cell repeats the one directly above -- candidate for removal.
y1

In [None]:
# Keep the SVM score for the current (x, y) in `distance`.
distance = trainandtest_svm(x, y)

In [None]:
# Display the stored score.
distance

In [None]:
# Sweep the class separation and record the SVM score for each value.
separations = np.linspace(0.1, 3.0, 300)
distances = []
for s in tqdm(separations):
    # trainandtest_svm expects (x, y), not a separation value, so a dataset
    # has to be generated for each separation before scoring it.
    # NOTE(review): seed=1 mirrors the dataset-building cell above and keeps
    # the sweep reproducible -- confirm this is the intended seed.
    x_s, y_s = generate_data(s, seed=1)
    distances.append(trainandtest_svm(x_s, y_s))

In [None]:
# SVM score as a function of class separation; axis labels make the figure
# readable on its own, and the trailing `;` hides the PathCollection repr.
plt.scatter(separations, distances)
plt.xlabel('class separation')
plt.ylabel('mean z-scored distance from boundary');

In [None]:
# Scatter plot of distances against separations.
# NOTE(review): same call as the plotting cell directly above -- candidate for removal.
plt.scatter(separations, distances)

In [None]:
# Draw a single sample from a normal distribution with mean 0 and std 0.5.
# NOTE(review): looks like a scratch cell; it uses NumPy's unseeded global
# random state, so the value differs on every run.
np.random.normal(0., 0.5)

In [None]:
# Simulate per-trial activation and a separation that depends linearly on it,
# with a different slope/intercept for congruent and incongruent trials.
n_trials = 400 # 300 congruent + 100 incongruent
n_congruent = 300  # the first n_congruent trials are congruent
slope_cong = 0.4
interc_cong = 0.
slope_inc = 0.3
interc_inc = 0.
max_act = 0.3
min_act = -0.1
sigma = 0.05

# One seeded generator for every random draw so the simulation is reproducible
# (previously two different unseeded global generators were used).
sim_rng = np.random.default_rng(0)

conditions = []
activations = []
separations = []
# NOTE(review): `distances` is reset here but never filled in this cell; kept
# so the names defined by the cell are unchanged.
distances = []

for trial in range(n_trials): # trials
    is_congruent = trial < n_congruent
    if is_congruent:
        slope, intercept = slope_cong, interc_cong
    else:
        slope, intercept = slope_inc, interc_inc
    # Activation is uniform in [min_act, max_act); separation is a noisy
    # linear function of it with Gaussian noise of std sigma.
    this_act = sim_rng.uniform(min_act, max_act)
    this_sep = slope*this_act + intercept + sim_rng.normal(0., sigma)
    conditions.append(1 if is_congruent else 0)
    activations.append(this_act)
    separations.append(this_sep)

# The simulated separation is stored next to condition and activation so the
# frame holds the complete trial table.
data = pd.DataFrame({'congruent': conditions, 'activation': activations,
                     'separation': separations})

In [None]:
# Simulated separation against activation, coloured by condition
# (1 = congruent, 0 = incongruent); the trailing `;` hides the repr.
plt.scatter(activations, separations, c=conditions, edgecolor='k')
plt.xlabel('activation')
plt.ylabel('separation');

In [None]:
# Pearson correlation between activation and separation over the first 300
# trials, which the simulation cell marks as congruent.
pearsonr(activations[:300], separations[:300]) # congruent

In [None]:
# Pearson correlation between activation and separation over the trials from
# index 300 onwards, which the simulation cell marks as incongruent.
pearsonr(activations[300:], separations[300:]) # incongruent