In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import cv2
import scipy.io as spio
import os
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import ShuffleSplit, learning_curve
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn import svm



In [2]:
# Create features data frame for a specific subject

from tkinter import Tk, filedialog
import glob
root = Tk() # pointing root to Tk() to use it as Tk() in program.
root.withdraw() # Hides small tkinter window.
root.attributes('-topmost', True) # Opened windows will be active. above all windows despite of selection.
path = filedialog.askdirectory() # Returns opened path as str



In [3]:

# Choose a folder
dir =  glob.glob(os.path.join(path, "*", ""), recursive = True)

In [143]:
def getData(dir, before_q=2):

    for i in range(dir.__len__()):

        movs = glob.glob(os.path.join(dir[i] + "/*.avi"))
        dfh5_file = glob.glob(os.path.join(dir[i] + "/*30000.h5"))
        vidcap = cv2.VideoCapture(movs[0])
        success, image = vidcap.read()
        print("Able to read Mouse " + str(i) + " movie?: " + str(success))
        fps = vidcap.get(cv2.CAP_PROP_FPS)
        frame_count = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
        duration = frame_count / fps

        # load facial points
        dfh5 = pd.read_hdf(dfh5_file[0])

        # Clac Center of mass
        row = dfh5.iloc[[2]]
        row = np.squeeze(row.to_numpy())
        nose = row[:2]
        ear = row[12:14]
        center_of_mass = np.mean([nose, ear], axis=0)

        # Subtract CoM
        dfh5.iloc[:, [0, 3, 6, 9, 12]] -= center_of_mass[0]
        dfh5.iloc[:, [1, 4, 7, 10, 13]] -= center_of_mass[1]

        # convert to np
        pos_df = dfh5.iloc[1:, [0, 1, 3, 4, 6, 7, 9, 10, 12, 13]]

        pos_df.to_csv(os.path.join(dir[i] + "out.csv"))
        pos = pos_df.to_numpy()

        # pupil
        dfh5_pupil_file = glob.glob(os.path.join(dir[i] + "/*40000.h5"))
        if dfh5_pupil_file:
            dfh5_pupil = pd.read_hdf(dfh5_pupil_file)

            dist = lambda x, y: np.linalg.norm(x - y)

            pupil = dfh5_pupil.iloc[1:, [0, 1, 3, 4, 6, 7, 9, 10, 12, 13]]
            pupil = pupil.to_numpy()

            # i_up = pupil[:, 0:2] |  i_down = pupil[:, 2:4]  |  i_right = pupil[:, 4:6] |  i_left = pupil[:, 6:8]
            dist_hor = list(map(dist, pupil[:, 0:2], pupil[:, 4:6]))
            dist_ver = list(map(dist, pupil[:, 4:6], pupil[:, 6:8]))
            pos_n_pupil = np.concatenate((pos, dist_hor, dist_ver), axis=1)


        else:
            print("Mouse " + str(i) + " have no pupil file")
            pos_n_pupil = pos

        # run matlab code if necessary

        # get session data
        data = spio.loadmat(os.path.join(dir[i] + 'data.mat'))

        correction = pos_n_pupil.shape[0] / ((data['time'].shape[1] / data['Fs']) * fps)

        tSOS_new = data['tSOS'] * correction
        Lick_times_new = np.ravel(data['Lick_times'] * correction + (
                tSOS_new[0] - np.minimum(data['Go_times'][0, 0], data['NoGo_times'][0, 0])))

        stimuli = np.sort(np.append(data['Go_times'], data['NoGo_times'])) * correction
        stimuli_type = np.in1d(stimuli, data['Go_times'] * correction)

        # movie interpolation
        r = pd.RangeIndex(0, int((data['time'].shape[1] / data['Fs']) * fps), 1)
        t = pos_df
        t = t.sort_index()
        new_idx = np.linspace(t.index[0], len(r), len(r))
        t = (t.reindex(new_idx, method='ffill', limit=1).iloc[1:].interpolate())

        pos_n_pupil = t.to_numpy()

        # segment to trails
        for idx, seg in enumerate(data['tSOS'] * fps):
            segment = pos_n_pupil[int(seg - before_q * fps):int(seg), :].reshape([int(before_q * fps) * pos_n_pupil.shape[1]])
            lick_seg = np.any(np.ravel(np.where((Lick_times_new > seg / fps) & (Lick_times_new < seg / fps + before_q))))
            if idx == 0:
                x = segment
                y = lick_seg
            else:
                x = np.row_stack((x, segment))
                y = np.row_stack((y, lick_seg))

        # concat this mouse data to the others



            # create empty arrays
        if i == 0:
            X = x
            y_tot = y
            stimuli_tot = stimuli_type
        X = np.concatenate((X, x), axis=0)
        print(x.shape)

        y_tot = np.concatenate((y_tot, y), axis=0)
        stimuli_tot = np.concatenate((stimuli_tot, stimuli_type), axis=0)
        print(stimuli_type.shape)


    return X, np.ravel(y_tot), np.ravel(stimuli_tot)

In [150]:
X, y, stimuli_type = getData(dir, before_q=0.5)

Able to read Mouse 0 movie?: True
Mouse 0 have no pupil file
(51, 150)
(51,)
Able to read Mouse 1 movie?: True
Mouse 1 have no pupil file
(58, 150)
(56,)
Able to read Mouse 2 movie?: True
Mouse 2 have no pupil file
(51, 150)
(51,)
Able to read Mouse 3 movie?: True
Mouse 3 have no pupil file
(66, 150)
(66,)
Able to read Mouse 4 movie?: True
Mouse 4 have no pupil file
(69, 150)
(69,)
Able to read Mouse 5 movie?: True
Mouse 5 have no pupil file
(67, 150)
(61,)
Able to read Mouse 6 movie?: True
Mouse 6 have no pupil file
(56, 150)
(56,)
Able to read Mouse 7 movie?: True
Mouse 7 have no pupil file
(62, 150)
(62,)


In [124]:
# smoothing
# from scipy.signal import savgol_filter, filtfilt, ellip
# b, a = ellip(4, 0.01, 120, 0.125)  # Filter to be applied.
#
# X_hat = np.zeros_like(X)
# for i in range(X.shape[0]):
#     X_hat[:,i] = filtfilt(b, a,X[:,i], method="gust")

In [126]:

# add the stimuli sounds
X = np.c_[X, stimuli_type]


ValueError: all the input array dimensions for the concatenation axis must match exactly, but along dimension 0, the array at index 0 has size 531 and the array at index 1 has size 523

In [153]:
K = 200
acc = np.zeros([K,2])


for k in range(K):

    # Split the data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=True)



    # Feature Scaling
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)

    # pca = PCA(n_components=10)
    # pca.fit_transform(X_train, y_train)
    # pca.fit(X_test)


    # Training and Making Predictions
    # classifier = GaussianNB()
    classifier = svm.SVC()
    # classifier = RandomForestClassifier(max_depth=2, random_state=0)

    classifier.fit(X_train, y_train)

    y_pred = classifier.predict(X_test)
    acc[k,0] = accuracy_score(y_test, y_pred)
    train_pred = classifier.predict(X_train)
    acc[k,1] = accuracy_score(y_train, train_pred)



# Evaluating the Performance
print('Test accuracy: ' + str(np.mean(acc[:, 0])) + ' || STD: ' + str(np.std(acc[:, 0])))
print('Train accuracy: ' + str(np.mean(acc[:, 1])) + ' || STD: ' + str(np.std(acc[:, 1])))


Test accuracy: 0.8347196261682243 || STD: 0.03301033936545144
Train accuracy: 0.855 || STD: 0.007284491248175741
