In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import cv2
import scipy.io as spio
import os
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.model_selection import ShuffleSplit, learning_curve
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score


In [4]:
# Create features data frame for a specific subject

from tkinter import Tk, filedialog
import glob
root = Tk() # pointing root to Tk() to use it as Tk() in program.
root.withdraw() # Hides small tkinter window.
root.attributes('-topmost', True) # Opened windows will be active. above all windows despite of selection.
path = filedialog.askdirectory() # Returns opened path as str



In [5]:

# Choose a folder
dir =  glob.glob(os.path.join(path, "*", ""), recursive = True)

In [17]:
def getData(dir, before_q=2):
    # create empty arrays
    X = np.empty((1, 60, 10))
    y_tot = np.array([])
    stimuli_tot = np.array([])

    for i in range(dir.__len__()):

        movs = glob.glob(os.path.join(dir[i] + "/*.avi"))
        dfh5_file = glob.glob(os.path.join(dir[i] + "/*30000.h5"))
        vidcap = cv2.VideoCapture(movs[0])
        success, image = vidcap.read()
        print(success)
        fps = vidcap.get(cv2.CAP_PROP_FPS)
        frame_count = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
        duration = frame_count / fps

        # load facial points
        dfh5 = pd.read_hdf(dfh5_file[0])

        # Clac Center of mass
        row = dfh5.iloc[[2]]
        row = np.squeeze(row.to_numpy())
        nose = row[:2]
        ear = row[12:14]
        center_of_mass = np.mean([nose, ear], axis=0)

        # Subtract CoM
        dfh5.iloc[:, [0, 3, 6, 9, 12]] -= center_of_mass[0]
        dfh5.iloc[:, [1, 4, 7, 10, 13]] -= center_of_mass[1]

        # convert to np
        pos_df = dfh5.iloc[1:, [0, 1, 3, 4, 6, 7, 9, 10, 12, 13]]
        pos = pos_df.to_numpy()

        # pupil
        dfh5_pupil_file = glob.glob(os.path.join(dir[i] + "/*40000.h5"))
        if dfh5_pupil_file:
            dfh5_pupil = pd.read_hdf(dfh5_pupil_file)

            dist = lambda x, y: np.linalg.norm(x - y)

            pupil = dfh5_pupil.iloc[1:, [0, 1, 3, 4, 6, 7, 9, 10, 12, 13]]
            pupil = pupil.to_numpy()

            # i_up = pupil[:, 0:2] |  i_down = pupil[:, 2:4]  |  i_right = pupil[:, 4:6] |  i_left = pupil[:, 6:8]
            dist_hor = list(map(dist, pupil[:, 0:2], pupil[:, 4:6]))
            dist_ver = list(map(dist, pupil[:, 4:6], pupil[:, 6:8]))
            pos_n_pupil = np.concatenate((pos, dist_hor, dist_ver), axis=1)


        else:
            print("Mouse" + str(i) + ": no pupil file")
            pos_n_pupil = pos

        # get session data
        data = spio.loadmat(os.path.join(dir[i] + 'data.mat'))
        stimuli = np.sort(np.append(data['Go_times'], data['NoGo_times']))

        # Go = True, NoGo = False
        stimuli_type = np.r_[np.in1d(stimuli, data['Go_times']), False]

        # convert to the right timescale
        stimuli_time = stimuli * fps

        # Correct the times (for this specific subject)
        a = np.zeros([np.abs(int((np.max(data['Time']) - duration) * fps)), pos_n_pupil.shape[1]])
        pos_n_pupil = np.concatenate((a, pos_n_pupil), axis=0)

        # divide into segments according to stimulus
        seg_data = np.split(pos_n_pupil, (stimuli_time.astype(int)))

        # take relevant times
        rel_time = int(before_q * fps)  # 2 sec
        allow_licking = int(2 * fps)  # 2 sec

        # count for licks
        for j in range(len(stimuli) + 1):
            seg_data[j] = seg_data[j][-rel_time:, :]

        # after extracting relevant time bins convert to array
        seg_data = np.array(seg_data)

        # Get licks in time
        lick_seg = np.zeros(len(stimuli) + 1)
        t = np.arange(int(np.max(data['Time']) * fps))
        seg_t = np.split(t, (stimuli_time.astype(int)))

        h = 0

        for tLick in np.ravel(data['Lick_times'] * fps):

            if (h < len(stimuli_time)) and (tLick > stimuli_time[h]):
                h += 1

            if np.isin(int(tLick), seg_t[h][:allow_licking]):
                lick_seg[h] += 1

        # labeling
        y = np.where(lick_seg > 0, 1, 0)


        if i == 0:
            mice = pd.DataFrame({
                "name": np.array(movs),  # mouse name
                "fps": fps,  # movie frame per second
                "duration": duration,  # movie duration
                "recording time": np.max(data['Time']),  # session duration
                "raw_X": [pos_n_pupil],  # raw features
                "X": [seg_data],  # features segmented to trails
                "y": [lick_seg],  # did the mouse lick
                "stimuli_type": [stimuli_type],
                "raw_data": [data],
                "raw_face_pos": [dfh5]})
        else:
            mouse = pd.DataFrame({
                "name": np.array(movs),  # mouse name
                "fps": fps,  # movie frame per second
                "duration": duration,  # movie duration
                "recording time": np.max(data['Time']),  # session duration
                "raw_X": [pos_n_pupil],  # raw features
                "X": [seg_data],  # features segmented to trails
                "y": [lick_seg],  # did the mouse lick
                "stimuli_type": [stimuli_type],
                "raw_data": [data],
                "raw_face_pos": [dfh5]})

            mice = pd.concat([mice, mouse], ignore_index=True, axis=0)

        # concat this mouse data to the others
        X = np.concatenate((X, seg_data), axis=0)
        y_tot = np.concatenate((y_tot, y), axis=0)
        stimuli_tot = np.concatenate((stimuli_tot, stimuli_type), axis=0)


    # Delete unnecessary first raw
    X = np.delete(X, obj=0, axis=0)

    return mice, X, y_tot, stimuli_tot


In [18]:
before_q = 2
Mice, X, y, stimuli_type = getData(dir, before_q=2)

True
Mouse0: no pupil file
True
Mouse1: no pupil file
True
Mouse2: no pupil file
True
Mouse3: no pupil file
True
Mouse4: no pupil file
True
Mouse5: no pupil file
True
Mouse6: no pupil file
True
Mouse7: no pupil file


In [23]:
# smoothing
from scipy.signal import savgol_filter, filtfilt, ellip
b, a = ellip(4, 0.01, 120, 0.125)  # Filter to be applied.

X_hat = np.zeros_like(X)
for i in range(X.shape[2]):
    X_hat[:,i] = filtfilt(b, a,X[:,i], method="gust")

In [24]:
# Train a model
X = np.reshape(X_hat, [X_hat.shape[0], X_hat.shape[1]*X_hat.shape[2]])

# add the stimuli sounds
X = np.c_[X, stimuli_type.T]

# # exclod zeros for this mouse
# X = X[5:, :]
# y = y[5:]


ValueError: all the input array dimensions for the concatenation axis must match exactly, but along dimension 0, the array at index 0 has size 480 and the array at index 1 has size 125

In [None]:
K = 150
from sklearn import svm

acc = np.zeros([K,2])
for k in range(K):

    # Split the data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=True)

    # Feature Scaling
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)

    # pca = PCA(n_components=10)
    # pca.fit_transform(X_train, y_train)
    # pca.fit(X_test)


    # Training and Making Predictions
    # classifier = GaussianNB()
    classifier = svm.SVC()
    # classifier = RandomForestClassifier(max_depth=2, random_state=0)


    classifier.fit(X_train, y_train)

    y_pred = classifier.predict(X_test)
    acc[k,0] = accuracy_score(y_test, y_pred)
    train_pred = classifier.predict(X_train)
    acc[k,1] = accuracy_score(y_train, train_pred)


# Evaluating the Performance
print('Test accuracy: ' + str(np.mean(acc[:, 0])) + ' || STD: ' + str(np.std(acc[:, 0])))
print('Train accuracy: ' + str(np.mean(acc[:, 1])) + ' || STD: ' + str(np.std(acc[:, 1])))
