In [1]:
# Import module
import numpy as np
import os

%matplotlib inline
import matplotlib.pyplot as plt

import pickle
from sklearn.preprocessing import MinMaxScaler

### Folder location/creation and functions definition

In [2]:
# Folder locations, all expressed relative to the directory the notebook runs in.
dataset_fold = os.path.join('..', 'dataset')

groundtruth_fold = os.path.join('..', 'groundtruth')
enrollment_fold = os.path.join(groundtruth_fold, 'enrollment')
verification_fold = os.path.join(groundtruth_fold, 'verification')

# The dataset folder holds the pickled dictionaries; recreate it if it was removed.
if os.path.isdir('../dataset'):
    print('forlder dataset exists')
else:
    os.mkdir('../dataset')

        
def dictionary_creation(file):
    """Build a dictionary with one empty list per user ID found in a text file.

    Parameters
    ----------
    file : str or path-like
        Path of a text file whose content is a whitespace-separated list of
        user IDs.

    Returns
    -------
    dict
        Maps every ID (kept as the string read from the file) to its own new
        empty list. The lists are meant to be filled later with signature data.
    """
    with open(file) as handle:
        # Splitting without argument cuts on any run of whitespace
        # (spaces, tabs, newlines), giving one token per user ID.
        user_ids = handle.read().split()

    # A fresh list per key, so the entries never share the same list object.
    return {user_id: [] for user_id in user_ids}



def save_obj(obj, name, folder=None):
    """Pickle ``obj`` into the file ``<folder>/<name>.pkl``.

    Parameters
    ----------
    obj : object
        Any picklable Python object.
    name : str
        File name without extension; ``'.pkl'`` is appended.
    folder : str or path-like, optional
        Directory receiving the file. When omitted, the ``dataset`` folder
        located one level above the current working directory is used, which
        is where this function always wrote before the parameter existed.

    Raises
    ------
    OSError
        If the target file cannot be opened for writing (for instance when
        the folder does not exist).
    """
    if folder is None:
        folder = os.path.join('..', 'dataset')

    target = os.path.join(folder, name + '.pkl')
    with open(target, 'wb') as f:
        # The highest protocol gives the most compact and fastest format
        # supported by the running Python version.
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)
        
        

def normalize_vect(variable):
    """Rescale one feature with scikit-learn's ``MinMaxScaler`` (default settings).

    Parameters
    ----------
    variable : numpy.ndarray
        Values of a single feature.

    Returns
    -------
    numpy.ndarray
        The scaled values laid out as a single column, i.e. with shape
        ``(variable.size, 1)``. The shape of the input is not restored.
    """
    # The values are arranged as one column before being handed to the scaler.
    column = variable.reshape(-1, 1)

    return MinMaxScaler().fit_transform(column)

forlder dataset exists


### Creation/Loading of the enrollment dictionary

In [3]:
# This dictionary contains 30 elements refered to with keys (IDs of the users).
# For each element, the value is a list containing 5 arrays containing the data of signatures (here 5 genuine signatures of the users).

user_file = os.path.join(groundtruth_fold, 'users.txt')
# Creation of the dictionary: one empty list per user ID
enrollment_dict = dictionary_creation(user_file)

enrollment_file = os.path.join(dataset_fold, 'enrollment.pkl')

# The pickle file acts as a cache: the raw files are only parsed when it is missing
if not os.path.isfile(enrollment_file):
    print("Creating the enrollment dictionary")
    # os.listdir returns the entries in arbitrary order; sorting them makes the
    # position of every signature in its list identical from one run to the next
    for file_name in sorted(os.listdir(enrollment_fold)):

        file = os.path.join(enrollment_fold, file_name)

        with open(file) as f:
            # data is a string containing all information on seven columns:
            # t, x, y, pressure, penup, azimuth, inclination
            data = f.read()

        # List of substrings, one per value --> l_data for list
        l_data = data.split()

        # Same values stored as floats --> a_data for array
        a_data = np.array(l_data, dtype=float)

        # One row per sample and seven columns --> r_data for reshaped.
        # The number of rows is inferred; a value count that is not a multiple of 7 raises ValueError
        r_data = a_data.reshape(-1, 7)

        # The first three characters of the file name are used as the user key
        enrollment_dict[file_name[:3]].append(r_data)

    # Save dictionary in a pickle file that can be loaded later
    save_obj(enrollment_dict, "enrollment")

else:
    # If the file exists, it is just loaded.
    print("Loading the enrollment dictionary")
    # NOTE: pickle.load can execute arbitrary code; only load a file produced by this notebook
    with open(enrollment_file, 'rb') as f:
        dict_bin = pickle.load(f)

    enrollment_dict.update(dict_bin)

print("Enrollment dictionary complete")

Loading the enrollment dictionary
Enrollment dictionary complete


### Creation/Loading of the verification dictionary

In [4]:
# This dictionary also contains 30 elements refered to with keys (IDs of the users).
# For each element, the value is a list containing 45 arrays containing the data of signatures (here 20 genuine signatures of the users and 25 forgeries).

# Creation of the dictionary: one empty list per user ID
verification_dict = dictionary_creation(user_file)

verification_file = os.path.join(dataset_fold, 'verification.pkl')

# The pickle file acts as a cache: the raw files are only parsed when it is missing
if not os.path.isfile(verification_file):
    print("Creating the verification dictionary")
    # os.listdir returns the entries in arbitrary order; sorting them makes the
    # position of every signature in its list identical from one run to the next
    for file_name in sorted(os.listdir(verification_fold)):

        file = os.path.join(verification_fold, file_name)

        with open(file) as f:
            # data is a string containing all information on seven columns:
            # t, x, y, pressure, penup, azimuth, inclination
            data = f.read()

        # List of substrings, one per value --> l_data for list
        l_data = data.split()

        # Same values stored as floats --> a_data for array
        a_data = np.array(l_data, dtype=float)

        # One row per sample and seven columns --> r_data for reshaped.
        # The number of rows is inferred; a value count that is not a multiple of 7 raises ValueError
        r_data = a_data.reshape(-1, 7)

        # Two-element list: the id of the signature (characters 4 and 5 of the
        # file name, kept as a string) followed by the array of its data
        l_id_data = [file_name[4:6], r_data]

        # The first three characters of the file name are used as the user key
        verification_dict[file_name[:3]].append(l_id_data)

    # Save dictionary in a pickle file that can be loaded later
    save_obj(verification_dict, "verification")

else:
    # If the file exists, it is just loaded.
    print("Loading the verification dictionary")
    # NOTE: pickle.load can execute arbitrary code; only load a file produced by this notebook
    with open(verification_file, 'rb') as f:
        dict_bin = pickle.load(f)

    verification_dict.update(dict_bin)

print("Verification dictionary complete")

Loading the verification dictionary
Verification dictionary complete


### Features creation and normalization

In [5]:
norm_enrollment_dict = dictionary_creation(user_file)

norm_enrollment_file = os.path.join(dataset_fold, 'norm_enrollment.pkl')

# The pickle file acts as a cache: the features are only computed when it is missing
if not os.path.isfile(norm_enrollment_file):
    print("Creating the normalized enrollment dictionary")

    # Time step between two consecutive samples, taken as 0.01s for every signature
    delta_t = 0.01

    for user in enrollment_dict:
        data = enrollment_dict[user]

        for sign in data:
            time = sign[:, 0]
            x = sign[:, 1]
            y = sign[:, 2]
            pressure = sign[:, 3]

            # After troubles understanding the data, we concluded that we don't need the penup feature.
            # The pen position (x, y) is recorded even though the pen is in an up position,
            # and the information of position and velocity will still be used to compare signatures

            # Creation of the velocity features: absolute difference between a
            # sample and the previous one, divided by delta_t.
            # The first sample has no predecessor, so its velocity stays at 0.
            n = len(time)
            vx = np.zeros(n)
            vy = np.zeros(n)
            vx[1:] = np.abs(np.diff(x)) / delta_t
            vy[1:] = np.abs(np.diff(y)) / delta_t

            # Then we normalize each feature independently; each result is a column of shape (n, 1)
            norm_x = normalize_vect(x)
            norm_y = normalize_vect(y)
            norm_vx = normalize_vect(vx)
            norm_vy = normalize_vect(vy)
            norm_pressure = normalize_vect(pressure)

            # The columns are put side by side in the order x, y, vx, vy, pressure
            norm_sign = np.hstack((norm_x, norm_y, norm_vx, norm_vy, norm_pressure))

            norm_enrollment_dict[user].append(norm_sign)

    # Save dictionary in a pickle file that can be loaded later
    save_obj(norm_enrollment_dict, "norm_enrollment")

else:
    # If the file exists, it is just loaded.
    print("Loading the normalized enrollment dictionary")
    # NOTE: pickle.load can execute arbitrary code; only load a file produced by this notebook
    with open(norm_enrollment_file, 'rb') as f:
        dict_bin = pickle.load(f)

    norm_enrollment_dict.update(dict_bin)

print("Normalized enrollment dictionary complete")

Loading the normalized enrollment dictionary
Normalized enrollment dictionary complete


In [6]:
norm_verification_dict = dictionary_creation(user_file)

norm_verification_file = os.path.join(dataset_fold, 'norm_verification.pkl')

# The pickle file acts as a cache: the features are only computed when it is missing
if not os.path.isfile(norm_verification_file):
    print("Creating the normalized verification dictionary")

    # Time step between two consecutive samples, taken as 0.01s for every signature
    delta_t = 0.01

    for user in verification_dict:
        data = verification_dict[user]

        for sign in data:
            # Each entry is [id, data]; we only take the data and not the identification number
            sign_d = sign[1]

            time = sign_d[:, 0]
            x = sign_d[:, 1]
            y = sign_d[:, 2]
            pressure = sign_d[:, 3]

            # Creation of the velocity features: absolute difference between a
            # sample and the previous one, divided by delta_t.
            # The first sample has no predecessor, so its velocity stays at 0.
            n = len(time)
            vx = np.zeros(n)
            vy = np.zeros(n)
            vx[1:] = np.abs(np.diff(x)) / delta_t
            vy[1:] = np.abs(np.diff(y)) / delta_t

            # Then we normalize each feature independently; each result is a column of shape (n, 1)
            norm_x = normalize_vect(x)
            norm_y = normalize_vect(y)
            norm_vx = normalize_vect(vx)
            norm_vy = normalize_vect(vy)
            norm_pressure = normalize_vect(pressure)

            # The columns are put side by side in the order x, y, vx, vy, pressure
            norm_sign = np.hstack((norm_x, norm_y, norm_vx, norm_vy, norm_pressure))

            # Same [id, data] layout as the raw dictionary: the id of the
            # signature is carried over and the data is replaced by the normalized features
            l_id_data = [sign[0], norm_sign]

            norm_verification_dict[user].append(l_id_data)

    # Save dictionary in a pickle file that can be loaded later
    save_obj(norm_verification_dict, "norm_verification")

else:
    # If the file exists, it is just loaded.
    print("Loading the normalized veification dictionary")
    # NOTE: pickle.load can execute arbitrary code; only load a file produced by this notebook
    with open(norm_verification_file, 'rb') as f:
        dict_bin = pickle.load(f)

    norm_verification_dict.update(dict_bin)

print("Normalized verification dictionary complete")

Loading the normalized veification dictionary
Normalized verification dictionary complete


### To access data:
#### For the enrollment dictionary (and the normalized one -> norm_enrollment_dict):

- enrollment_dict['001'] : gives all genuine signatures for the user "001"<br>
- enrollment_dict['001'][ i ] : gives the ith genuine signature for the user "001"<br>
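- enrollment_dict['001'][ i ][ :, j ] : gives the jth column of the ith genuine signature for the user "001" (raw columns: t, x, y, pressure, penup, azimuth, inclination; normalized columns: x, y, vx, vy, pressure)<br>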

#### For the verification dictionary (and the normalized one -> norm_verification_dict):

- verification_dict['001'] : gives all signatures (ID and data) for the user "001", genuine or forged<br>
- verification_dict['001'][ i ] : gives the ith signature (ID and data) for the user "001"<br>
- verification_dict['001'][ i ][ 0 ] : gives the ID number of the ith signature for the user "001"<br>
- verification_dict['001'][ i ][ 1 ] : gives the data of the ith signature for the user "001"<br>
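- verification_dict['001'][ i ][ 1 ][ :, j ] : gives the jth column of the data of the ith signature for the user "001" (raw columns: t, x, y, pressure, penup, azimuth, inclination; normalized columns: x, y, vx, vy, pressure)<br>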