# Using pandas to handle the CSV files for the project

Import pandas to extract the data from the CSV files.

In [9]:
import numpy as np
import pandas as pd
# Render every float in displayed frames with zero decimal places.
pd.set_option('display.float_format', '{:.0f}'.format)


### For vertical lines ( Direction Folder = 0,1 , GreenX0 = GreenX1 )
Obtaining all records that have X Position less than the X Position of the green line

Now we take the point with the highest X among them, which will be the last point before the line

Obtaining all the points that have X greater than the X of the green line

Now we take the point with the smallest value for X among them, which will be the first point after the line

Now we interpolate between the two points to get a more accurate result, because a recorded point may not fall exactly on the line due to hardware factors

### For Horizontal lines ( Direction Folder = 2,3 , GreenY0 = redY0 )
Obtaining all records that have y Position less than the y Position of the green line

In [10]:
def horizontal_points(records, direction, color):
    """Return the two samples that bracket the crossing of the ``<color>X0`` line.

    Parameters
    ----------
    records : pandas.DataFrame
        Samples of one trial. Must contain the columns ``mouseX``,
        ``timeMilSec`` and ``<color>X0``.
    direction : str
        ``'left_to_right'`` or ``'right_to_left'``.
    color : str
        Prefix of the line column to test against (the callers in this
        notebook pass ``'green'`` or ``'red'``).

    Returns
    -------
    pandas.DataFrame
        Two rows: first the sample just before the line, then the sample just
        after it, in travel order. If no sample lies on one of the two sides,
        an empty frame is returned so callers can test ``shape[0] <= 0``.

    Raises
    ------
    ValueError
        If ``direction`` is not one of the two supported values.
    """
    line_x = records[str(color) + 'X0']
    mouse_x = records['mouseX']

    if direction == 'left_to_right':
        before = records[mouse_x <= line_x]
        after = records[mouse_x >= line_x]
        # Closest sample on each side: largest X before, smallest X after.
        maxTime = before[before['mouseX'] == before['mouseX'].max()]
        minTime = after[after['mouseX'] == after['mouseX'].min()]
    elif direction == 'right_to_left':
        before = records[mouse_x >= line_x]
        after = records[mouse_x <= line_x]
        # Travelling leftwards, so the comparisons are mirrored.
        maxTime = before[before['mouseX'] == before['mouseX'].min()]
        minTime = after[after['mouseX'] == after['mouseX'].max()]
    else:
        raise ValueError(
            "direction must be 'left_to_right' or 'right_to_left', got %r" % (direction,)
        )

    # Same guards as vertical_points: idxmax()/idxmin() raise on an empty selection.
    if maxTime.shape[0] <= 0:
        return maxTime
    if minTime.shape[0] <= 0:
        return minTime

    # idxmax()/idxmin() return index *labels*, so look the rows up with .loc.
    # Ties on X are resolved by time: latest sample before, earliest after.
    frame1 = records.loc[maxTime['timeMilSec'].idxmax()]
    frame2 = records.loc[minTime['timeMilSec'].idxmin()]

    return pd.concat([frame1, frame2], axis=1).transpose()

In [11]:
def vertical_points(records, direction, color):
    """Return the two samples that bracket the crossing of the ``<color>Y0`` line.

    Parameters
    ----------
    records : pandas.DataFrame
        Samples of one trial. Must contain the columns ``InvertedMouseY``,
        ``timeMilSec`` and ``<color>Y0``.
    direction : str
        ``'bottom_to_top'`` or ``'top_to_bottom'``.
    color : str
        Prefix of the line column to test against (the callers in this
        notebook pass ``'green'`` or ``'red'``).

    Returns
    -------
    pandas.DataFrame
        Two rows: first the sample just before the line, then the sample just
        after it, in travel order. If no sample lies on one of the two sides,
        an empty frame is returned so callers can test ``shape[0] <= 0``.

    Raises
    ------
    ValueError
        If ``direction`` is not one of the two supported values.
    """
    line_y = records[str(color) + 'Y0']
    mouse_y = records['InvertedMouseY']

    if direction == "bottom_to_top":
        before = records[mouse_y <= line_y]
        after = records[mouse_y >= line_y]
        # Closest sample on each side: largest Y before, smallest Y after.
        maxTime = before[before['InvertedMouseY'] == before['InvertedMouseY'].max()]
        minTime = after[after['InvertedMouseY'] == after['InvertedMouseY'].min()]
    elif direction == "top_to_bottom":
        before = records[mouse_y >= line_y]
        after = records[mouse_y <= line_y]
        # Travelling downwards, so the comparisons are mirrored.
        maxTime = before[before['InvertedMouseY'] == before['InvertedMouseY'].min()]
        minTime = after[after['InvertedMouseY'] == after['InvertedMouseY'].max()]
    else:
        raise ValueError(
            "direction must be 'bottom_to_top' or 'top_to_bottom', got %r" % (direction,)
        )

    # Nothing recorded on one side of the line: hand back the empty selection.
    if maxTime.shape[0] <= 0:
        return maxTime
    if minTime.shape[0] <= 0:
        return minTime

    # idxmax()/idxmin() return index *labels*, so look the rows up with .loc
    # (positional .iloc only happened to work for a default 0..n-1 index).
    # Ties on Y are resolved by time: latest sample before, earliest after.
    frame1 = records.loc[maxTime['timeMilSec'].idxmax()]
    frame2 = records.loc[minTime['timeMilSec'].idxmin()]

    return pd.concat([frame1, frame2], axis=1).transpose()

In [12]:
def interpolate(result):
    """Estimate the crossing sample from the two bracketing samples.

    ``result`` is renumbered to 0 (before the line) and 1 (after the line).
    The returned row is row 1 with ``mouseX``, ``InvertedMouseY`` and
    ``timeMilSec`` replaced by the value halfway between the two rows. This is
    what a linear, index-based interpolation yields for a point inserted
    between two equally spaced neighbours.

    Parameters
    ----------
    result : pandas.DataFrame
        Two-row frame as returned by ``horizontal_points`` / ``vertical_points``.

    Returns
    -------
    pandas.Series
        Row 1 (``name == 1``) with the three interpolated values. All other
        fields, including the ``index`` field added by ``reset_index``, are
        those of the second input row.
    """
    columns = ['mouseX', 'InvertedMouseY', 'timeMilSec']

    result = result.reset_index()
    midpoint = result.loc[[0, 1], columns].astype(float).mean()

    for column in columns:
        # Cast first so a fractional midpoint can be stored in an integer or
        # object column, then write with .at: a single, non-chained assignment
        # (DataFrame.set_value no longer exists in current pandas).
        result[column] = result[column].astype(float)
        result.at[1, column] = midpoint[column]

    return result.loc[1]

In [13]:
from os import listdir
from os.path import isfile, join

def individual_data(data, direction, path="C:\\Users\\Moghazy\\Desktop\\datasets"):
    """Add one row per trial file of ``direction`` to ``data``.

    Every entry of ``path`` is treated as one user's folder; the user id is the
    entry's position in ``listdir(path)``. For each user the CSV files under
    ``<user folder>/<direction number>/<i>/<j>`` (``i`` and ``j`` in 0..2) are
    read. For each file the crossings of the green and red lines are located
    and interpolated, and one row is produced from the red crossing with
    ``mouseX``, ``mouseY``, ``InvertedMouseY`` and ``timeMilSec`` replaced by
    the absolute difference between the red and green crossings, plus a
    ``user`` field. Files where either crossing cannot be found are skipped.

    Parameters
    ----------
    data : pandas.DataFrame
        Rows collected so far; it is not modified.
    direction : str
        ``'left_to_right'``, ``'right_to_left'``, ``'bottom_to_top'`` or
        ``'top_to_bottom'`` (direction folders 0, 1, 2 and 3 respectively).
    path : str, optional
        Root folder of the data sets.

    Returns
    -------
    pandas.DataFrame
        ``data`` followed by the new rows.

    Raises
    ------
    ValueError
        If ``direction`` is not one of the four supported values.
    """
    direction_folders = {
        "left_to_right": 0,
        "right_to_left": 1,
        "bottom_to_top": 2,
        "top_to_bottom": 3,
    }
    if direction not in direction_folders:
        raise ValueError("unknown direction: %r" % (direction,))
    num_direction = direction_folders[direction]

    # Folders 0 and 1 hold horizontal movements, 2 and 3 vertical movements.
    find_crossing = horizontal_points if num_direction in (0, 1) else vertical_points
    difference_columns = ("mouseX", "mouseY", "InvertedMouseY", "timeMilSec")

    # Collect the rows in a list and build the frame once at the end instead of
    # growing it per file (DataFrame.append no longer exists in current pandas).
    rows = []
    folders = [join(path, k) for k in listdir(path)]
    for user, folder in enumerate(folders):
        for i in range(0, 3):
            for j in range(0, 3):
                mypath = join(folder, str(num_direction), str(i), str(j))
                onlyfiles = [join(mypath, f) for f in listdir(mypath) if isfile(join(mypath, f))]

                for file in onlyfiles:
                    records = pd.read_csv(file)

                    record1 = find_crossing(records, direction, 'green')
                    if record1.shape[0] <= 0:
                        continue
                    result1 = interpolate(record1)

                    record2 = find_crossing(records, direction, 'red')
                    if record2.shape[0] <= 0:
                        continue
                    result2 = interpolate(record2)

                    result = result2.copy()
                    for column in difference_columns:
                        result[column] = abs(result2[column] - result1[column])
                    result["user"] = user
                    rows.append(result)

    if not rows:
        return data

    new_rows = pd.DataFrame(rows)
    if data.empty:
        return new_rows
    return pd.concat([data, new_rows])

In [14]:
# Collect the measurements for every movement direction.
# individual_data() returns its input plus the new rows, so `data` already
# accumulates across directions. Appending it to `final_data` on every
# iteration would store the earlier directions several times, so
# `final_data` is taken once, after the loop.
data = pd.DataFrame()
directions = ["left_to_right" , "right_to_left", "top_to_bottom", "bottom_to_top"]

for direction in directions:
    data = individual_data(data, direction)

final_data = data.copy()

  # Remove the CWD from sys.path while we load stuff.
  # This is added back by InteractiveShellApp.init_path()
  if sys.path[0] == '':


In [15]:
# Preview only the first rows instead of rendering the whole frame.
final_data.head(10)

Unnamed: 0,InvertedMouseY,NumberInBlock,direction,greenX0,greenX1,greenY0,greenY1,index,mouseX,mouseY,redX0,redX1,redY0,redY1,speed,timeMilSec,trialIdx,user
1,57,0,0,640,640,320,760,17596,728,57,1280,1280,320,760,0,93,450,0
1,28,1,0,640,640,320,760,9963,561,28,1280,1280,320,760,0,172,451,0
1,55,10,0,640,640,320,760,11235,657,55,1280,1280,320,760,0,733,460,0
1,44,11,0,640,640,320,760,9539,628,44,1280,1280,320,760,0,390,461,0
1,95,12,0,640,640,320,760,9327,652,95,1280,1280,320,760,0,577,462,0
1,23,13,0,640,640,320,760,10599,624,23,1280,1280,320,760,0,905,463,0
1,96,14,0,640,640,320,760,10175,633,96,1280,1280,320,760,0,656,464,0
1,26,15,0,640,640,320,760,10387,599,26,1280,1280,320,760,0,905,465,0
1,3,16,0,640,640,320,760,10387,592,3,1280,1280,320,760,0,655,466,0
1,50,17,0,640,640,320,760,12046,613,50,1280,1280,320,760,0,702,467,0


In [16]:
# Write the collected rows to disk as CSV.
data.to_csv("C:\\Users\\Moghazy\\Desktop\\data1.csv")