# In [None]:
import csv
import os

def fix_turnstile_data(filenames):
    '''
    Rewrite MTA Subway turnstile text files so each row holds one reading.

    A raw turnstile file (see http://web.mta.info/developers/turnstile.html)
    packs several audit readings into a single comma-separated line:

        C/A,UNIT,SCP,DATE1,TIME1,DESC1,ENTRIES1,EXITS1,DATE2,TIME2,...

    The first three fields identify the device and every following group of
    five fields is one reading. For each input file this function writes a
    new file, named "updated_" + <original file name> and placed in the same
    directory, in which every reading sits on its own row, e.g.:

        A002,R051,02-00-00,05-28-11,00:00:00,REGULAR,003178521,001100739
        A002,R051,02-00-00,05-28-11,04:00:00,REGULAR,003178541,001100746
        A002,R051,02-00-00,05-28-11,08:00:00,REGULAR,003178559,001100775

    The order of fields and of readings is preserved.

    Field description:
        C/A     = Control Area (A002)
        UNIT    = Remote Unit for a station (R051)
        SCP     = Subunit Channel Position, the address of a device (02-00-00)
        DATE    = Date of the reading (MM-DD-YY)
        TIME    = Time of the scheduled audit event (hh:mm:ss)
        DESC    = Audit event type, normally "REGULAR" (every 4 hours);
                  "RECOVR AUD" marks a missed audit that was recovered
        ENTRIES = Cumulative entry register value for the device
        EXITS   = Cumulative exit register value for the device

    Args:
        filenames: A list (or other iterable) of paths to turnstile text
            files. A single path given as a string or path-like object is
            also accepted and treated as a one-element list.

    Returns:
        None. The result is the set of "updated_*" files written to disk.

    Notes:
        * Rows with no complete reading (for example blank lines) are
          skipped.
        * Fields left over after the last complete group of five are
          ignored rather than raising IndexError.
        * Trailing whitespace on the last field of each input line is
          removed.

    Sample input file: turnstile_110528.txt
    Sample updated file: updated_turnstile_110528.txt
    '''
    key_width = 3      # C/A, UNIT, SCP
    reading_width = 5  # DATE, TIME, DESC, ENTRIES, EXITS

    # Accept a lone path as well as a list so single-file callers keep working.
    if isinstance(filenames, (str, os.PathLike)):
        filenames = [filenames]

    for filename in filenames:
        # Prefix only the file name, so paths with directories stay valid.
        directory, basename = os.path.split(os.fspath(filename))
        target = os.path.join(directory, 'updated_' + basename)

        # newline='' lets the csv module control line endings itself.
        with open(filename, 'r', newline='') as source, \
                open(target, 'w', newline='') as sink:
            writer = csv.writer(sink, delimiter=',')

            for row in csv.reader(source, delimiter=','):
                if len(row) < key_width + reading_width:
                    continue  # blank line or no complete reading

                # Raw files may pad the end of the line with spaces.
                row[-1] = row[-1].strip()
                key = row[:key_width]

                # Stop before any incomplete trailing group.
                last_start = len(row) - reading_width
                for start in range(key_width, last_start + 1, reading_width):
                    writer.writerow(key + row[start:start + reading_width])


# In [None]:
# Example run on the sample file; the relative path means the file must be in
# the current working directory.
fix_turnstile_data('turnstile_110528.txt')