In [1]:
%pip install composable

Note: you may need to restart the kernel to use updated packages.


In [21]:
from composable import pipeable
from composable.strict import map, filter
import composable_records as rec
import composable_tuples as tup
from composable_glob import glob
from composable_utility import get, with_open, identity, apply
from composable_object import obj, attr
from composable_origami import fold
from operator import add

import re
import csv

# Project Overview
## Merging both Attendance and Practice Quiz files with composable functions

### Goal for Part 4:
1. Combine all attendance and practice files into one CSV file simultaneously.
2. Add the course and section information as columns in the resulting file.
3. Use a single pipe with composable functions and records to accomplish this part (writing the output files can be done in a separate cell).

# Step 1: Convert txt to csv

## Import txt files

In [3]:
# Collect every quiz attempt export (attendance and practice) below the data folder.
txt_paths = "./attendance_example_fixed_width/**/* Quiz*.txt" >> glob(recursive=True)
txt_paths

['./attendance_example_fixed_width/dsci494s7/Attendance Quiz - User Attempts.txt',
 './attendance_example_fixed_width/dsci494s7/Practice Quiz - Module 1 - User Attempts.txt',
 './attendance_example_fixed_width/dsci494s7/Practice Quiz - Module 2 - User Attempts.txt',
 './attendance_example_fixed_width/dsci494s7/Practice Quiz - Module 3 - User Attempts.txt',
 './attendance_example_fixed_width/dsci494s7/Practice Quiz - Module 4 - User Attempts.txt',
 './attendance_example_fixed_width/stat180s18/Attendance Quiz - User Attempts.txt',
 './attendance_example_fixed_width/stat491s1/Attendance Quiz - User Attempts.txt',
 './attendance_example_fixed_width/stat491s1/Practice Quiz - Module 1 - User Attempts.txt',
 './attendance_example_fixed_width/stat491s1/Practice Quiz - Module 2 - User Attempts.txt',
 './attendance_example_fixed_width/stat491s1/Practice Quiz - Module 3 - User Attempts.txt',
 './attendance_example_fixed_width/stat491s1/Practice Quiz - Module 4 - User Attempts.txt']

In [4]:
# Peek at the raw layout of the first export before parsing it.
# `txt_paths` is the list produced by the glob cell above, so it is reused here
# instead of globbing the same pattern a second time.
with open(txt_paths[0], encoding="utf-8") as f:
    lines = f.readlines()
lines[:3]

['14460432 au9747cp Jericho     Greer       1  1  1  2019-01-14 14:00:00 2019-01-14 14:06:00 100%\n',
 '14460432 au9747cp Jericho     Greer       2  1  1  2019-01-16 14:00:00 2019-01-16 14:08:00 100%\n',
 '14460432 au9747cp Jericho     Greer       3  1  1  2019-01-18 14:00:00 2019-01-18 14:05:00 100%\n']

In [5]:
# Column names assigned, in order, to the whitespace-separated fields of each line.
QUIZ_COLUMNS = ('ID', 'UserName', 'FirstName', 'LastName', 'Attempt', 'Score', 'Out_Of',
                'Attempt_Start_Date', 'Attempt_Start_Time', 'Attempt_End_Date',
                'Attempt_End_Time', 'Percent')

(attendance_records_txt :=
 txt_paths
  # One record per export: its path plus the raw text lines read from it.
  >> map(rec.create(path = identity,
                    lines = with_open(obj.readlines()),
                   )
        )
  >> map(rec.update(len_lines = lambda r: len(r.lines)))
  # Keep only files that contain more than one line.
  >> filter(lambda r: r.len_lines > 1)
  # Split each line on whitespace. Blank lines are skipped: they split to an empty
  # list and would raise IndexError when the columns are assigned below.
  # NOTE(review): the folder name says "fixed_width", but fields are split on
  # whitespace, so a first/last name containing a space would shift every later
  # column - confirm the data never contains such names.
  >> map(rec.update(split_lines = lambda r: [line.split() for line in r.lines if line.strip()]))
  # Label the first len(QUIZ_COLUMNS) fields; a line with fewer fields raises IndexError.
  >> map(rec.update(with_headers = lambda r: [
                        {name: fields[i] for i, name in enumerate(QUIZ_COLUMNS)}
                        for fields in r.split_lines
                    ]))
  # Write the CSV next to its source file. Appending to the full path keeps this
  # correct for files nested at any depth under the recursive glob.
  >> map(rec.update(out_path = lambda r: f"{r.path}.csv"))
)

[Record(path='./attendance_example_fixed_width/dsci494s7/Attendance Quiz - User Attempts.txt',
  lines=['14460432 au9747cp Jericho     Greer       1  1  1  2019-01-14 14:00:00 2019-01-14 14:06:00 100%\n',
  '14460432 au9747cp Jericho     Greer       2  1  1  2019-01-16 14:00:00 2019-01-16 14:08:00 100%\n',
  '14460432 au9747cp Jericho     Greer       3  1  1  2019-01-18 14:00:00 2019-01-18 14:05:00 100%\n',
  '14460432 au9747cp Jericho     Greer       4  1  1  2019-01-23 14:00:00 2019-01-23 14:06:00 100%\n',
  '14460432 au9747cp Jericho     Greer       5  1  1  2019-01-25 14:00:00 2019-01-25 14:10:00 100%\n',
  '14460432 au9747cp Jericho     Greer       6  1  1  2019-01-28 14:04:00 2019-01-28 14:12:00 100%\n',
  '14460432 au9747cp Jericho     Greer       7  1  1  2019-02-01 14:56:00 2019-02-01 15:01:00 100%\n',
  '14460432 au9747cp Jericho     Greer       8  1  1  2019-02-04 14:58:00 2019-02-04 15:07:00 100%\n',
  '14460432 au9747cp Jericho     Greer       9  1  1  2019-02-06 14:00:00 

## Saving each CSV file

In [6]:
# Write one CSV per source export. The csv module is used so that a field containing
# a comma or quote is escaped instead of silently corrupting the row.
for r in attendance_records_txt:
    rows = r['with_headers']
    # newline='' is the csv-module convention for output files; lineterminator='\n'
    # keeps the '\n' line endings used when the rows were written by hand.
    with open(r['out_path'], mode='w', newline='') as out:
        # Column order comes from the first row's keys, which also form the header line.
        writer = csv.DictWriter(out, fieldnames=list(rows[0]), lineterminator='\n')
        writer.writeheader()
        writer.writerows(rows)

    print(f"CSV file saved: {r['out_path']}")

CSV file saved: ./attendance_example_fixed_width/dsci494s7/Attendance Quiz - User Attempts.txt.csv
CSV file saved: ./attendance_example_fixed_width/dsci494s7/Practice Quiz - Module 1 - User Attempts.txt.csv
CSV file saved: ./attendance_example_fixed_width/dsci494s7/Practice Quiz - Module 2 - User Attempts.txt.csv
CSV file saved: ./attendance_example_fixed_width/dsci494s7/Practice Quiz - Module 3 - User Attempts.txt.csv
CSV file saved: ./attendance_example_fixed_width/dsci494s7/Practice Quiz - Module 4 - User Attempts.txt.csv
CSV file saved: ./attendance_example_fixed_width/stat180s18/Attendance Quiz - User Attempts.txt.csv
CSV file saved: ./attendance_example_fixed_width/stat491s1/Attendance Quiz - User Attempts.txt.csv
CSV file saved: ./attendance_example_fixed_width/stat491s1/Practice Quiz - Module 1 - User Attempts.txt.csv
CSV file saved: ./attendance_example_fixed_width/stat491s1/Practice Quiz - Module 2 - User Attempts.txt.csv
CSV file saved: ./attendance_example_fixed_width/stat4

# Step 2: Handling multiple csvs

## Import csv files

In [7]:
# Collect the per-course quiz CSVs, one folder level below the data root.
# NOTE(review): this pattern needs "_quiz" in the file name, but the files written in
# Step 1 are named "<original name>.txt.csv", so they are not matched here - confirm
# where the "*_quiz*.csv" files come from.
paths = "./attendance_example_fixed_width/*/*_quiz*.csv" >> glob(recursive=True)
paths

['./attendance_example_fixed_width/dsci494s7/attendance_quiz.csv',
 './attendance_example_fixed_width/dsci494s7/Practice_quiz_Module 1.csv',
 './attendance_example_fixed_width/dsci494s7/Practice_quiz_Module 2.csv',
 './attendance_example_fixed_width/dsci494s7/Practice_quiz_Module 3.csv',
 './attendance_example_fixed_width/dsci494s7/Practice_quiz_Module 4.csv',
 './attendance_example_fixed_width/stat180s18/attendance_quiz.csv',
 './attendance_example_fixed_width/stat491s1/attendance_quiz.csv',
 './attendance_example_fixed_width/stat491s1/Practice_quiz_Module 1.csv',
 './attendance_example_fixed_width/stat491s1/Practice_quiz_Module 2.csv',
 './attendance_example_fixed_width/stat491s1/Practice_quiz_Module 3.csv',
 './attendance_example_fixed_width/stat491s1/Practice_quiz_Module 4.csv']

In [58]:
# Build one merged table per quiz type (keyed by `type_`): every data row is prefixed
# with Class, Section, Type and Module values derived from the file's path.

(quiz_records :=
 paths
 # One record per CSV: its path plus the raw text lines read from it.
 >> map(rec.create (path = identity,
                    lines =  with_open(obj.readlines()),
                   )
       )
 >> map(rec.update(len_lines = lambda r: len(r.lines),
                  )
       )
 # Keep only files that have more than one line (i.e. something besides the header row).
 >> filter(lambda r: r.len_lines > 1)
 # Derive the metadata from the path, e.g. './<root>/dsci494s7/Practice_quiz_Module 1.csv'.
 # Later fields read earlier ones (class_ uses class_section, module uses file/type_);
 # presumably `sequential = True` is what allows this - confirm in composable_records.
 >> map(rec.update(file = lambda r: r.path >> obj.split('/') >> get(-1),
                   folder = lambda r: re.split(r'/[A-Za-z]{4}\d{3}', r.path) >> get(0) >> apply(lambda s: s + '/'),
                   class_section = lambda r: r.path.split('/') >> get(2), 
                   class_ = lambda r: re.split(r's\d{1,2}', r.class_section) >> get(0),
                   section = lambda r: r.class_section.split('s') >> get(-1) >> apply(lambda s: 's' + s),
                   type_ = lambda r: r.file.split('_') >> get(0),
                   module = lambda r: r.file.split('_')[-1].split('.')[0].split()[-1] if r.type_ == 'Practice' else '',
                   header = lambda r: r.lines >> tup.first,
                   body = lambda r: r.lines >> tup.rest,
                   sequential = True
                  )
       )
  # Extend the header line with the four new column names.
  >> map(rec.apply(header = lambda s: 'Class,Section,Type,Module,' + s))
  # Prefix every data row with the metadata values. Blank lines are dropped, and each
  # row is forced to end in exactly one '\n' so that a file whose last line has no
  # trailing newline cannot fuse with the first row of the next file after merging.
  >> map(rec.update(body = lambda r: r.body
                                     >> filter(lambda line: line.strip())
                                     >> map(lambda line: str(r.class_) + ',' +
                                                         str(r.section) + ',' +
                                                         str(r.type_) + ',' +
                                                         str(r.module) + ',' +
                                                         line.rstrip('\n') + '\n'),
                  )
       )
  # Group the per-file records by quiz type, then collapse each group into a single
  # record (presumably zip_at gathers each listed field across the group - confirm).
  >> rec.group_by('type_')
  >> rec.map(rec.zip_at(['folder', 'header', 'type_', 'body']))
  # header/folder/type_ are reduced to their first element with tup.first.
  >> rec.map(rec.apply(header = tup.first,
                       folder = tup.first,
                       type_ = tup.first,
                     )
           )
  >> rec.map(rec.update(out_path = lambda r: r['folder'] + '_Part4_' + r['type_'] + '.csv'))
  # Concatenate the per-file row lists onto [header], giving one flat list of lines.
  >> rec.map(rec.update(body = lambda r: r['body'] >> fold(add, [r['header']]),
                      )
            )
) >> rec.readable_output(num_keys=5, max_len_seq=3) # for record of lists

{'attendance': {'folder': './attendance_example_fixed_width/',
  'header': 'Class,Section,Type,Module,ID,UserName,FirstName,LastName,Attempt,Score,Out_Of,Attempt_Start_Date,Attempt_Start_Time,Attempt_End_Date,Attempt_End_Time,Percent\n',
  'type_': 'attendance',
  'body': ['Class,Section,Type,Module,ID,UserName,FirstName,LastName,Attempt,Score,Out_Of,Attempt_Start_Date,Attempt_Start_Time,Attempt_End_Date,Attempt_End_Time,Percent\n',
   'dsci494,s7,attendance,,14460432,au9747cp,Jericho,Greer,1,1,1,2019-01-14,14:00:00,2019-01-14,14:06:00,100%\n',
   'dsci494,s7,attendance,,14460432,au9747cp,Jericho,Greer,2,1,1,2019-01-16,14:00:00,2019-01-16,14:08:00,100%\n',
   'dsci494,s7,attendance,,14460432,au9747cp,Jericho,Greer,3,1,1,2019-01-18,14:00:00,2019-01-18,14:05:00,100%\n',
   'dsci494,s7,attendance,,14460432,au9747cp,Jericho,Greer,4,1,1,2019-01-23,14:00:00,2019-01-23,14:06:00,100%\n',
   'dsci494,s7,attendance,,14460432,au9747cp,Jericho,Greer,5,1,1,2019-01-25,14:00:00,2019-01-25,14:10:00,10

## Saving as a single csv

In [59]:
# Write one merged CSV per quiz type; `body` already starts with the header line.
for type_, record in quiz_records.items():
    # newline='' writes the stored '\n' endings unchanged on every platform, matching
    # how the per-file CSVs are opened for writing.
    with open(record['out_path'], mode='w', newline='') as out:
        out.writelines(record['body'])
    # Report what was actually done (a file was saved) and where it went.
    print(f"CSV file saved for {type_}: {record['out_path']}")

Print all lines for attendance

Print all lines for Practice

