In [203]:
%pip install composable

Note: you may need to restart the kernel to use updated packages.


In [204]:
from composable import pipeable
from composable.strict import map, filter
from composable_glob import glob
from composable_utility import with_open
import composable_records as rec
import composable_tuples as tup

import re
import csv

In [205]:
from composable_utility import get, with_open, identity
from composable_object import obj, attr
from composable_origami import fold

# Project Overview.  
In this project, you will combine all of the text files by their type (attendance or practice quiz) in two ways: with comprehensions and using composable functions.

## Goal for part 1: 

1. Combine all attendance files into one CSV file.
2. Add the course and section information as columns in the resulting file.
3. Use comprehensions with records to accomplish this part.
4. You will need to make your own headers based on the README files.
5. There is no missing data, so you can use string split to split up the fixed widths.  BONUS: Do this using unfold instead of string split.
    - Hint for splitting.  Feel free to split the date-time column into two separate columns when splitting, just make sure you adjust the header accordingly.
    - BONUS: Do this using unfold instead of string split.
7. See Activity 2.7.1 - Combining a single type file with comprehensions for a demonstration of a similar problem.

## Convert txt to csv

In [206]:
(paths :=
 "./attendance_example_fixed_width/**/Attendance Quiz*.txt"
 >> glob(recursive=True)
)

['./attendance_example_fixed_width\\dsci494s7\\Attendance Quiz - User Attempts.txt',
 './attendance_example_fixed_width\\stat180s18\\Attendance Quiz - User Attempts.txt',
 './attendance_example_fixed_width\\stat491s1\\Attendance Quiz - User Attempts.txt']

In [207]:
(new_paths := [path.replace("\\", "/") for path in paths])

['./attendance_example_fixed_width/dsci494s7/Attendance Quiz - User Attempts.txt',
 './attendance_example_fixed_width/stat180s18/Attendance Quiz - User Attempts.txt',
 './attendance_example_fixed_width/stat491s1/Attendance Quiz - User Attempts.txt']

In [208]:
with open(paths[0], encoding ="utf-8") as f:
    lines = f.readlines()
lines[:3]

['14460432 au9747cp Jericho     Greer       1  1  1  2019-01-14 14:00:00 2019-01-14 14:06:00 100%\n',
 '14460432 au9747cp Jericho     Greer       2  1  1  2019-01-16 14:00:00 2019-01-16 14:08:00 100%\n',
 '14460432 au9747cp Jericho     Greer       3  1  1  2019-01-18 14:00:00 2019-01-18 14:05:00 100%\n']

In [209]:
split_lines = [line.split() for line in lines]
split_lines[:2]

[['14460432',
  'au9747cp',
  'Jericho',
  'Greer',
  '1',
  '1',
  '1',
  '2019-01-14',
  '14:00:00',
  '2019-01-14',
  '14:06:00',
  '100%'],
 ['14460432',
  'au9747cp',
  'Jericho',
  'Greer',
  '2',
  '1',
  '1',
  '2019-01-16',
  '14:00:00',
  '2019-01-16',
  '14:08:00',
  '100%']]

## Adding headers to the body of text

In [210]:
# List comprehension to create records (dicts) with keys: 'station_id', 'latitude', 'longitud
attendance = [
              {'ID': line[0],       
               'UserName': line[1],
               'FirstName': line[2],  
               "LastName": line[3],
               "Attempt": line[4],
               "Score": line[5],
               "Out_Of": line[6],
               "Attempt_Start_Date": line[7],
               "Attempt_Start_Time": line[8],
               "Attempt_End_Date": line[9],
               "Attempt_End_Time": line[10],
               "Percent": line[11]
              }
    for line in split_lines]

[record for record in attendance][:2]

[{'ID': '14460432',
  'UserName': 'au9747cp',
  'FirstName': 'Jericho',
  'LastName': 'Greer',
  'Attempt': '1',
  'Score': '1',
  'Out_Of': '1',
  'Attempt_Start_Date': '2019-01-14',
  'Attempt_Start_Time': '14:00:00',
  'Attempt_End_Date': '2019-01-14',
  'Attempt_End_Time': '14:06:00',
  'Percent': '100%'},
 {'ID': '14460432',
  'UserName': 'au9747cp',
  'FirstName': 'Jericho',
  'LastName': 'Greer',
  'Attempt': '2',
  'Score': '1',
  'Out_Of': '1',
  'Attempt_Start_Date': '2019-01-16',
  'Attempt_Start_Time': '14:00:00',
  'Attempt_End_Date': '2019-01-16',
  'Attempt_End_Time': '14:08:00',
  'Percent': '100%'}]

## The process of saving a txt as a csv file

In [234]:
# Define CSV file name
output_file = "./attendance_example_fixed_width/dsci494s7/attendance_quiz.csv"

# Extract headers from the first dictionary
headers = attendance[0].keys()

# Write to CSV
with open(output_file, mode="w", newline="") as file:
    writer = csv.DictWriter(file, fieldnames=headers)
    
    writer.writeheader()  # Write headers
    writer.writerows(attendance)  # Write data rows

## Automate the process of saving txt as csv files

In [235]:
# Loop through each file path
for path in new_paths:
    class_section = path.split("/")[-2] # Get the folder name (e.g.'dsci494s7')

    # Define CSV file name based on class and section
    output_file = f"./attendance_example_fixed_width/{class_section}/attendance_quiz.csv"

    # Extract headers
    headers = attendance[0].keys()

    # Write to CSV
    if attendance:
        with open(output_file, mode="w", newline="") as file:
            writer = csv.DictWriter(file, fieldnames=headers)
            writer.writeheader()
            writer.writerows(attendance)

        print(f"CSV file saved: {output_file}")

CSV file saved: ./attendance_example_fixed_width/dsci494s7/attendance_quiz.csv
CSV file saved: ./attendance_example_fixed_width/stat180s18/attendance_quiz.csv
CSV file saved: ./attendance_example_fixed_width/stat491s1/attendance_quiz.csv


## Import csv files

In [236]:
(paths :=
 "./attendance_example_fixed_width/*/attendance_quiz.csv"
 >> glob(recursive=True)
)

['./attendance_example_fixed_width\\dsci494s7\\attendance_quiz.csv',
 './attendance_example_fixed_width\\stat180s18\\attendance_quiz.csv',
 './attendance_example_fixed_width\\stat491s1\\attendance_quiz.csv']

In [237]:
(csv_paths := [path.replace("\\", "/") for path in paths])

['./attendance_example_fixed_width/dsci494s7/attendance_quiz.csv',
 './attendance_example_fixed_width/stat180s18/attendance_quiz.csv',
 './attendance_example_fixed_width/stat491s1/attendance_quiz.csv']

## Unfolding the file paths into records

In [238]:
get_file = lambda path: path.split('/')[-1]
get_folder = lambda path: '/'.join(path.split('/')[:-1]) + '/'
get_class = lambda path: re.search(r"([A-Za-z]{4})\d{3}", path).group()
get_section = lambda path: re.search(r"s\d{1,2}", path).group()
get_type = lambda path: path.split('/')[-1].split(' Quiz')[0]

In [239]:
(paths_and_lines :=
 [ {'file': get_file(path),
    'folder': get_folder(path),
    'class': get_class(path),
    'section': get_section(path),
    'type':   get_type(path),
    'lines':  with_open(lambda f: f.readlines(), path)
   }
     for path in csv_paths
 ]
) >> map(rec.heads(2))

[{'file': 'attendance_quiz.csv',
  'folder': './attendance_example_fixed_width/dsci494s7/',
  'class': 'dsci494',
  'section': 's7',
  'type': 'attendance_quiz.csv',
  'lines': ['ID,UserName,FirstName,LastName,Attempt,Score,Out_Of,Attempt_Start_Date,Attempt_Start_Time,Attempt_End_Date,Attempt_End_Time,Percent\n',
   '14460432,au9747cp,Jericho,Greer,1,1,1,2019-01-14,14:00:00,2019-01-14,14:06:00,100%\n']},
 {'file': 'attendance_quiz.csv',
  'folder': './attendance_example_fixed_width/stat180s18/',
  'class': 'stat180',
  'section': 's18',
  'type': 'attendance_quiz.csv',
  'lines': ['ID,UserName,FirstName,LastName,Attempt,Score,Out_Of,Attempt_Start_Date,Attempt_Start_Time,Attempt_End_Date,Attempt_End_Time,Percent\n',
   '14460432,au9747cp,Jericho,Greer,1,1,1,2019-01-14,14:00:00,2019-01-14,14:06:00,100%\n']},
 {'file': 'attendance_quiz.csv',
  'folder': './attendance_example_fixed_width/stat491s1/',
  'class': 'stat491',
  'section': 's1',
  'type': 'attendance_quiz.csv',
  'lines': ['ID,

## Extracting the headers and bodies

In [240]:
(header_and_body := 
 [record | {'header': record['lines'][0],  # This is `first` element
            'body'  : record['lines'][1:], # This is the `rest`
           }
     for record in paths_and_lines
 ]
) >> map(rec.heads(2))

[{'file': 'attendance_quiz.csv',
  'folder': './attendance_example_fixed_width/dsci494s7/',
  'class': 'dsci494',
  'section': 's7',
  'type': 'attendance_quiz.csv',
  'lines': ['ID,UserName,FirstName,LastName,Attempt,Score,Out_Of,Attempt_Start_Date,Attempt_Start_Time,Attempt_End_Date,Attempt_End_Time,Percent\n',
   '14460432,au9747cp,Jericho,Greer,1,1,1,2019-01-14,14:00:00,2019-01-14,14:06:00,100%\n'],
  'header': 'ID,UserName,FirstName,LastName,Attempt,Score,Out_Of,Attempt_Start_Date,Attempt_Start_Time,Attempt_End_Date,Attempt_End_Time,Percent\n',
  'body': ['14460432,au9747cp,Jericho,Greer,1,1,1,2019-01-14,14:00:00,2019-01-14,14:06:00,100%\n',
   '14460432,au9747cp,Jericho,Greer,2,1,1,2019-01-16,14:00:00,2019-01-16,14:08:00,100%\n']},
 {'file': 'attendance_quiz.csv',
  'folder': './attendance_example_fixed_width/stat180s18/',
  'class': 'stat180',
  'section': 's18',
  'type': 'attendance_quiz.csv',
  'lines': ['ID,UserName,FirstName,LastName,Attempt,Score,Out_Of,Attempt_Start_Date,

## Adding in "class" and "section" to the headers and bodies

In [243]:
(with_class_section_columns :=
 [record | {'header': 'Class,Section,' + record['header'],
            'body'  : [f"{record['class']},{record['section']}," + line for line in record['body']]
           }
     for record in header_and_body
 ]
) >> map(rec.heads(2))

[{'file': 'attendance_quiz.csv',
  'folder': './attendance_example_fixed_width/dsci494s7/',
  'class': 'dsci494',
  'section': 's7',
  'type': 'attendance_quiz.csv',
  'lines': ['ID,UserName,FirstName,LastName,Attempt,Score,Out_Of,Attempt_Start_Date,Attempt_Start_Time,Attempt_End_Date,Attempt_End_Time,Percent\n',
   '14460432,au9747cp,Jericho,Greer,1,1,1,2019-01-14,14:00:00,2019-01-14,14:06:00,100%\n'],
  'header': 'Class,Section,ID,UserName,FirstName,LastName,Attempt,Score,Out_Of,Attempt_Start_Date,Attempt_Start_Time,Attempt_End_Date,Attempt_End_Time,Percent\n',
  'body': ['dsci494,s7,14460432,au9747cp,Jericho,Greer,1,1,1,2019-01-14,14:00:00,2019-01-14,14:06:00,100%\n',
   'dsci494,s7,14460432,au9747cp,Jericho,Greer,2,1,1,2019-01-16,14:00:00,2019-01-16,14:08:00,100%\n']},
 {'file': 'attendance_quiz.csv',
  'folder': './attendance_example_fixed_width/stat180s18/',
  'class': 'stat180',
  'section': 's18',
  'type': 'attendance_quiz.csv',
  'lines': ['ID,UserName,FirstName,LastName,Atte

## Combining bodies - Approach 1 - pull out the information

In [267]:
header = with_class_section_columns[0]['header']
folder =  "./" + with_class_section_columns[0]['folder'].split("/")[1] + "/"

header, folder

('Class,Section,ID,UserName,FirstName,LastName,Attempt,Score,Out_Of,Attempt_Start_Date,Attempt_Start_Time,Attempt_End_Date,Attempt_End_Time,Percent\n',
 './attendance_example_fixed_width/')

In [258]:
(bodies := 
 [ record['body'] for record in with_class_section_columns]
)

[['dsci494,s7,14460432,au9747cp,Jericho,Greer,1,1,1,2019-01-14,14:00:00,2019-01-14,14:06:00,100%\n',
  'dsci494,s7,14460432,au9747cp,Jericho,Greer,2,1,1,2019-01-16,14:00:00,2019-01-16,14:08:00,100%\n',
  'dsci494,s7,14460432,au9747cp,Jericho,Greer,3,1,1,2019-01-18,14:00:00,2019-01-18,14:05:00,100%\n',
  'dsci494,s7,14460432,au9747cp,Jericho,Greer,4,1,1,2019-01-23,14:00:00,2019-01-23,14:06:00,100%\n',
  'dsci494,s7,14460432,au9747cp,Jericho,Greer,5,1,1,2019-01-25,14:00:00,2019-01-25,14:10:00,100%\n',
  'dsci494,s7,14460432,au9747cp,Jericho,Greer,6,1,1,2019-01-28,14:04:00,2019-01-28,14:12:00,100%\n',
  'dsci494,s7,14460432,au9747cp,Jericho,Greer,7,1,1,2019-02-01,14:56:00,2019-02-01,15:01:00,100%\n',
  'dsci494,s7,14460432,au9747cp,Jericho,Greer,8,1,1,2019-02-04,14:58:00,2019-02-04,15:07:00,100%\n',
  'dsci494,s7,14460432,au9747cp,Jericho,Greer,9,1,1,2019-02-06,14:00:00,2019-02-06,14:10:00,100%\n',
  'dsci494,s7,14460432,au9747cp,Jericho,Greer,10,1,1,2019-02-08,14:59:00,2019-02-08,15:08:0

In [269]:
from functools import reduce

(out_lines := 
 reduce(lambda acc, el: acc + el, bodies, [header])
)

# print(out_lines[:5])

['Class,Section,ID,UserName,FirstName,LastName,Attempt,Score,Out_Of,Attempt_Start_Date,Attempt_Start_Time,Attempt_End_Date,Attempt_End_Time,Percent\n',
 'dsci494,s7,14460432,au9747cp,Jericho,Greer,1,1,1,2019-01-14,14:00:00,2019-01-14,14:06:00,100%\n',
 'dsci494,s7,14460432,au9747cp,Jericho,Greer,2,1,1,2019-01-16,14:00:00,2019-01-16,14:08:00,100%\n',
 'dsci494,s7,14460432,au9747cp,Jericho,Greer,3,1,1,2019-01-18,14:00:00,2019-01-18,14:05:00,100%\n',
 'dsci494,s7,14460432,au9747cp,Jericho,Greer,4,1,1,2019-01-23,14:00:00,2019-01-23,14:06:00,100%\n',
 'dsci494,s7,14460432,au9747cp,Jericho,Greer,5,1,1,2019-01-25,14:00:00,2019-01-25,14:10:00,100%\n',
 'dsci494,s7,14460432,au9747cp,Jericho,Greer,6,1,1,2019-01-28,14:04:00,2019-01-28,14:12:00,100%\n',
 'dsci494,s7,14460432,au9747cp,Jericho,Greer,7,1,1,2019-02-01,14:56:00,2019-02-01,15:01:00,100%\n',
 'dsci494,s7,14460432,au9747cp,Jericho,Greer,8,1,1,2019-02-04,14:58:00,2019-02-04,15:07:00,100%\n',
 'dsci494,s7,14460432,au9747cp,Jericho,Greer,9,1

## Saving the file as a csv

In [268]:
output_file = folder + 'attendance_quiz.csv'

with open(output_file, mode = 'w') as out:
    out.writelines(out_lines)
    print(f"CSV file saved: {output_file}")

CSV file saved: ./attendance_example_fixed_width/attendance_quiz.csv


## Deliverables.  Submit the following

1. A word document documenting your code/solution,
2. A notebook containing your work.  This documents should assume that the unzipped file and all the composable modules are in the root directory.