<a href="https://colab.research.google.com/github/HiraSSU/cs115-hw4/blob/main/CS115InClassDataParsingComplete.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Given a file that contains tab-delimited text data, write a program that:

1. Reads a filename as an argument

2. Then opens that file, reads all the data from it, and converts the data to one of the following formats, based on a second argument:

-c   CSV format

-j  JSON format

-x  XML format

3. Saves the new file, in the appropriate file format, in the current directory

Upload a Word document showing the execution, with a link to your source code and the 3 files (one each of CSV, JSON, and XML) created by your program.

In [5]:
import sys
import csv
import json
import xml.etree.ElementTree as ET

def clean_for_xml_tag(original_tag):
    """
    Turn a column header into a string that is usable as an XML tag name.

    XML tags have rules, like they can't contain spaces or certain symbols.
    This function takes a header name (like "Home State") and makes it
    a valid XML tag (like "Home_State").

    Rules applied, in order:
      * spaces and '/' become '_'; '(' and ')' are dropped;
      * any remaining character that is not a letter, digit, '_', '-'
        or '.' becomes '_' (e.g. '%', '#', '&', "'", ',');
      * if the result is empty or does not start with a letter or '_',
        a '_' is prepended (XML names may not start with a digit, '-'
        or '.').

    This is a conservative approximation of the XML Name production, not a
    full implementation of it.

    Args:
        original_tag: The header text to convert.

    Returns:
        A non-empty string suitable for use as an element name.
    """
    tag = original_tag.replace(' ', '_')
    tag = tag.replace('(', '').replace(')', '').replace('/', '_')

    def _is_name_char(ch):
        # ASCII: only letters, digits and the three punctuation marks XML
        # allows inside a name. Non-ASCII: keep letters (e.g. accented
        # characters) and replace everything else.
        if ord(ch) < 128:
            return ch.isalnum() or ch in '_-.'
        return ch.isalpha()

    tag = ''.join(ch if _is_name_char(ch) else '_' for ch in tag)

    # An XML name must be non-empty and start with a letter or underscore.
    if not tag or not (tag[0].isalpha() or tag[0] == '_'):
        tag = '_' + tag

    return tag

def _read_tab_delimited(filename, encoding):
    """Parse *filename* as tab-delimited text decoded with *encoding*.

    The first row is treated as the header. Each later row whose column
    count matches the header becomes a dict mapping stripped header labels
    to stripped cell values; rows with a different column count are reported
    on stderr and skipped.

    A fresh list is built on every call, so a decoding failure part-way
    through the file cannot leak partial results into a later attempt.

    Returns:
        A list of dicts (empty if the file has no header row).

    Raises:
        UnicodeDecodeError: If the file cannot be decoded with *encoding*.
        OSError: If the file cannot be opened or read.
    """
    records = []
    # newline='' is what the csv module expects from files it reads.
    with open(filename, 'r', encoding=encoding, newline='') as file:
        reader = csv.reader(file, delimiter='\t')
        header = next(reader, None)
        if header is None:
            # Completely empty file: nothing to parse.
            return records
        labels = [label.strip() for label in header]
        for row in reader:
            if len(row) != len(labels):
                print(f"Skipping row due to mismatch in number of columns: {row}", file=sys.stderr)
                continue
            records.append({label: cell.strip() for label, cell in zip(labels, row)})
    return records


def read_data_from_file(filename):
    """
    Opens the input file, reads its contents line by line, and turns
    the data into a list of dictionaries. This format is easy to work with.
    Example: [ {'Player': 'Peyton Manning', 'Year': '2013'}, ... ]

    The file is decoded as 'utf-8' first. On a UnicodeDecodeError the whole
    file is re-read from scratch with the next encoding, so rows parsed
    before the failure are never duplicated in the result.

    Args:
        filename: Path of the tab-delimited input file.

    Returns:
        A list of dicts, one per data row; an empty list if the file is
        empty or contains only a header.

    Exits the process with status 1 (after printing a message to stderr)
    if the file is missing or cannot be read.
    """
    # Order kept from the original implementation. Note that 'latin-1' maps
    # every possible byte, so in practice 'cp1252' is never reached.
    encodings = ('utf-8', 'latin-1', 'cp1252')

    for index, encoding in enumerate(encodings):
        try:
            return _read_tab_delimited(filename, encoding)
        except UnicodeDecodeError as e:
            if index + 1 == len(encodings):
                print(f"An error occurred while reading the file with '{encoding}' encoding: {e}", file=sys.stderr)
                sys.exit(1)
            print(f"UnicodeDecodeError: Could not decode the file with '{encoding}' encoding. Trying '{encodings[index + 1]}'.", file=sys.stderr)
        except FileNotFoundError:
            # If the file doesn't exist, print an error and exit.
            print(f"Error: The file '{filename}' was not found.", file=sys.stderr)
            sys.exit(1)
        except Exception as e:
            if index == 0:
                print(f"An error occurred while reading the file: {e}", file=sys.stderr)
            else:
                print(f"An error occurred while reading the file with '{encoding}' encoding: {e}", file=sys.stderr)
            sys.exit(1)

def save_as_csv(data, output_filename):
    """Write *data* (a list of dicts) to *output_filename* as CSV.

    The keys of the first dict form the header row; each dict's values form
    one data row. The output file is opened (and therefore created) before
    the emptiness check, so empty input leaves an empty file behind.
    Any exception is reported on stdout rather than propagated.
    """
    print(f"Saving data as CSV to {output_filename}...")
    try:
        with open(output_filename, mode='w', newline='', encoding='utf-8') as out:
            if not data:
                print("No data to write to CSV.")
                return
            csv_writer = csv.writer(out)
            csv_writer.writerow(data[0].keys())
            csv_writer.writerows(record.values() for record in data)
    except Exception as err:
        print(f"An error occurred while saving the CSV file: {err}")
    else:
        print("Successfully saved CSV file.")

def save_as_json(data, output_filename):
    """Serialize *data* to *output_filename* as JSON indented by 4 spaces.

    Any exception is reported on stdout rather than propagated.
    """
    print(f"Saving data as JSON to {output_filename}...")
    try:
        with open(output_filename, mode='w', encoding='utf-8') as out:
            json.dump(data, out, indent=4)
    except Exception as err:
        print(f"An error occurred while saving the JSON file: {err}")
    else:
        print("Successfully saved JSON file.")

def save_as_xml(data, output_filename):
    """Saves the data to a file in XML format.

    Builds a <players> root with one <player> child per dict in *data*;
    every key/value pair becomes a sub-element whose tag is the key passed
    through clean_for_xml_tag() and whose text is the value.

    The file is always written as UTF-8 with a matching XML declaration.
    (Passing encoding='unicode' together with a filename lets older Python
    versions fall back to the locale's preferred encoding, which makes the
    output platform-dependent and inconsistent with the CSV/JSON writers.)

    Args:
        data: List of dicts to serialize.
        output_filename: Path of the XML file to create or overwrite.

    Any exception is reported on stdout rather than propagated.
    """
    print(f"Saving data as XML to {output_filename}...")
    try:
        root = ET.Element("players")

        for player_dict in data:
            player_element = ET.SubElement(root, "player")

            for key, value in player_dict.items():
                child_element = ET.SubElement(player_element, clean_for_xml_tag(key))
                # ElementTree can only serialize string text.
                child_element.text = "" if value is None else str(value)

        try:
            ET.indent(root)
        except AttributeError:
            print("Warning: ET.indent requires Python 3.9+. XML may not be pretty-printed.", file=sys.stderr)

        tree = ET.ElementTree(root)
        tree.write(output_filename, encoding='utf-8', xml_declaration=True)
        print("Successfully saved XML file.")
    except Exception as e:
        print(f"An error occurred while saving the XML file: {e}")


def main():
    """Command-line entry point: convert a tab-delimited file to CSV/JSON/XML.

    Expects exactly two arguments in sys.argv: the input filename and a
    format flag (-c, -j or -x). The output file is written next to the
    input file, with the input's final extension replaced by the extension
    of the chosen format.

    Exits with status 1 on a usage error or an unknown format flag. The
    flag is validated before the input is read, so a typo in the flag is
    reported without parsing the whole file first.
    """
    # Local import: the file's import block does not provide os.
    import os

    if len(sys.argv) != 3:
        print("--- How to use this script ---")
        print("Usage: python converter_simple.py <input_filename> <format_flag>")
        print("Example: python converter_simple.py \"data.txt\" -c")
        print("\nAvailable format flags:")
        print("  -c   for CSV format")
        print("  -j   for JSON format")
        print("  -x   for XML format")
        sys.exit(1)

    input_file = sys.argv[1]
    format_flag = sys.argv[2]

    extensions = {'-c': '.csv', '-j': '.json', '-x': '.xml'}
    if format_flag not in extensions:
        print(f"Error: '{format_flag}' is not a valid format flag. Use -c, -j, or -x.")
        sys.exit(1)

    # splitext strips only the final extension, so dots in directory names
    # or in the file name itself ("./my.data.txt") are preserved.
    output_file = os.path.splitext(input_file)[0] + extensions[format_flag]

    player_data = read_data_from_file(input_file)

    if not player_data:
        print("No data was found in the input file.")
        return

    if format_flag == '-c':
        save_as_csv(player_data, output_file)
    elif format_flag == '-j':
        save_as_json(player_data, output_file)
    else:
        save_as_xml(player_data, output_file)

if __name__ == "__main__":
    # Notebook kernels have no real command line, so the arguments that
    # main() reads from sys.argv are supplied here by hand.
    sys.argv = [
        'converter_simple.py',
        '/content/NFL Offensive Player stats, 1999-2013.txt',
        '-c',
    ]
    main()

Saving data as CSV to /content/NFL Offensive Player stats, 1999-2013.csv...
Successfully saved CSV file.


UnicodeDecodeError: Could not decode the file with 'utf-8' encoding. Trying 'latin-1'.
