# Functions to read in XML, CSV, JSON and store as Python List.

- Functions take a filename and return a python list with nested dictionaries as the records in each file format.
- All three read functions return the same intermediate format, python list, nested dictionaries.

# Functions to write XML, CSV, JSON from Python List. 
- Functions each take a filename and data object created from read functions, (intermediate format, python list) and return the written files.
- These functions currently overwrite whatever file is used in their filename parameter.

In [1]:
# Packages
import csv
import operator
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import json
from collections import OrderedDict
import xml.etree.ElementTree as ET

 

**Read and Write CSV Functions:**

In [2]:
# Converting CSV TO DICTIONARY:


def read_csv_file(filename):
    
    # Dictionary Object to store data:
    data = []
    # Open CSV File:
    with open(filename, 'r') as file:
        # Pull data from CSV:
        reader = csv.DictReader(file)
        # Loop through file records,
        for row in reader:
            data.append(row)
    return data

In [3]:
# Testing read csv:
csv_test = read_csv_file('crime.csv')
csv_test[0]

{'address': '3108 OCCIDENTAL DR',
 'beat': '3C        ',
 'cdatetime': '1/1/06 0:00',
 'crimedescr': '10851(A)VC TAKE VEH W/O OWNER',
 'district': '3',
 'grid': '1115',
 'latitude': '38.55042047',
 'longitude': '-121.3914158',
 'ucr_ncic_code': '2404'}

In [4]:

def write_csv_file(filename, data):
    """
    Takes a filename (to be written to) and a data object 
    (created by one of the read functions). 
    Writes the data in the CSV format.
    """
    # Generate instance of new file to write:
    with open(filename, "w",newline = "") as outfile:
        
        # Pass CSV file and dictionary keys as column headers:
        writer = csv.DictWriter(outfile,fieldnames = data[0].keys())
        
        writer.writeheader()
        
        for entry in data:
            writer.writerow(entry)
    return filename


In [5]:
# Testing write csv function:
# Test write result:
new_file = write_csv_file("crime.csv",csv_test)


with open(new_file) as csvfile:
    reader = csv.reader(csvfile,delimiter = ",")
    for row in reader:
        print(row)

['address', 'beat', 'cdatetime', 'crimedescr', 'district', 'grid', 'latitude', 'longitude', 'ucr_ncic_code']
['3108 OCCIDENTAL DR', '3C        ', '1/1/06 0:00', '10851(A)VC TAKE VEH W/O OWNER', '3', '1115', '38.55042047', '-121.3914158', '2404']
['2082 EXPEDITION WAY', '5A        ', '1/1/06 0:00', '459 PC  BURGLARY RESIDENCE', '5', '1512', '38.47350069', '-121.4901858', '2204']
['4 PALEN CT', '2A        ', '1/1/06 0:00', '10851(A)VC TAKE VEH W/O OWNER', '2', '212', '38.65784584', '-121.4621009', '2404']
['22 BECKFORD CT', '6C        ', '1/1/06 0:00', '476 PC PASS FICTICIOUS CHECK', '6', '1443', '38.50677377', '-121.4269508', '2501']
['3421 AUBURN BLVD', '2A        ', '1/1/06 0:00', '459 PC  BURGLARY-UNSPECIFIED', '2', '508', '38.6374478', '-121.3846125', '2299']
['5301 BONNIEMAE WAY', '6B        ', '1/1/06 0:00', '530.5 PC USE PERSONAL ID INFO', '6', '1084', '38.52697863', '-121.4513383', '2604']
['2217 16TH AVE', '4A        ', '1/1/06 0:00', '459 PC  BURGLARY VEHICLE', '4', '957', '38

**Read and Write JSON Functions:**

In [6]:
def read_json_file(filename):
    """
    Similar to read_csv_file, except works for JSON files.
    """
    with open(filename) as json_file:
        data = json.load(json_file)
    return data

In [7]:
# Testing JSON read:
my_json_dict = read_json_file("crime.json")
print(type(my_json_dict))
my_json_dict[0]

<class 'list'>


{'address': '3108 OCCIDENTAL DR',
 'beat': '3C        ',
 'cdatetime': '1/1/06 0:00',
 'crimedescr': '10851(A)VC TAKE VEH W/O OWNER',
 'district': '3',
 'grid': '1115',
 'latitude': '38.55042047',
 'longitude': '-121.3914158',
 'ucr_ncic_code': '2404'}

In [8]:

def write_json_file(filename, data):
    """
    Writes JSON files. Similar to write_csv_file.
    """
    with open(filename, 'w') as file:
        json_new = json.dumps(data)
        file.write(json_new)
    return filename

In [9]:
# Testing JSON Write Function:
new_json_file = write_json_file("crime.json",my_json_dict)
with open(new_json_file) as file:
    data = json.load(file)
    print(data)

[{'address': '3108 OCCIDENTAL DR', 'beat': '3C        ', 'cdatetime': '1/1/06 0:00', 'crimedescr': '10851(A)VC TAKE VEH W/O OWNER', 'district': '3', 'grid': '1115', 'latitude': '38.55042047', 'longitude': '-121.3914158', 'ucr_ncic_code': '2404'}, {'address': '2082 EXPEDITION WAY', 'beat': '5A        ', 'cdatetime': '1/1/06 0:00', 'crimedescr': '459 PC  BURGLARY RESIDENCE', 'district': '5', 'grid': '1512', 'latitude': '38.47350069', 'longitude': '-121.4901858', 'ucr_ncic_code': '2204'}, {'address': '4 PALEN CT', 'beat': '2A        ', 'cdatetime': '1/1/06 0:00', 'crimedescr': '10851(A)VC TAKE VEH W/O OWNER', 'district': '2', 'grid': '212', 'latitude': '38.65784584', 'longitude': '-121.4621009', 'ucr_ncic_code': '2404'}, {'address': '22 BECKFORD CT', 'beat': '6C        ', 'cdatetime': '1/1/06 0:00', 'crimedescr': '476 PC PASS FICTICIOUS CHECK', 'district': '6', 'grid': '1443', 'latitude': '38.50677377', 'longitude': '-121.4269508', 'ucr_ncic_code': '2501'}, {'address': '3421 AUBURN BLVD',

**XML Read and Write Functions:**

In [11]:

def read_xml_file(xml_file):
    tree = ET.parse(xml_file)
    root = tree.getroot()
    data_list = []
    for record in root:
        record_dict = {}
        for column in record:
            record_dict.update({column.tag: column.text})
        data_list.append(record_dict)
    return data_list

In [12]:
# Testing XML Write function:
xml_test = read_xml_file('crime.xml')
xml_test

[{'address': '3108 OCCIDENTAL DR',
  'beat': '3C        ',
  'cdatetime': '1/1/06 0:00',
  'crimedescr': '10851(A)VC TAKE VEH W/O OWNER',
  'district': '3',
  'grid': '1115',
  'latitude': '38.55042047',
  'longitude': '-121.3914158',
  'ucr_ncic_code': '2404'},
 {'address': '2082 EXPEDITION WAY',
  'beat': '5A        ',
  'cdatetime': '1/1/06 0:00',
  'crimedescr': '459 PC  BURGLARY RESIDENCE',
  'district': '5',
  'grid': '1512',
  'latitude': '38.47350069',
  'longitude': '-121.4901858',
  'ucr_ncic_code': '2204'},
 {'address': '4 PALEN CT',
  'beat': '2A        ',
  'cdatetime': '1/1/06 0:00',
  'crimedescr': '10851(A)VC TAKE VEH W/O OWNER',
  'district': '2',
  'grid': '212',
  'latitude': '38.65784584',
  'longitude': '-121.4621009',
  'ucr_ncic_code': '2404'},
 {'address': '22 BECKFORD CT',
  'beat': '6C        ',
  'cdatetime': '1/1/06 0:00',
  'crimedescr': '476 PC PASS FICTICIOUS CHECK',
  'district': '6',
  'grid': '1443',
  'latitude': '38.50677377',
  'longitude': '-121.42

In [29]:
import xml.etree.ElementTree as ET

def write_xml_file(filename,data_list):
    # there should be a single "data" node,
    root = ET.Element("data")
    for record in data_list:
        # with as many record nodes as needed
        record_node = ET.SubElement(root, "record")
        for column, value in record.items():
            # in each record is column node with text content for that record
            column_node = ET.SubElement(record_node, column)
            column_node.text = value
    tree = ET.ElementTree(root)
    tree.write(filename)
    #return filename


# Type test:
xml_file = write_xml_file('crime.xml',my_json_dict)
print(type(xml_file))


<class 'str'>


**Testing XML Write Function:**

In [31]:
with open(xml_file, 'r') as f:
    data = f.read()

Bs_data = BeautifulSoup(data, "xml")
print(Bs_data)

<?xml version="1.0" encoding="utf-8"?>
<data><record><address>3108 OCCIDENTAL DR</address><beat>3C        </beat><cdatetime>1/1/06 0:00</cdatetime><crimedescr>10851(A)VC TAKE VEH W/O OWNER</crimedescr><district>3</district><grid>1115</grid><latitude>38.55042047</latitude><longitude>-121.3914158</longitude><ucr_ncic_code>2404</ucr_ncic_code></record><record><address>2082 EXPEDITION WAY</address><beat>5A        </beat><cdatetime>1/1/06 0:00</cdatetime><crimedescr>459 PC  BURGLARY RESIDENCE</crimedescr><district>5</district><grid>1512</grid><latitude>38.47350069</latitude><longitude>-121.4901858</longitude><ucr_ncic_code>2204</ucr_ncic_code></record><record><address>4 PALEN CT</address><beat>2A        </beat><cdatetime>1/1/06 0:00</cdatetime><crimedescr>10851(A)VC TAKE VEH W/O OWNER</crimedescr><district>2</district><grid>212</grid><latitude>38.65784584</latitude><longitude>-121.4621009</longitude><ucr_ncic_code>2404</ucr_ncic_code></record><record><address>22 BECKFORD CT</address><beat>6

In [17]:
# How it should look:
from bs4 import BeautifulSoup
with open('crime.xml', 'r') as f:
    data = f.read()

Bs_data = BeautifulSoup(data, "xml")
print(Bs_data)

<?xml version="1.0" encoding="utf-8"?>
<data><record><address>3108 OCCIDENTAL DR</address><beat>3C        </beat><cdatetime>1/1/06 0:00</cdatetime><crimedescr>10851(A)VC TAKE VEH W/O OWNER</crimedescr><district>3</district><grid>1115</grid><latitude>38.55042047</latitude><longitude>-121.3914158</longitude><ucr_ncic_code>2404</ucr_ncic_code></record><record><address>2082 EXPEDITION WAY</address><beat>5A        </beat><cdatetime>1/1/06 0:00</cdatetime><crimedescr>459 PC  BURGLARY RESIDENCE</crimedescr><district>5</district><grid>1512</grid><latitude>38.47350069</latitude><longitude>-121.4901858</longitude><ucr_ncic_code>2204</ucr_ncic_code></record><record><address>4 PALEN CT</address><beat>2A        </beat><cdatetime>1/1/06 0:00</cdatetime><crimedescr>10851(A)VC TAKE VEH W/O OWNER</crimedescr><district>2</district><grid>212</grid><latitude>38.65784584</latitude><longitude>-121.4621009</longitude><ucr_ncic_code>2404</ucr_ncic_code></record><record><address>22 BECKFORD CT</address><beat>6

In [None]:
# Continue with conversion functions as needed for different file types.