# COMP47670 - Assignment 1 by Peter Coogan (21202781)

## Data Identification and Collection

In [2]:
import csv
import urllib.parse
import urllib.request

### Chosen API
The chosen API is the USGS Earthquake Catalog - https://earthquake.usgs.gov/fdsnws/event/1

### Functions to Collect Data from API

In [3]:
def create_and_write_headings(file_name, headings):
    """Create (or overwrite) a CSV file that contains only the header row.

    Parameters
    ----------
    file_name : str
        Path of the CSV file to create. An existing file is truncated.
    headings : list of str
        Column names written as the first (and only) row.

    Returns
    -------
    None
    """
    # newline="" leaves line endings to the csv module. Without it, platforms
    # that translate "\n" on write turn the writer's "\r\n" into "\r\r\n",
    # which shows up as a blank row after every record.
    with open(file_name, "w", newline="") as write_file:
        csv.writer(write_file).writerow(headings)

def get_headings(raw_html):
    """Return the column names for the output file.

    The first line of ``raw_html`` is split on commas and a ``'date'`` entry
    is placed in front of the result, because the rows written by
    ``write_data_to_file`` carry the date and the time as two separate values.
    """
    header_line, _, _ = raw_html.partition('\n')
    return ['date'] + header_line.split(',')

def get_lines(raw_html):
    """Return every line that follows the header line of ``raw_html``.

    The text is split on newline characters only. Blank entries, such as the
    one produced by a trailing newline, are kept so the caller decides how to
    treat them.

    Returns
    -------
    list of str
        The data lines, or an empty list when ``raw_html`` contains no newline
        at all (for example an empty response body). Previously that case
        raised ``IndexError``.
    """
    _, newline, body = raw_html.partition('\n')
    if not newline:
        # Nothing follows the header, so there are no data lines to return.
        return []
    return body.split('\n')

def write_data_to_file(file_name, url):
    """Download a CSV response from ``url`` and save a cleaned copy to ``file_name``.

    The header row is written first (with an extra leading ``date`` column),
    then every non-blank data row is appended with two changes:

    * the first field, a ``<date>T<time>Z`` timestamp, is split into separate
      date and time fields (the trailing ``Z`` is dropped);
    * the ``place`` column is blanked. Its value is written as the two-character
      string ``""`` so the files stay identical to those produced before the
      rows were parsed with the csv module.
      NOTE(review): now that quoted commas are parsed correctly, the place text
      could be kept instead; confirm nothing downstream relies on the placeholder.

    Parameters
    ----------
    file_name : str
        Path of the CSV file to create; an existing file is overwritten.
    url : str
        Address that returns comma-separated text whose first line is a header.

    Returns
    -------
    None
    """
    # The context manager closes the connection even if reading/decoding fails.
    with urllib.request.urlopen(url) as response:
        raw_html = response.read().decode()

    headings = get_headings(raw_html)
    create_and_write_headings(file_name, headings)
    lines = get_lines(raw_html)

    # headings already has the extra 'date' entry, so this index lines up with
    # the output rows built below (which also gain one leading field).
    place_index = headings.index('place') if 'place' in headings else None

    # newline='' lets the csv module control line endings (avoids blank rows
    # on platforms that translate "\n").
    with open(file_name, 'a', newline='') as write_file:
        writer = csv.writer(write_file)
        # csv.reader understands quoted fields, so a comma inside the place
        # text no longer needs to be stripped out before splitting.
        for fields in csv.reader(lines):
            # blank lines can appear so ignore them
            if not fields:
                continue

            # Separate the date and time; a field without 'T' is kept whole
            # as the date with an empty time.
            date, _, clock = fields[0].partition('T')
            if clock.endswith('Z'):
                clock = clock[:-1]
            row = [date, clock] + fields[1:]

            if place_index is not None and place_index < len(row):
                row[place_index] = '""'

            writer.writerow(row)
    
    
def raw_data_parser(file_name, starttime="", endtime="", minlatitude="-90", minlongitude="-180", maxlatitude="90",
                    maxlongitude="180", limit="20000", maxdepth="1000", mindepth="-100", maxmagnitude="", minmagnitude="",
                    orderby="time"):
    """Build a USGS earthquake-catalog CSV query and save its result to ``file_name``.

    Every keyword argument is sent as the query parameter of the same name.
    An argument left as an empty string is still sent, with no value.
    Values are percent-encoded, so characters such as ``+``, ``&`` or spaces
    cannot corrupt the query string; plain values (dates, numbers, words)
    produce exactly the same URL as simple concatenation would.

    Parameters
    ----------
    file_name : str
        Path of the CSV file that ``write_data_to_file`` creates.
    starttime, endtime : str
        Time window of the query.
    minlatitude, maxlatitude, minlongitude, maxlongitude : str
        Bounding box of the query.
    limit : str
        Maximum number of events requested.
    mindepth, maxdepth : str
        Depth range of the query.
    minmagnitude, maxmagnitude : str
        Magnitude range of the query.
    orderby : str
        Sort order requested from the service.

    Returns
    -------
    None
    """
    # A list of pairs keeps the parameters in a fixed, readable order.
    query = [
        ("format", "csv"),
        ("starttime", starttime),
        ("endtime", endtime),
        ("minlatitude", minlatitude),
        ("minlongitude", minlongitude),
        ("maxlatitude", maxlatitude),
        ("maxlongitude", maxlongitude),
        ("limit", limit),
        ("maxdepth", maxdepth),
        ("mindepth", mindepth),
        ("maxmagnitude", maxmagnitude),
        ("minmagnitude", minmagnitude),
        ("orderby", orderby),
    ]
    url = "https://earthquake.usgs.gov/fdsnws/event/1/query?" + urllib.parse.urlencode(query)

    write_data_to_file(file_name, url)

### Fetch data for the past 20 years (2001–2020) for earthquakes of magnitude 6 and above
- The query's bounding box (latitude -55 to 65, longitude 120 to 290) covers the 'Pacific Ring of Fire', a region of high tectonic activity

In [9]:
# When starttime / endtime are left empty the service falls back to its own
# defaults: start = now - 30 days, end = now.
twenty_year_highMag_data = "highMag_data_file.csv"
starttime, endtime = "2001-01-01", "2021-01-01"
minlatitude, maxlatitude = "-55", "65"
minlongitude, maxlongitude = "120", "290"
minmagnitude = "6"
raw_data_parser(
    twenty_year_highMag_data,
    starttime=starttime,
    endtime=endtime,
    minlatitude=minlatitude,
    minlongitude=minlongitude,
    maxlatitude=maxlatitude,
    maxlongitude=maxlongitude,
    minmagnitude=minmagnitude,
)

### Fetch data for the last 50 years (1971–2020) for earthquakes of magnitude 6 and above
- The query's bounding box (latitude -55 to 65, longitude 120 to 290) covers the 'Pacific Ring of Fire', a region of high tectonic activity

In [11]:
# Same bounding box and magnitude threshold as the 20-year query, but with
# the window starting in 1971.
fifty_year_highMag_data = "long_time_data_file.csv"
starttime, endtime = "1971-01-01", "2021-01-01"
minlatitude, maxlatitude = "-55", "65"
minlongitude, maxlongitude = "120", "290"
minmagnitude = "6"
raw_data_parser(
    fifty_year_highMag_data,
    starttime=starttime,
    endtime=endtime,
    minlatitude=minlatitude,
    minlongitude=minlongitude,
    maxlatitude=maxlatitude,
    maxlongitude=maxlongitude,
    minmagnitude=minmagnitude,
)

### Fetch data for the last 50 years for earthquakes in Ireland
- Earthquake data for Ireland only goes back to 1981

In [10]:
# No endtime or minmagnitude is passed here, so raw_data_parser sends its
# default (empty) values for both.
Ireland_data = "Ireland_data_file.csv"
starttime = "1971-01-01"
minlatitude, maxlatitude = "51", "55"
minlongitude, maxlongitude = "-11", "-5"
raw_data_parser(
    Ireland_data,
    starttime=starttime,
    minlatitude=minlatitude,
    minlongitude=minlongitude,
    maxlatitude=maxlatitude,
    maxlongitude=maxlongitude,
)