In [1]:
import pandas as pd
import csv
import re

# Function Declarations

In [2]:
def parse_file(csv_filename, file_path):
    """
    Parses a text file line-by-line and writes each line to a CSV file as one comma separated row.

    Every input line is split on runs of whitespace; each resulting token
    becomes one CSV field. The CSV file is created, or overwritten if it
    already exists.

    Args:
    - csv_filename: path to csv file to write (string)
    - file_path: text file to parse (string)

    Raises:
    - OSError: if either file cannot be opened. The input is opened first, so
      a missing input file does not create or truncate the CSV file.
    """
    # Both handles live in one `with` so they are closed even if a write fails.
    # newline='' hands line-ending control to the csv module, which otherwise
    # produces doubled line endings (blank rows) on Windows.
    with open(file_path) as txt_file, open(csv_filename, 'w', newline='') as data_file:
        csv_writer = csv.writer(data_file, delimiter=",")
        for line in txt_file:
            csv_writer.writerow(line.split())


def dms_to_dd(dms):
    """
    Converts a DMS (Degree Minutes Seconds) coordinate separated by "." into DD (Decimal Degrees) format.

    The string is split on runs of "." and double-quote characters and the
    first three fields are read as degrees, minutes and seconds. A leading
    "-" on the degrees field makes the whole coordinate negative.

    NOTE(review): the third field is divided by 36000, not 3600, so it is
    effectively read as tenths of an arc-second ("55.24.300" -> 55.408333...).
    This presumably matches the input data; confirm against the source file.

    Args:
    - dms: DMS formatted coordinate (string), e.g. "55.24.300"

    Returns:
    - the coordinate in decimal degrees (float)

    Raises:
    - IndexError: if the string has fewer than three fields
    - ValueError: if a field is not numeric
    """
    # Raw string: the same pattern as before, without the invalid "\." escape
    # that a plain string literal triggers a warning for.
    parts = re.split(r'[\."]+', dms)  # create list by splitting at "."
    degrees, minutes, seconds = parts[0], parts[1], parts[2]

    # The sign belongs to the whole coordinate, so take it off the degrees
    # and re-apply it after summing; otherwise "-14.30.0" would give -13.5.
    is_negative = degrees.strip().startswith('-')
    magnitude = abs(float(degrees)) + float(minutes)/60 + float(seconds)/36000
    return -magnitude if is_negative else magnitude


def dd_to_dms(deg):
    """
    Splits a DD (Decimal Degrees) value into DMS (Degree Minutes Seconds) components.

    The sign is carried only by the degrees element (truncated toward zero);
    minutes and seconds are always non-negative. For values strictly between
    -1 and 0 the degrees element is 0, so the sign is not visible in the result.

    Args:
    - deg: DD formatted coordinate (float)

    Returns:
    - list [degrees (int), minutes (int), seconds (float)]
    """
    whole_degrees = int(deg)
    # Fractional part of the degrees, expressed in minutes.
    total_minutes = abs(deg - whole_degrees) * 60
    whole_minutes = int(total_minutes)
    # Fractional part of the minutes, expressed in seconds.
    remaining_seconds = (total_minutes - whole_minutes) * 60
    return [whole_degrees, whole_minutes, remaining_seconds]


def dms_to_dd_dir(dms):
    """
    Converts a DMS (Degree Minutes Seconds) coordinate with a direction letter into DD (Decimal Degrees) format.

    The string is split on every run of non-word characters, so any
    punctuation or whitespace may separate the fields (e.g. "55 24 30 N").
    Because "." is also a separator, decimal seconds are not supported.
    The fields are read, in order, as degrees, minutes, seconds and an
    optional direction. A direction of S or W (any case) negates the result;
    any other or missing direction leaves it positive.

    Args:
    - dms: DMS formatted coordinate (string)

    Returns:
    - the coordinate in decimal degrees (float)

    Raises:
    - IndexError: if the string has fewer than three fields
    - ValueError: if degrees, minutes or seconds is not numeric
    """
    # Drop empty fields produced by leading/trailing separators.
    parts = [part for part in re.split(r'[^\d\w]+', dms) if part]
    degrees = parts[0]
    minutes = parts[1]
    seconds = parts[2]
    direction = parts[3] if len(parts) > 3 else ''

    dd = float(degrees) + float(minutes)/60 + float(seconds)/(60*60)
    # Southern and western hemispheres are negative in decimal degrees.
    if direction.upper() in ('S', 'W'):
        dd *= -1
    return dd


def dd_to_dms_dir(deg, axis=None):
    """
    Converts DD (Decimal Degrees) into DMS (Degree Minutes Seconds) components, optionally with a direction.

    Without `axis` the result is [degrees, minutes, seconds], where the sign
    is carried by the degrees element only. With `axis` given, the degrees
    are returned unsigned and a hemisphere letter is appended instead, which
    also keeps the sign of values between -1 and 0.

    Args:
    - deg: DD formatted coordinate (float)
    - axis: None (default), 'lat' for N/S or 'lon' for E/W (string)

    Returns:
    - [degrees (int), minutes (int), seconds (float)] when axis is None
    - [degrees (int), minutes (int), seconds (float), direction (string)] otherwise

    Raises:
    - ValueError: if axis is neither None, 'lat' nor 'lon'
    """
    hemispheres = {'lat': ('N', 'S'), 'lon': ('E', 'W')}
    if axis is not None and axis not in hemispheres:
        raise ValueError("axis must be None, 'lat' or 'lon'")

    # Work on the magnitude; the sign is re-attached at the end.
    magnitude = abs(deg)
    whole_degrees = int(magnitude)
    total_minutes = (magnitude - whole_degrees) * 60
    whole_minutes = int(total_minutes)
    remaining_seconds = (total_minutes - whole_minutes) * 60

    if axis is None:
        # Legacy shape: signed degrees, no direction letter.
        return [int(deg), whole_minutes, remaining_seconds]

    positive, negative = hemispheres[axis]
    direction = negative if deg < 0 else positive
    return [whole_degrees, whole_minutes, remaining_seconds, direction]

In [3]:
# Turn the whitespace-separated text export into a comma separated file.
parse_file(csv_filename='fishdata_dms.csv', file_path='preformatted_data.txt')

In [5]:
# Load the CSV written by parse_file; its first row supplies the column names.
df = pd.read_csv(filepath_or_buffer='fishdata_dms.csv')

In [6]:
# Convert both coordinate columns from DMS strings to decimal degrees.
# NOTE: the columns are overwritten in place, so re-running this cell without
# reloading df passes floats to dms_to_dd, which expects strings.
df['latitude'] = df['latitude'].apply(dms_to_dd)
df['longitude'] = df['longitude'].apply(dms_to_dd)
df

Unnamed: 0,Fish_type,Location_name,latitude,longitude
0,Bornholm_Laks,31’eren,55.408333,14.986111
1,Bornholm_Laks,Anders_Sten,55.572222,15.150000
2,Bornholm_Laks,Davids_Banke,55.366667,14.683333
3,Bornholm_Laks,Don’s_Knold,55.444444,14.902778
4,Bornholm_Laks,Gravstenen_øst,55.483333,14.925000
...,...,...,...,...
344,unknown,Mittelsten,55.602222,14.651944
345,unknown,Ruski,55.601111,14.616667
346,unknown,Synrenabbe,55.519444,14.800000
347,unknown,Synrenabbe,55.551389,14.617222


In [7]:
# Save the converted coordinates. to_csv returns None when given a file path,
# so there is no result worth binding to a variable.
df.to_csv('fishdata_dd.csv', index=False)