In [3]:


import csv
import json

def _parse_optional_float(value):
    # Turn one raw CSV cell into a float; missing or blank cells become None.
    if value is None:
        # csv.DictReader fills short rows with None for the missing fields.
        return None
    text = value.strip()
    return float(text) if text else None


def convert_csv_to_json(csv_filepath, json_filepath):
    """
    Convert a CSV file into a JSON file holding a list of row objects.

    Every CSV row becomes one JSON object keyed by the CSV header. The
    'location_latitude', 'location_longitude' and 'heading' columns are
    converted to floats; cells that are missing, empty or contain only
    whitespace are written as null. All other columns stay strings.

    Args:
        csv_filepath: Path of the UTF-8 CSV file to read.
        json_filepath: Path of the JSON file to write (overwritten if present).

    Raises:
        KeyError: If one of the three numeric columns is absent from the header.
        ValueError: If a numeric cell holds text that is not a valid float.
    """
    numeric_columns = ('location_latitude', 'location_longitude', 'heading')

    # Read every row, normalising the numeric columns as we go.
    data = []
    with open(csv_filepath, 'r', newline='', encoding='utf-8') as csvfile:
        for row in csv.DictReader(csvfile):
            for column in numeric_columns:
                row[column] = _parse_optional_float(row[column])
            # No per-row print here: with large inputs it floods the notebook
            # output and slows the conversion considerably.
            data.append(row)

    # Write the data to a JSON file
    with open(json_filepath, 'w', encoding='utf-8') as jsonfile:
        json.dump(data, jsonfile, indent=4)

# Example usage: read the CSV named below and write the converted JSON
# next to it under the same base name.
csv_path = 'geo_stats_data_7_days.csv'
json_path = 'geo_stats_data_7_days.json'
convert_csv_to_json(csv_filepath=csv_path, json_filepath=json_path)




In [31]:
import json
from datetime import datetime
from geopy.distance import geodesic
from geopy import distance
from tqdm import tqdm
from math import radians, cos, sin, asin, sqrt

def haversine(lat1, lon1, lat2, lon2, threshold=10):
    """
    Tell whether two points on the earth lie within ``threshold`` kilometers
    of each other, measured along the great circle (haversine formula).

    Note that this returns a boolean, not the distance itself.

    Args:
        lat1, lon1: Latitude and longitude of the first point, decimal degrees.
        lat2, lon2: Latitude and longitude of the second point, decimal degrees.
        threshold: Maximum distance in kilometers that still counts as close.

    Returns:
        True if the great-circle distance is <= ``threshold``; False if it is
        larger or if any of the four coordinates is None.
    """
    # A missing coordinate can never be "close" to anything.
    if lat1 is None or lon1 is None or lat2 is None or lon2 is None:
        return False

    # convert decimal degrees to radians
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])

    # haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
    # Floating-point rounding can push `a` marginally outside [0, 1] (e.g. for
    # near-antipodal points), which would make sqrt/asin raise a domain error.
    a = min(1.0, max(0.0, a))
    c = 2 * asin(sqrt(a))
    r = 6371  # Radius of earth in kilometers
    return c * r <= threshold

# Load JSON data from a file
def load_json_data(filepath):
    """Read the UTF-8 file at ``filepath`` and return its parsed JSON content."""
    with open(filepath, 'r', encoding='utf-8') as handle:
        raw_text = handle.read()
    return json.loads(raw_text)

# Save JSON data to a file
def save_json_data(data, filepath):
    """Serialise ``data`` as JSON (4-space indent) into the UTF-8 file at ``filepath``."""
    with open(filepath, 'w', encoding='utf-8') as sink:
        json.dump(data, sink, indent=4)

# Check if a ship visits a port based on proximity
def check_proximity(ship_coords, port_coords, threshold_km=10):
    """
    Return True when the great-circle distance between ``ship_coords`` and
    ``port_coords`` (as computed by geopy) is at most ``threshold_km`` kilometers.
    """
    return distance.great_circle(ship_coords, port_coords).kilometers <= threshold_km

# Process port visits based on ship locations and port coordinates
def process_visits(ships_data, ports_data):
    """
    Match ship position records against ports and summarise the results.

    A record counts as being "at" a port when `haversine` reports the two
    positions as close (its default threshold applies).

    Args:
        ships_data: List of dicts, each with 'site_name', 'location_latitude'
            and 'location_longitude'. Several records may share a 'site_name'.
        ports_data: List of dicts, each with 'port_name',
            'geo_location_latitude' and 'geo_location_longitude'.

    Returns:
        A tuple ``(updated_ships_data, ports_data)``:
        * ``updated_ships_data`` is a new list with one dict per input record,
          a copy of the record plus 'visited_ports': the ports matched by any
          record sharing that 'site_name', in order of first match.
        * ``ports_data`` is the same list that was passed in; each port dict
          is modified in place to carry 'visit_count', the number of position
          records that matched the port (not the number of distinct ships).
    """
    port_visits_count = {port['port_name']: 0 for port in ports_data}
    port_coords = {
        port['port_name']: (port['geo_location_latitude'], port['geo_location_longitude'])
        for port in ports_data
    }

    # Ports seen per ship. A dict is used as an insertion-ordered set so the
    # resulting 'visited_ports' lists are identical from run to run; iterating
    # a set of strings would depend on per-process hash randomisation.
    ship_visits = {ship['site_name']: {} for ship in ships_data}

    # Analyze each ship's location to see if it is close to any port
    for ship in tqdm(ships_data, desc="Processing ships", unit="ship"):
        ship_lat = ship['location_latitude']
        ship_lon = ship['location_longitude']
        if ship_lat is None or ship_lon is None:
            # No position: haversine would reject every port anyway, so skip
            # the whole inner loop.
            continue

        seen_ports = ship_visits[ship['site_name']]
        for port_name, (port_lat, port_lon) in port_coords.items():
            if haversine(ship_lat, ship_lon, port_lat, port_lon):
                seen_ports[port_name] = None
                port_visits_count[port_name] += 1

    # Create a new list of ship data with ports they visited
    updated_ships_data = [
        {**ship, 'visited_ports': list(ship_visits[ship['site_name']])}
        for ship in ships_data
    ]

    # Update port data with visit counts (mutates the caller's dicts)
    for port in ports_data:
        port['visit_count'] = port_visits_count[port['port_name']]

    return updated_ships_data, ports_data

# Example usage
if __name__ == "__main__":
    # Inputs: ship position records and the port reference list.
    ships_data = load_json_data(filepath='geo_stats_data_7_days.json')
    ports_data = load_json_data(filepath='port_geo_location.json')

    # Match every ship record against every port.
    updated_ships_data, updated_ports_data = process_visits(
        ships_data=ships_data,
        ports_data=ports_data,
    )

    # Outputs: ports with visit counts first, then ships with visited ports.
    save_json_data(data=updated_ports_data, filepath='updated_ports_data.json')
    save_json_data(data=updated_ships_data, filepath='updated_ships_data.json')


Processing ships: 100%|██████████| 776853/776853 [32:02<00:00, 404.13ship/s] 
