In [63]:
import requests
import csv

def query_wikidata():
    """Fetch meteorite records from the Wikidata Query Service.

    The SPARQL query selects every item that is an instance of Q60186 and has
    a coordinate location (P625). Country (P17), discovery time (P575) and the
    value bound to ``?year`` (P571) are optional, so those keys may be absent
    from individual bindings.

    NOTE(review): the in-query comment calls P571 "year of discovery", but
    P571 is Wikidata's "inception" property -- confirm it is the intended
    source for the year column.

    Returns:
        list[dict] | None: the ``results.bindings`` list of the SPARQL JSON
        response, or ``None`` when the request fails, the server answers with
        a non-200 status, or the body is not the expected JSON document. An
        error message is printed in every failure case.
    """
    sparql_query = """
    SELECT ?meteorite ?meteoriteLabel ?coordinateLocation ?countryLabel ?discoveryTime ?year
    WHERE
    {
        ?meteorite wdt:P31 wd:Q60186;  # instance of meteorite
                   wdt:P625 ?coordinateLocation.  # coordinate location of meteorite
        OPTIONAL { ?meteorite wdt:P17 ?country. }  # located in country
        OPTIONAL { ?meteorite wdt:P575 ?discoveryTime. }  # time of discovery
        OPTIONAL { ?meteorite wdt:P571 ?year. }  # year of discovery
        SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
    }
    """

    api_url = "https://query.wikidata.org/sparql"

    # Wikimedia's User-Agent policy asks clients to identify themselves;
    # generic library user agents may be throttled or rejected.
    # TODO: add a contact address/URL to this string before heavy use.
    headers = {'User-Agent': 'meteorites-wikidata-notebook/1.0 (python-requests)'}

    # Without a timeout requests waits forever on a stalled connection, which
    # would hang the notebook cell. (connect timeout, read timeout) in seconds.
    timeout_seconds = (10, 90)

    try:
        response = requests.get(
            api_url,
            params={'format': 'json', 'query': sparql_query},
            headers=headers,
            timeout=timeout_seconds,
        )
    except requests.RequestException as exc:
        # DNS failure, refused connection, timeout, ... -> same failure path
        # as a bad status code instead of an unhandled traceback.
        print("Error fetching data from Wikidata:", exc)
        return None

    if response.status_code != 200:
        print("Error fetching data from Wikidata:", response.text)
        return None

    try:
        return response.json()['results']['bindings']
    except (ValueError, KeyError) as exc:
        # A 200 response whose body is truncated or not SPARQL-results JSON.
        print("Error fetching data from Wikidata:", exc)
        return None

import re

def extract_coordinates(coordinate_str):
    """Pull the two numeric tokens out of a ``Point(a b)`` literal.

    The literal must start with ``Point(``; anything else (including a
    literal with leading text) is treated as unparseable.

    Args:
        coordinate_str: text such as ``"Point(13.38 52.51)"``.

    Returns:
        A 2-tuple of strings ``(first, second)`` in the order the numbers
        appear inside the parentheses, or ``(None, None)`` when the text does
        not match.

    NOTE(review): Wikidata WKT literals are written ``Point(longitude
    latitude)``, so ``first`` is presumably the longitude -- confirm how
    callers label the two values.
    """
    found = re.match(r"Point\(([-\d.]+)\s+([-\d.]+)\)", coordinate_str)
    if found is None:
        # No leading Point(...) literal: signal "no coordinates" to the caller
        return None, None
    return found.groups()

def save_to_csv(data):
    """Write SPARQL result bindings to ``meteorites_wikidata_global.csv``.

    The file is overwritten on every call and starts with a header row.

    Args:
        data: iterable of binding dicts. Each must contain ``meteorite``,
            ``meteoriteLabel`` and ``coordinateLocation`` entries with a
            ``'value'`` key; ``countryLabel``, ``discoveryTime`` and ``year``
            are optional and become empty cells when absent.

    NOTE(review): extract_coordinates returns the two numbers in the order
    they appear in the literal, and Wikidata WKT literals are written
    ``Point(longitude latitude)``. The first value therefore lands in the
    "Latitude" column although it is presumably the longitude (and vice
    versa). csv_to_geojson in this notebook reads the two columns back in the
    same order, so correcting only one side would flip the GeoJSON output --
    change both together.
    """
    csv_file = 'meteorites_wikidata_global.csv'
    header = ["Meteorite ID", "Meteorite Name", "Latitude", "Longitude", "Country", "Discovery Time", "Year"]

    def value_or_blank(binding, key):
        # Optional SPARQL variables are simply missing from the binding
        return binding[key]['value'] if key in binding else ''

    def as_row(binding):
        # The entity URI ends in the QID, e.g. .../entity/Q12345 -> Q12345
        qid = binding['meteorite']['value'].split('/')[-1]
        label = binding['meteoriteLabel']['value']
        first, second = extract_coordinates(binding['coordinateLocation']['value'])
        return [
            qid,
            label,
            first,   # written under "Latitude" (see NOTE above)
            second,  # written under "Longitude" (see NOTE above)
            value_or_blank(binding, 'countryLabel'),
            value_or_blank(binding, 'discoveryTime'),
            value_or_blank(binding, 'year'),
        ]

    with open(csv_file, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(header)
        # Rows are produced lazily, one binding at a time
        writer.writerows(as_row(item) for item in data)

def main():
    """Fetch the meteorite bindings and store them as CSV.

    Prints "Failed" when query_wikidata returns nothing usable (``None`` or
    an empty result); otherwise writes the CSV and prints a confirmation.
    """
    bindings = query_wikidata()
    if not bindings:
        print("Failed")
        return

    save_to_csv(bindings)
    print("saved to meteorites_wikidata_global.csv")

# Run the fetch-and-save pipeline when this code executes as the main module
# (it is skipped if the code is imported from elsewhere).
if __name__ == "__main__":
    main()


Data saved to meteorites_wikidata_global.csv


In [67]:
import csv
import json

def csv_to_geojson(csv_file, geojson_file):
    """Convert a meteorite CSV into a GeoJSON FeatureCollection of points.

    Args:
        csv_file: path of a UTF-8 CSV with at least the columns
            "Meteorite Name", "Latitude", "Longitude" and "Meteorite ID".
        geojson_file: path of the GeoJSON file to (over)write.

    Rows whose Latitude or Longitude cell is empty, or cannot be parsed as a
    float, are skipped without any message.

    NOTE(review): GeoJSON (RFC 7946) positions are ``[longitude, latitude]``,
    while this function emits ``[Latitude column, Longitude column]``. Whether
    the output is correct depends on what those columns really contain; in
    this notebook save_to_csv appears to store the values the other way round,
    which would cancel out. Verify the CSV producer before changing the order.
    """
    features = []

    with open(csv_file, mode='r', newline='', encoding='utf-8') as source:
        for record in csv.DictReader(source):
            name = record['Meteorite Name']
            lat_text = record['Latitude']
            lon_text = record['Longitude']
            qid = record['Meteorite ID']  # Wikidata QID kept as a property

            # Nothing to plot without both coordinate cells
            if not (lat_text and lon_text):
                continue

            try:
                position = [float(lat_text), float(lon_text)]
            except ValueError:
                # Non-numeric coordinate text: drop the row
                continue

            features.append({
                "type": "Feature",
                "geometry": {
                    "type": "Point",
                    "coordinates": position
                },
                "properties": {
                    "name": name,
                    "meteorite_id": qid
                }
            })

    # Wrap the collected features and serialise them with a 2-space indent
    with open(geojson_file, mode='w', encoding='utf-8') as outfile:
        json.dump({"type": "FeatureCollection", "features": features}, outfile, indent=2)

# Convert the CSV file (same file name that save_to_csv writes) into a
# GeoJSON file alongside it.
csv_to_geojson('meteorites_wikidata_global.csv', 'meteorites_wikidata_global.geojson')
