In [6]:
import netCDF4 as nc
import pandas as pd
import os 
import numpy as np
from datetime import datetime, timedelta
from itertools import product
import random
import concurrent.futures
import matplotlib.pyplot as plt
import glob

In [2]:
import math

def haversine_distance(lon1, lat1, lon2, lat2, radius=6371):
    """
    Calculate the great-circle (haversine) distance between two points
    given in decimal degrees.

    Parameters
    ----------
    lon1, lat1 : float
        Longitude and latitude of the first point, in decimal degrees.
    lon2, lat2 : float
        Longitude and latitude of the second point, in decimal degrees.
    radius : float, optional
        Sphere radius; the result is expressed in the same unit.
        Defaults to 6371 (mean Earth radius in kilometers).

    Returns
    -------
    float
        Distance along the sphere between the two points. NaN inputs
        propagate to a NaN result.
    """
    # Convert decimal degrees to radians
    lon1, lat1, lon2, lat2 = map(math.radians, (lon1, lat1, lon2, lat2))

    # Haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = math.sin(dlat / 2) ** 2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2) ** 2

    # Rounding can push `a` marginally above 1 for (near-)antipodal points,
    # which would make sqrt(1 - a) raise ValueError. An explicit comparison is
    # used instead of min() so that a NaN value is left untouched.
    if a > 1.0:
        a = 1.0

    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))

    return c * radius

def find_closest_point(longitudes, latitudes, reference_lon, reference_lat):
    """
    Return the (lon, lat) candidate nearest to a reference location.

    `longitudes` and `latitudes` are walked in parallel (pairing stops at the
    shorter of the two) and every pair is scored with `haversine_distance`
    against (`reference_lon`, `reference_lat`).

    Returns the winning pair as a tuple, or None when no pair scores below
    infinity (for example when either input is empty). On ties the earliest
    pair is kept.
    """
    best_pair = None
    best_km = float('inf')

    for cand_lon, cand_lat in zip(longitudes, latitudes):
        km = haversine_distance(reference_lon, reference_lat, cand_lon, cand_lat)
        # Only a strictly smaller distance replaces the current best.
        if not km < best_km:
            continue
        best_km, best_pair = km, (cand_lon, cand_lat)

    return best_pair

In [34]:

# Extract, for a fixed list of locations, the time series of every data
# variable stored in the matching NetCDF file(s) and write them to CSV.

# Define the pattern to match files
file_pattern = '*ds.nc'

# Only files whose name starts with this year are processed.
target_year = "2024"

# Variables that describe the grid itself rather than a data field.
coordinate_vars = ('time', 'longitude', 'latitude', 'expver')

# [longitude, latitude] pairs, in decimal degrees; each must lie on the file's grid.
# NOTE(review): [2.75, 30.75] is listed twice, which produces duplicate rows in
# the output — presumably one of them was meant to be a different location; confirm.
popular_cities_coords = [
    [3.0, 36.75],  # Algiers
    [-0.75, 35.75],  # Oran
    [1.5, 35.5],   # Tiaret
    [5.0, 36.75],   # Bejaia
    [5.5, 36.0],   # Setif
    [-0.5, 33.0],  # Naama
    [3.25, 34.75],   # Djelfa
    [5.75, 34.75],   # Biskra
    [0.75, 32.75],
    [2.75, 33.75],
    [6, 33.5],
    [7.5, 31.25],
    [2.75, 30.75],
    [2.75, 30.75],
    [0.5, 30],
    [-6.25, 28],
    [-1.5, 26],
    [4.25, 24],
    [4.5, 21],
    [8.5, 24],
]


def _grid_index(axis_values, target, axis_name):
    """Return the index of the entry of `axis_values` equal to `target`.

    The comparison tolerates small floating-point differences between the
    stored coordinate and the requested one. Raises ValueError with a readable
    message when the axis is empty or `target` is not on the grid.
    """
    axis_values = np.asarray(axis_values, dtype=float)
    if axis_values.size == 0:
        raise ValueError(f"{axis_name} axis is empty")
    nearest = int(np.abs(axis_values - target).argmin())
    if not np.isclose(axis_values[nearest], target, rtol=0.0, atol=1e-4):
        raise ValueError(
            f"{axis_name} {target} is not on the file's grid "
            f"(nearest grid value is {axis_values[nearest]})"
        )
    return nearest


# Use glob to find files matching the pattern
matching_files = glob.glob(os.path.join("./", file_pattern))

# Loop over matching files
for file_path in matching_files:
    # The year is the first four characters of the file name; using the
    # basename avoids depending on the "./" prefix of the glob result.
    year = os.path.basename(file_path)[:4]
    if year != target_year:
        # Skipped before opening, so no dataset handle is left unclosed.
        continue

    # The context manager closes the dataset even if processing fails.
    with nc.Dataset(file_path, 'r') as nc_file:
        # Example data dimensions
        num_timestamps = nc_file.dimensions["time"].size
        num_longitude = nc_file.dimensions["longitude"].size
        num_latitude = nc_file.dimensions["latitude"].size

        # NOTE(review): the time axis is synthesized, not read from the file —
        # this assumes the records start on 1 January and are 6 hours apart; confirm.
        # A Timedelta is used for the step instead of the deprecated '6H' alias.
        timestamps = pd.date_range(
            start=f'{year}-01-01',
            periods=num_timestamps,
            freq=pd.Timedelta(hours=6),
        )

        # Read the coordinate axes once instead of once per location.
        grid_longitudes = nc_file.variables["longitude"][:]
        grid_latitudes = nc_file.variables["latitude"][:]

        lat_index = []
        long_index = []
        for lon, lat in popular_cities_coords:
            long_index.append(_grid_index(grid_longitudes, lon, "longitude"))
            lat_index.append(_grid_index(grid_latitudes, lat, "latitude"))

        lon_lat_pairs = [(lon, lat) for lon, lat in popular_cities_coords]

        # One row per (timestamp, location), time-major: all locations for the
        # first timestamp, then all locations for the second, and so on.
        df = pd.DataFrame(
            [(stamp, lon, lat) for stamp, (lon, lat) in product(timestamps, lon_lat_pairs)],
            columns=['Time', 'longitude', 'latitude'],
        )

        # Snapshot of the bare index frame (kept for backward compatibility).
        output_file = 'output.csv'
        df.to_csv(output_file, index=False)

        for var_name, variable in nc_file.variables.items():
            if var_name in coordinate_vars:
                continue

            dims = variable.dimensions
            if not {'time', 'longitude', 'latitude'}.issubset(dims):
                print(f"Skipping {var_name}: not defined on time/latitude/longitude")
                continue

            # Resolve each axis by its dimension name rather than by a fixed
            # position, so the longitude index can never be applied to the
            # latitude axis (or vice versa) whatever the storage order is.
            axis_of = {dim_name: axis for axis, dim_name in enumerate(dims)}

            varss = variable[:]
            values = []
            # Same time-major order as the rows of `df`.
            for time_idx in range(num_timestamps):
                for lon_idx, lat_idx in zip(long_index, lat_index):
                    # Any additional axis (e.g. 'expver') is read at index 0.
                    selector = [0] * len(dims)
                    selector[axis_of['time']] = time_idx
                    selector[axis_of['longitude']] = lon_idx
                    selector[axis_of['latitude']] = lat_idx
                    values.append(varss[tuple(selector)])

            df[var_name] = values

    # Make sure the target directory exists before writing the result.
    os.makedirs('data', exist_ok=True)
    output_file = f'data/{year}.csv'
    df.to_csv(output_file, index=False)