In [16]:
import numpy as np
import pandas as pd

# Set the random seed for reproducibility
np.random.seed(42)

# Define the boundaries of France's latitude and longitude
min_lat, max_lat = 41.333, 51.124
min_lon, max_lon = -5.5, 9.662

# Generate random latitude and longitude coordinates within the boundaries
lats = np.random.uniform(min_lat, max_lat, 1000)
lons = np.random.uniform(min_lon, max_lon, 1000)

# Create a Pandas DataFrame to store the coordinates
df = pd.DataFrame({'Latitude': lats, 'Longitude': lons})

# Display the first few rows of the DataFrame
print(df.head())

    Latitude  Longitude
0  45.000122  -2.693015
1  50.641444   2.716302
2  48.499953   7.735605
3  47.194465   5.601994
4  42.860579   6.729080


In [17]:
import requests

# Make a GET request to a URL
url = "https://france-geojson.gregoiredavid.fr/repo/regions.geojson"
response = requests.get(url)

# Check the response status code
if response.status_code == 200:
    # Request was successful
    data = response.json()  # Extract the JSON data from the response
    # Process the data as needed
else:
    # Request failed
    print("GET request failed with status code:", response.status_code)

In [18]:
features=data.get('features')

In [19]:
#Some of data is nested multiple times like :[[[[cor],[cor],[cor]]]] 
def unpack_list(L):
    if isinstance(L[0][0],list):
        return unpack_list(L[0])   
    return L

In [20]:
from shapely.geometry import Polygon, Point
codes=[]
names=[]

def insert_features_to_df(df, features):

    for index, row in df.iterrows():
        latitude = row['Latitude']
        longitude = row['Longitude']
        # Flag to keep track of match
        match_found = False
        for feature in features:
            try:
                coordinates = feature['geometry']['coordinates']  # Extract the coordinates of the polygon

                polygon_coordinates = unpack_list(coordinates) #getting list in [[lat,long],[lat,long]] format
                mapped_polygon_coordinates = [tuple(sublist) for sublist in polygon_coordinates] #getting list in [(lat,long),(lat,long)] format for Polygon
                polygon = Polygon(mapped_polygon_coordinates)
                
                # Check if the coordinate is within the polygon
                if polygon.contains(Point(longitude, latitude)):
                    err=3
                    code = feature['properties'].get('code')
                    name = feature['properties'].get('nom')

                    # Append the values to the respective lists
                    codes.append(code)
                    names.append(name)

                    # Set the flag to True
                    match_found = True

                    # Break the loop once a match is found
                    break

            except Exception as e:
                print(f"Error processing feature: {e} ")

        # If no match is found, assign None or a default value to the columns
        if not match_found:
            codes.append(None)
            names.append(None)
    # Assign the new lists to the DataFrame columns
    df['domain_id']=codes
    df['region']=names
    
    return df

In [21]:
df_codes_regions=insert_features_to_df(df.copy(),features)

In [22]:
null_counts = df_codes_regions.isnull().sum()
print("None's values by columns")
print(null_counts)

None's values by columns
Latitude       0
Longitude      0
domain_id    611
region       611
dtype: int64


In [23]:
df_codes_regions_no_null=df_codes_regions.dropna()
df_codes_regions_no_null.insert(0, 'id', range(len(df_codes_regions_no_null)))

In [24]:
print(df_codes_regions_no_null.head())

    id   Latitude  Longitude domain_id                   region
1    0  50.641444   2.716302        32          Hauts-de-France
2    1  48.499953   7.735605        44                Grand Est
3    2  47.194465   5.601994        27  Bourgogne-Franche-Comté
9    3  48.265739   1.920661        24      Centre-Val de Loire
17   4  46.470890   3.111186        84     Auvergne-Rhône-Alpes


In [25]:
# Make a GET request to a URL
url_meta_data = "http://storage.gra.cloud.ovh.net/v1/AUTH_555bdc85997f4552914346d4550c421e/gra-vigi6-archive_public/2023/06/15/140214/CDP_CARTE_EXTERNE.json"
response = requests.get(url_meta_data)

# Check the response status code
if response.status_code == 200:
    # Request was successful
    meta_data = response.json()  # Extract the JSON data from the response
    # Process the data as needed
else:
    # Request failed
    print("GET request failed with status code:", response.status_code)

In [26]:
for item in meta_data:
    print(item)

meta_data.get("product")
periods=meta_data.get("product").get('periods')

product
meta


In [27]:
dataframes_by_J=[]

In [28]:
def generate_df_of_domains(domains_arr_obj):
    df = pd.DataFrame()  # Create an empty DataFrame

    for obj in domains_arr_obj:
        domain_id = obj['domain_id']
        phenomenon_items = obj['phenomenon_items']

        df_temp = pd.json_normalize(phenomenon_items, 'timelaps_items', ['phenomenon_id', 'phenomenon_max_color_id'])
        df_temp['domain_id'] = domain_id

        df = pd.concat([df, df_temp], ignore_index=True)

    return df

In [29]:
def set_binary_phenomen_columns(df):
    unique_ids = df['id'].unique()

    # Create phenomenon_n columns with 0 values
    for i in range(1, 10):
        column_name = 'phenomenon_' + str(i)
        df[column_name] = 0

    for id_value in unique_ids:
        subset = df.loc[df['id'] == id_value].copy()  # Create a copy of the subset

        # Map phenomenon_n columns based on phenomenon_id values
        for i in subset['phenomenon_id'].unique():
            df.loc[subset.index, 'phenomenon_' + str(i)] = 1

    # Drop the 'phenomenon_id' column
    df.drop('phenomenon_id', axis=1, inplace=True)

    # Drop duplicates based on all columns
    df.drop_duplicates(inplace=True, ignore_index=True)


In [30]:
for i in range(len(periods)):
    #i corresponds to J (jour)
    domains=periods[i].get("timelaps").get("domain_ids")
    merged_df = pd.merge(df_codes_regions_no_null, generate_df_of_domains(domains), on='domain_id')
    #using one-hot encoding for phenomen_id ,because we know it is between 1-9 and multiple phenomens can be at the same time in one region
    set_binary_phenomen_columns(merged_df)
    # Explanation:
    # Since we don't have any information about the risk level using the provided API,
    # we use the "phenomenon_max_color_id" field that indicates the color of this phenomenon
    # for the given day. According to the provided documentation, the values of the "phenomenon_max_color_id"
    # field correspond to the risk levels as follows:
    # • "0" : green
    # • "1" : yellow
    # • "2" : orange
    # • "3" : red
    # This approach allows us to map the color codes to the respective risk levels.
    merged_df.rename(columns={'phenomenon_max_color_id': 'risk level'}, inplace=True)
    merged_df['risk level'] = merged_df['risk level'] - 1
    dataframes_by_J.append(merged_df)
    

In [31]:
for i in range(len(dataframes_by_J)):
    dataframe=dataframes_by_J[i]
    print(f'Data frame of J{i}')
    print(dataframe.head(),end='\n\n\n\n\n\n')

Data frame of J0
   id   Latitude  Longitude domain_id           region            begin_time  \
0   0  50.641444   2.716302        32  Hauts-de-France  2023-06-15T14:00:00Z   
1  13  50.236156   3.989765        32  Hauts-de-France  2023-06-15T14:00:00Z   
2  36  49.245307   2.641606        32  Hauts-de-France  2023-06-15T14:00:00Z   
3  38  49.865471   3.295267        32  Hauts-de-France  2023-06-15T14:00:00Z   
4  67  50.117639   3.326869        32  Hauts-de-France  2023-06-15T14:00:00Z   

               end_time  color_id risk level  phenomenon_1  phenomenon_2  \
0  2023-06-15T22:00:00Z         1          0             1             1   
1  2023-06-15T22:00:00Z         1          0             1             1   
2  2023-06-15T22:00:00Z         1          0             1             1   
3  2023-06-15T22:00:00Z         1          0             1             1   
4  2023-06-15T22:00:00Z         1          0             1             1   

   phenomenon_3  phenomenon_4  phenomenon_5  

In [32]:
color_mapping={
    1:"lightgreen",#green in original doc
    2:"darkgreen",#yelllow in original doc
    3:"orange",
    4:"red"
}

phenomenon_mapping={
    1: "wind", 
    2: "rain", 
    3: "thunderstorms", 
    4: "floods", 
    5: "snow or ice",
    6: "heatwave", 
    7: "cold wave", 
    8: "avalanches", 
    9: "storm surges",
}

risk_level_mapping={
    0:"green",
    1:"yellow",
    2:"orange",
    3:"red"
}

In [33]:
import folium
def print_map(dataframe):
    # Create a folium map centered on Hauts-de-France
    map_center = [50.641444, 2.716302]
    m = folium.Map(location=map_center, zoom_start=10)

    # Iterate over the rows of the data frame
    for index, row in dataframe.iterrows():
        # Get the latitude, longitude, and color_id
        lat = row['Latitude']
        lon = row['Longitude']
        color_id = row['color_id']
        risk_level = row['risk level']

        # Get the marker color based on the color_id
        marker_color = color_mapping.get(color_id, 'gray')  # Use gray if color_id mapping not found
        # Create a marker with the corresponding color
        marker = folium.Marker(location=[lat, lon], icon=folium.Icon(color=marker_color))

        # Create a popup with additional information
        popup_text = f'Risk Level: {risk_level_mapping.get(risk_level)}'
        for i in range(1, 10):
            isPhenomenon=row[f'phenomenon_{i}']
            if isPhenomenon==1:
                phenomenon = phenomenon_mapping.get(i)
            else:
                phenomenon = "None"
            popup_text += f'<br>Phenomenon {i}: {phenomenon}'

        popup = folium.Popup(popup_text, max_width=250)

        # Add the popup to the marker
        marker.add_child(popup)

        # Add the marker to the map
        marker.add_to(m)
        
    return m

In [34]:
print_map(dataframes_by_J[0])

In [35]:
print_map(dataframes_by_J[1])