In [10]:
import os
import pandas as pd
import json

In [2]:
def extract_species(x):
    """Return the species prefix of an image name.

    The prefix is everything before the second underscore, e.g.
    "Cygnus_olor_4" -> "Cygnus_olor". A name with fewer than two
    underscores is returned unchanged.

    Args:
        x: Image name. Expected to be a string.

    Returns:
        The "Genus_species" prefix, or None when `x` is not a string
        (for example a NaN coming from a missing CSV cell).
    """
    # Slicing never raises IndexError, so the only real failure mode is a
    # non-string value; check for it explicitly instead of catching.
    if not isinstance(x, str):
        return None
    return "_".join(x.split("_")[:2])


In [3]:
# Data locations: per-species folders and the combined image-feature table.
base_path = "./birds_dataset"
features_file = "./second_birds_combined_features.csv"

# Load the bird feature table; it must expose an 'image_name' column because
# later cells derive the species from it.
try:
    birds_features = pd.read_csv(features_file)
    if "image_name" not in birds_features.columns:
        raise KeyError("La colonne 'image_name' est absente dans second_birds_combined_features.csv.")
    print("Fichier des features chargé avec succès.")
except Exception as e:
    print(f"Erreur lors du chargement du fichier des features: {e}")
    # Re-raise rather than call exit(): exit() terminates the interpreter
    # (and the kernel when run under Jupyter) and hides the traceback, while
    # re-raising still stops the run and shows where the failure happened.
    raise

Fichier des features chargé avec succès.


In [4]:
# Build the 'species' column from 'image_name' so it matches the folder names
# under birds_dataset: keep the text before the second underscore, then trim
# any leading/trailing whitespace.
birds_features["species"] = (
    birds_features["image_name"]
    .apply(extract_species)
    .str.strip()
)
birds_features["species"]


0      Nycticorax_nycticorax
1        Nannopterum_auritum
2             Cathartes_aura
3        Dryobates_pubescens
4        Meleagris_gallopavo
               ...          
320     Haemorhous_mexicanus
321         Fulica_americana
322              Cygnus_olor
323        Mimus_polyglottos
324      Agelaius_phoeniceus
Name: species, Length: 325, dtype: object

In [5]:
# Column layout of the per-species files and of the combined table.
climate_columns = ["climate_var1", "climate_var2", "climate_var3"]
coordinate_columns = ["latitude", "longitude"]
output_columns = ["species", "image_name"] + climate_columns + coordinate_columns


def _read_fields(path, prefix=None):
    """Read a text file into a list of field lists, one per non-blank line.

    Each line is stripped, has every occurrence of `prefix` removed when a
    prefix is given, and is then split on ", ". Blank lines are skipped so a
    stray empty line does not turn into a row of missing values.
    """
    rows = []
    with open(path, "r") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            if prefix:
                line = line.replace(prefix, "")
            rows.append(line.split(", "))
    return rows


# One DataFrame per species folder is collected here and concatenated below.
combined_data = []

# Every sub-directory of base_path is treated as one species. Sorting makes
# the row order of the output independent of the OS directory listing order.
species_list = sorted(
    os.path.join(base_path, species)
    for species in os.listdir(base_path)
    if os.path.isdir(os.path.join(base_path, species))
)

print("Liste des espèces :", species_list)

for species_dir in species_list:
    species_name = os.path.basename(species_dir)

    climate_data_path = os.path.join(species_dir, "climate_data.txt")
    coordinates_path = os.path.join(species_dir, "coordinates.txt")

    # Best effort per species: a missing or malformed file is reported and
    # the species is skipped, without aborting the remaining folders.
    try:
        # Climate file: three ", "-separated fields per line.
        climate_data = pd.DataFrame(_read_fields(climate_data_path), columns=climate_columns)

        # Coordinates file: "Coordinates: <lat>, <lon>" per line.
        coordinates_cleaned = pd.DataFrame(
            _read_fields(coordinates_path, prefix="Coordinates: "),
            columns=coordinate_columns,
        )
        coordinates_cleaned["latitude"] = coordinates_cleaned["latitude"].astype(float)
        coordinates_cleaned["longitude"] = coordinates_cleaned["longitude"].astype(float)

        # Rows are paired by position, so both files must have the same length.
        if len(climate_data) != len(coordinates_cleaned):
            print(f"Les fichiers climate_data et coordinates dans {species_dir} n'ont pas le même nombre de lignes.")
            continue

        # Image names are generated as <species>_<1-based line number>.
        image_names = [f"{species_name}_{i+1}" for i in range(len(climate_data))]

        temp_data = pd.concat(
            [pd.Series(image_names, name="image_name"), climate_data, coordinates_cleaned],
            axis=1,
        )
        temp_data["species"] = species_name

        # species and image_name first, then climate fields, then coordinates.
        combined_data.append(temp_data[output_columns])
    except Exception as e:
        print(f"Erreur lors de la lecture des fichiers dans {species_dir}: {e}")

# Merge the per-species frames into a single table.
if combined_data:
    final_climate_coordinates_data = pd.concat(combined_data, ignore_index=True)
    print("Données combinées avec succès.")
else:
    # Keep the name defined (as an empty table with the expected columns) so
    # the rest of the notebook does not fail with a NameError.
    final_climate_coordinates_data = pd.DataFrame(columns=output_columns)
    print("Aucune donnée combinée n'a été générée.")

# Put species and image_name first in the feature table. The explicit copy
# avoids SettingWithCopyWarning on the column assignment that follows.
cols = ['species', 'image_name'] + [col for col in birds_features.columns if col not in ['species', 'image_name']]
birds_features = birds_features[cols].copy()

# Truncate image_name to its first three "_"-separated parts. The vectorised
# string methods leave missing values as NaN instead of raising.
birds_features["image_name"] = birds_features["image_name"].str.split("_").str[:3].str.join("_")
birds_features.to_csv("birds_features.csv")

# Export the combined climate/coordinate table only when there is data, so an
# empty run does not overwrite a previous export with an empty file.
if combined_data:
    final_climate_coordinates_data.to_csv("climate_coordinates_data.csv")


Liste des espèces : ['./birds_dataset\\Agelaius_phoeniceus', './birds_dataset\\Anas_platyrhynchos', './birds_dataset\\Ardea_alba', './birds_dataset\\Ardea_cinerea', './birds_dataset\\Ardea_herodias', './birds_dataset\\Astur_cooperii', './birds_dataset\\Branta_canadensis', './birds_dataset\\Buteo_jamaicensis', './birds_dataset\\Buteo_lineatus', './birds_dataset\\Cardinalis_cardinalis', './birds_dataset\\Cathartes_aura', './birds_dataset\\Charadrius_vociferus', './birds_dataset\\Chroicocephalus_ridibundus', './birds_dataset\\Colaptes_auratus', './birds_dataset\\Columba_livia', './birds_dataset\\Corvus_brachyrhynchos', './birds_dataset\\Corvus_corax', './birds_dataset\\Cyanocitta_cristata', './birds_dataset\\Cygnus_olor', './birds_dataset\\Dryobates_pubescens', './birds_dataset\\Egretta_thula', './birds_dataset\\Erithacus_rubecula', './birds_dataset\\Fringilla_coelebs', './birds_dataset\\Fulica_americana', './birds_dataset\\Fulica_atra', './birds_dataset\\Haemorhous_mexicanus', './birds_d

In [7]:
# Display the feature table as the cell's output.
birds_features

Unnamed: 0,species,image_name,feature_0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,...,feature_24293,feature_24294,feature_24295,feature_24296,feature_24297,feature_24298,feature_24299,feature_24300,feature_24301,feature_24302
0,Nycticorax_nycticorax,Nycticorax_nycticorax_5,0.139346,0.000000,0.068517,0.020429,0.163285,0.216670,0.154884,0.102822,...,0.268899,0.076876,0.268899,0.250486,0.042583,0.089110,0.060802,85.704041,60.676086,120.936646
1,Nannopterum_auritum,Nannopterum_auritum_3,0.097628,0.000000,0.027504,0.015246,0.252503,0.211766,0.234390,0.063763,...,0.176530,0.074186,0.180702,0.245440,0.245440,0.097022,0.076249,97.498352,27.115417,93.024353
2,Cathartes_aura,Cathartes_aura_3,0.063757,0.002176,0.189566,0.337707,0.228905,0.009978,0.008488,0.005604,...,0.119392,0.270285,0.270285,0.061064,0.013641,0.028015,0.000000,67.046875,30.084351,125.654846
3,Dryobates_pubescens,Dryobates_pubescens_1,0.156524,0.002887,0.284261,0.117919,0.284261,0.133698,0.009143,0.009870,...,0.100816,0.021444,0.200168,0.249502,0.210926,0.028388,0.035827,85.657227,80.006409,116.632629
4,Meleagris_gallopavo,Meleagris_gallopavo_4,0.227439,0.026346,0.190284,0.145307,0.219196,0.171729,0.227439,0.209838,...,0.049702,0.093389,0.233785,0.233785,0.177662,0.131284,0.026807,79.885010,51.020020,112.676697
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
320,Haemorhous_mexicanus,Haemorhous_mexicanus_2,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.017243,0.061404,0.305866,0.208094,0.198388,0.048944,0.113238,89.685913,79.422302,184.909363
321,Fulica_americana,Fulica_americana_1,0.126797,0.096359,0.142161,0.165873,0.259298,0.079942,0.075038,0.011935,...,0.174326,0.236529,0.236529,0.146415,0.020543,0.089619,0.030665,73.147644,42.568115,93.918213
322,Cygnus_olor,Cygnus_olor_4,0.051752,0.012972,0.020030,0.007922,0.040861,0.019806,0.027099,0.003955,...,0.021501,0.041607,0.151985,0.097789,0.046595,0.060733,0.032268,78.183289,43.053101,141.508484
323,Mimus_polyglottos,Mimus_polyglottos_1,0.040863,0.000000,0.008666,0.000000,0.014319,0.000000,0.014444,0.000000,...,0.089755,0.121202,0.155438,0.031507,0.046525,0.032192,0.044468,57.196106,118.555115,178.345459


In [8]:
# Display the combined climate/coordinate table as the cell's output.
final_climate_coordinates_data

Unnamed: 0,species,image_name,climate_var1,climate_var2,climate_var3,latitude,longitude
0,Agelaius_phoeniceus,Agelaius_phoeniceus_1,Temperate,Hot summer,Without dry season,28.502620,-96.832282
1,Agelaius_phoeniceus,Agelaius_phoeniceus_2,Temperate,Hot summer,Without dry season,37.543658,-77.536247
2,Agelaius_phoeniceus,Agelaius_phoeniceus_3,Temperate,Hot summer,Without dry season,33.000300,-96.741312
3,Agelaius_phoeniceus,Agelaius_phoeniceus_4,Temperate,Hot summer,Dry summer,39.323005,-121.837858
4,Agelaius_phoeniceus,Agelaius_phoeniceus_5,Arid,Steppe,Cold,34.979850,-106.671928
...,...,...,...,...,...,...,...
245,Zonotrichia_leucophrys,Zonotrichia_leucophrys_1,Arid,Steppe,Cold,31.399529,-102.020236
246,Zonotrichia_leucophrys,Zonotrichia_leucophrys_2,Arid,Steppe,Cold,31.399500,-102.020676
247,Zonotrichia_leucophrys,Zonotrichia_leucophrys_3,Temperate,Hot summer,Dry summer,39.187159,-122.056623
248,Zonotrichia_leucophrys,Zonotrichia_leucophrys_4,Temperate,Hot summer,Dry summer,33.799701,-117.757061


# Données CSV en GeoJSON

## birds_data

In [11]:
# Load the combined climate/coordinate table from CSV.
csv_file = "./climate_coordinates_data.csv"  # adjust if the CSV lives elsewhere
df = pd.read_csv(csv_file)

# Every feature needs a position and a species label.
required_columns = {"latitude", "longitude", "species"}
if not required_columns.issubset(df.columns):
    raise ValueError(f"Le fichier CSV doit contenir les colonnes {required_columns}")


def _json_value(value):
    """Return None for pandas missing values (NaN/None/NA), else the value.

    json.dump would otherwise write a bare NaN token, which is not valid JSON.
    """
    return None if pd.isna(value) else value


# Build the GeoJSON FeatureCollection, one Point feature per CSV row.
geojson_data = {
    "type": "FeatureCollection",
    "features": []
}

skipped_rows = 0
for _, row in df.iterrows():
    # A Point without both coordinates is not a valid geometry; skip the row
    # and report the count below rather than emitting NaN coordinates.
    if pd.isna(row["longitude"]) or pd.isna(row["latitude"]):
        skipped_rows += 1
        continue

    geojson_data["features"].append({
        "type": "Feature",
        "geometry": {
            "type": "Point",
            # GeoJSON positions are ordered [longitude, latitude].
            "coordinates": [row["longitude"], row["latitude"]]
        },
        "properties": {
            "species": _json_value(row["species"]),
            # The climate columns are optional in the CSV; absent -> null.
            "climate_var1": _json_value(row.get("climate_var1", None)),
            "climate_var2": _json_value(row.get("climate_var2", None)),
            "climate_var3": _json_value(row.get("climate_var3", None)),
        }
    })

if skipped_rows:
    print(f"{skipped_rows} ligne(s) ignorée(s) : coordonnées manquantes.")

# Write the GeoJSON file. allow_nan=False makes the dump fail loudly instead
# of writing non-standard NaN/Infinity tokens.
geojson_file = "birds_data.geojson"
with open(geojson_file, "w") as f:
    json.dump(geojson_data, f, indent=4, allow_nan=False)

print(f"GeoJSON sauvegardé sous {geojson_file}")


GeoJSON sauvegardé sous birds_data.geojson


In [13]:
# Display the in-memory GeoJSON dictionary as the cell's output.
geojson_data

{'type': 'FeatureCollection',
 'features': [{'type': 'Feature',
   'geometry': {'type': 'Point',
    'coordinates': [-96.8322818113, 28.5026198445]},
   'properties': {'species': 'Agelaius_phoeniceus',
    'climate_var1': 'Temperate',
    'climate_var2': 'Hot summer',
    'climate_var3': 'Without dry season'}},
  {'type': 'Feature',
   'geometry': {'type': 'Point',
    'coordinates': [-77.5362466667, 37.5436583333]},
   'properties': {'species': 'Agelaius_phoeniceus',
    'climate_var1': 'Temperate',
    'climate_var2': 'Hot summer',
    'climate_var3': 'Without dry season'}},
  {'type': 'Feature',
   'geometry': {'type': 'Point', 'coordinates': [-96.7413116667, 33.0003]},
   'properties': {'species': 'Agelaius_phoeniceus',
    'climate_var1': 'Temperate',
    'climate_var2': 'Hot summer',
    'climate_var3': 'Without dry season'}},
  {'type': 'Feature',
   'geometry': {'type': 'Point',
    'coordinates': [-121.8378583586, 39.3230053652]},
   'properties': {'species': 'Agelaius_phoenic

## Köppen-Geiger en GeoJSON

In [17]:
import geopandas as gpd

# Load the shapefile into a GeoDataFrame.
shapefile_path = "climates.shp"  # adjust to the actual location of the shapefile
gdf = gpd.read_file(shapefile_path)

# Write the same layer back out as GeoJSON.
# NOTE(review): no reprojection is done in this cell; GeoJSON consumers
# generally expect WGS84 lon/lat, so confirm the shapefile's CRS.
geojson_path = "koppen_geiger.geojson"
gdf.to_file(geojson_path, driver="GeoJSON")

print(f"Fichier GeoJSON sauvegardé sous {geojson_path}")


  write(


Fichier GeoJSON sauvegardé sous koppen_geiger.geojson
