<a href="https://colab.research.google.com/github/Meenakshi2434/Sea_Animal_Identification_by_LLM/blob/main/Sea_animal_code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import google.generativeai as genai
import os
from PIL import Image
import io
import json

In [None]:
# Prefer a key supplied through the GOOGLE_API_KEY environment variable so the
# real secret never has to be saved inside the notebook. The placeholder is
# kept as a fallback for users who paste their key directly into this cell.
API_KEY = os.environ.get("GOOGLE_API_KEY", "ENTER_YOUR_API")
genai.configure(api_key=API_KEY)

In [None]:
# Gemini model identifier handed to genai.GenerativeModel in the request cell.
# NOTE(review): an earlier comment here recommended "gemini-1.5-flash-latest",
# but the value actually used is "gemini-2.0-flash".
MODEL_NAME = "gemini-2.0-flash"

# --- Image Loading Function ---
def load_image_from_path(image_path):
    """Open the image at ``image_path`` and return an RGB copy of it.

    Returns:
        A PIL image converted to "RGB", or ``None`` when the file is missing
        or cannot be opened/converted (a message is printed in both cases).
    """
    try:
        with Image.open(image_path) as source_image:
            # convert() yields a new image, so it stays usable after the
            # file handle is closed by the context manager.
            rgb_image = source_image.convert("RGB")
    except FileNotFoundError:
        print(f"Error: Image file not found at '{image_path}'.")
        return None
    except Exception as e:
        print(f"Error loading image '{image_path}': {e}")
        return None
    return rgb_image

In [None]:
# IMPORTANT: Replace this with the actual path to your image file.
# This path is passed to load_image_from_path in the next cell.
# NOTE(review): "/content" looks like the Colab runtime directory — confirm
# the file has been uploaded there before running.
image_path = "/content/IMG-20250505-WA0092.jpg"

In [None]:
# Load the photo as an RGB PIL image; load_image_from_path returns None
# (after printing an error) when the file cannot be read.
input_image = load_image_from_path(image_path)

In [None]:

# Reset first so that a skipped or failed request cannot leave a stale
# `response` from an earlier run for the parsing cell to pick up.
response = None

if input_image:
    # Initialize the Generative Model
    model = genai.GenerativeModel(MODEL_NAME)

    # Define the prompt content.
    # The image is included directly in the content list, followed by the
    # instructions (adjacent string literals are concatenated into one prompt).
    prompt_content = [
        input_image,
        "Identify the marine species in this image. Provide 2 distinct predictions in total. "
        "For both predictions, provide the following details:\n"
        "- 'common_name': (string)\n"
        "- 'scientific_name': (string)\n"
        "- 'family': (string)\n"
        "- 'lifespan': (string, range in months or years)\n"
        "- 'diet': (string)\n"
        "- 'shape': (string)\n"
        "- 'habitat': (string)\n"
        "- 'size': (string, provide size with unit like 'Up to 2cm', 'About 1.5m')\n"
        "- 'color': (array of strings)\n"
        "- 'short_description': (string, minimum 40 words, rich and detailed description of the species' physical features, behaviors, and unique characteristics)\n"
        "- 'locations_found_in': (array of strings; list major oceans, marine regions, countries, or climatic zones where the species is found)\n"
        "Your response MUST be a valid JSON array, using double quotes only around both keys and string values. DO NOT use single quotes. DO NOT wrap the JSON inside any markdown (like ```json). DO NOT include any explanation or formatting."
    ]

    print(f"Sending request to model: {MODEL_NAME}...")
    try:
        # Generate content from the model
        response = model.generate_content(prompt_content)

        # Print the model's reply verbatim. It was previously wrapped in an
        # extra "[...]", which made the logged text look like malformed JSON.
        print("\n--- Model Response ---")
        print(response.text)
        print("---------------------\n")

    except Exception as e:
        print(f"\nAn error occurred during content generation: {e}")
        print("Please check your API key, model name, and internet connection.")
        print("Also, ensure the image content is suitable for the model's safety filters.")
else:
    print("Could not proceed without a valid image.")

Sending request to model: gemini-2.0-flash...

--- Model Response ---
[```json
[
  {
    "common_name": "Weedy Scorpionfish",
    "scientific_name": "Rhinopias frondosa",
    "family": "Scorpaenidae",
    "lifespan": "5-8 years",
    "diet": "Carnivorous, feeds on small fish and crustaceans.",
    "shape": "Elongated, compressed body with irregular appendages.",
    "habitat": "Rocky reefs, coral rubble, and seagrass beds.",
    "size": "Up to 23 cm",
    "color": [
      "Red",
      "Brown",
      "Pink",
      "White"
    ],
    "short_description": "The Weedy Scorpionfish, *Rhinopias frondosa*, is a master of disguise, exhibiting remarkable camouflage to blend seamlessly with its surroundings. Its body is adorned with numerous leaf-like appendages, giving it a weedy appearance. These elaborate structures, combined with its coloration, allow it to mimic algae or seaweed. This species is a carnivore, patiently ambushing small fish and crustaceans. When threatened, it can intensify it

In [None]:
# Parse the JSON returned by the LLM into `parsed_responses`.
import re  # kept in this cell: later cells also rely on `re` being imported here


def extract_json_array_text(raw_text):
    """Return the JSON-array text contained in a model reply.

    The reply may be bare JSON or JSON wrapped in a markdown code fence.
    Fences are recognised with or without the ``json`` language tag.

    Args:
        raw_text: The text of the model reply.

    Returns:
        A string that starts with ``[`` and ends with ``]``. A reply holding a
        single top-level object is wrapped into a one-element array.

    Raises:
        ValueError: If nothing is left after stripping, or the payload is
            neither a JSON array nor a single JSON object.
    """
    fenced = re.search(r"```(?:json)?\s*(.*?)\s*```", raw_text, re.DOTALL | re.IGNORECASE)
    # Fallback: if there is no markdown block, use the stripped reply directly.
    json_text = fenced.group(1).strip() if fenced else raw_text.strip()

    if not json_text:
        raise ValueError("Extracted JSON string is empty or invalid.")

    if json_text.startswith('[') and json_text.endswith(']'):
        return json_text

    print("Warning: Model did not return a top-level JSON array. Attempting to force array format.")
    # Wrap a lone object so downstream code can always iterate over a list.
    if json_text.startswith('{') and json_text.endswith('}'):
        return f"[{json_text}]"
    raise ValueError("Model did not return a valid JSON array or single object.")


json_string = extract_json_array_text(response.text)
parsed_responses = json.loads(json_string)



In [None]:
pip install wikipedia

Collecting wikipedia
  Downloading wikipedia-1.4.0.tar.gz (27 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: wikipedia
  Building wheel for wikipedia (setup.py) ... [?25l[?25hdone
  Created wheel for wikipedia: filename=wikipedia-1.4.0-py3-none-any.whl size=11678 sha256=136fce480656b2b85c21a89445b6e9ca51131e5011354c712975b5fe630e7725
  Stored in directory: /root/.cache/pip/wheels/8f/ab/cb/45ccc40522d3a1c41e1d2ad53b8f33a62f394011ec38cd71c6
Successfully built wikipedia
Installing collected packages: wikipedia
Successfully installed wikipedia-1.4.0


In [None]:
import requests
import wikipedia
from urllib.parse import quote

In [None]:


# -------------------------
# 1. Wikipedia API
# -------------------------
def fetch_wikipedia_image(query):
    """Return the first usable image URL from the Wikipedia page for ``query``.

    A URL is usable when it ends in .jpg/.jpeg/.png (case-insensitive) and
    does not contain "logo". Returns ``None`` when no image qualifies or when
    anything goes wrong while fetching or scanning the page (best effort).
    """
    photo_suffixes = ('.jpg', '.jpeg', '.png')
    try:
        candidates = wikipedia.page(query).images
        return next(
            (
                link
                for link in candidates
                if link.lower().endswith(photo_suffixes) and 'logo' not in link.lower()
            ),
            None,
        )
    except Exception:
        return None

# -------------------------
# 2. Wikimedia Commons Search API
# -------------------------
def fetch_commons_image(query, timeout=5):
    """Search Wikimedia Commons for ``query`` and return one image URL.

    Args:
        query: Free-text search string (e.g. a scientific or common name).
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The URL of the best-ranked search hit whose file URL ends in
        .jpg/.jpeg/.png, or ``None`` if the query is empty, nothing matches,
        or the request/parsing fails (best effort, errors are swallowed).
    """
    if not query:
        return None
    try:
        api_url = "https://commons.wikimedia.org/w/api.php"
        params = {
            "action": "query",
            "format": "json",
            "prop": "imageinfo",
            "generator": "search",
            "gsrsearch": query,
            # imageinfo only exists for pages in the File namespace (6).
            "gsrnamespace": 6,
            "gsrlimit": 5,
            "iiprop": "url",
        }
        # A timeout keeps the notebook from hanging on a stalled connection.
        reply = requests.get(api_url, params=params, timeout=timeout)
        reply.raise_for_status()
        pages = (reply.json().get("query") or {}).get("pages") or {}
        # `pages` is keyed by page id; "index" carries the search rank.
        ranked_pages = sorted(pages.values(), key=lambda page: page.get("index", 0))
        for page in ranked_pages:
            imageinfo = page.get("imageinfo") or []
            if imageinfo:
                url = imageinfo[0].get("url") or ""
                if url.lower().endswith(('.jpg', '.jpeg', '.png')):
                    return url
    except Exception:
        return None
    return None

# -------------------------
# 3. FishBase (HTML Scrape Shortcut)
# -------------------------
def fetch_fishbase_image(scientific_name, timeout=5):
    """Scrape the FishBase summary page of a species for a photo URL.

    Args:
        scientific_name: Binomial name; whitespace is replaced by hyphens to
            build the ``/summary/<Genus-species>.html`` page address.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        An absolute URL for the first ``<img>`` whose ``src`` contains
        "photos" and is not an inline ``data:`` URI, or ``None`` when the name
        is blank, the page is unavailable or has no "Pictures" section, or any
        error occurs (best effort, errors are swallowed).
    """
    if not scientific_name or not scientific_name.strip():
        return None
    try:
        from urllib.parse import quote, urljoin
        from bs4 import BeautifulSoup

        slug = quote("-".join(scientific_name.split()))
        fishbase_url = f"https://www.fishbase.se/summary/{slug}.html"
        response = requests.get(fishbase_url, timeout=timeout)
        if not response.ok or "Pictures" not in response.text:
            return None

        soup = BeautifulSoup(response.text, 'html.parser')
        for img in soup.select('img[src*="photos"]'):
            src = img.get("src")
            if src and not src.startswith("data:"):
                # urljoin resolves "/abs", "../relative" and already-absolute
                # src values; plain concatenation only worked for "/abs".
                return urljoin(fishbase_url, src)
    except Exception:
        return None
    return None

# -------------------------
# 4. iNaturalist API
# -------------------------
def fetch_inaturalist_image(scientific_name, timeout=5):
    """Look up a taxon on iNaturalist and return its default photo URL.

    Args:
        scientific_name: Search text sent as the ``q`` parameter.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The ``medium_url`` (or, failing that, ``url``) of the first search
        result that has a default photo, or ``None`` when the name is empty,
        no result has a photo, or the request fails (best effort).
    """
    if not scientific_name:
        return None
    try:
        reply = requests.get(
            "https://api.inaturalist.org/v1/search",
            params={"q": scientific_name, "sources": "taxa"},
            timeout=timeout,  # avoid hanging the notebook on a stalled connection
        )
        reply.raise_for_status()
        for result in reply.json().get("results") or []:
            taxon = result.get("record") or {}
            # Guard against an explicit null default_photo: `or {}` lets the
            # scan continue to later results instead of raising.
            default_photo = taxon.get("default_photo") or {}
            img_url = default_photo.get("medium_url") or default_photo.get("url")
            if img_url:
                return img_url
    except Exception:
        return None
    return None

# -------------------------
# Main fetch function
# -------------------------
def fetch_images_for_prediction(common_name, scientific_name, max_images=3):
    """Collect up to ``max_images`` distinct image URLs for one prediction.

    Every source (Wikipedia, Wikimedia Commons, FishBase, iNaturalist) is
    tried with the scientific name first and then with the common name.

    Args:
        common_name: Common name of the species; may be empty or ``None``.
        scientific_name: Scientific name; may be empty or ``None``.
        max_images: Maximum number of URLs to return.

    Returns:
        A list of unique URLs in the order they were found (possibly empty).
    """
    # Skip blank/missing names (an empty search can return an unrelated
    # image) and do not query the same name twice.
    names = []
    for raw_name in (scientific_name, common_name):
        cleaned = raw_name.strip() if isinstance(raw_name, str) else ""
        if cleaned and cleaned not in names:
            names.append(cleaned)

    seen = set()
    images = []
    for name in names:
        for fetch in (
            fetch_wikipedia_image,
            fetch_commons_image,
            fetch_fishbase_image,
            fetch_inaturalist_image,
        ):
            if len(images) >= max_images:
                return images
            try:
                url = fetch(name)
            except Exception:
                # A failing source must not prevent the others from running.
                continue
            if url and url not in seen:
                seen.add(url)
                images.append(url)
    return images


In [None]:
# for only image generation testing

# Print the reference image URLs found for every parsed prediction.
for i, prediction in enumerate(parsed_responses):
    common_name, scientific_name = (
        prediction.get("common_name", ""),
        prediction.get("scientific_name", ""),
    )
    print(f"\nPrediction {i+1}: {common_name} ({scientific_name})")

    image_urls = fetch_images_for_prediction(common_name, scientific_name, max_images=3)
    if not image_urls:
        print("  No images found from any source.")
        continue

    for j, url in enumerate(image_urls, 1):
        print(f"  Image {j}: {url}")



Prediction 1: Weedy Scorpionfish (Rhinopias frondosa)
  Image 1: https://upload.wikimedia.org/wikipedia/commons/3/3d/Antennarius_striatus.jpg
  Image 2: https://static.inaturalist.org/photos/29832944/medium.jpg
  Image 3: https://inaturalist-open-data.s3.amazonaws.com/photos/72392466/medium.jpeg

Prediction 2: Lacy Scorpionfish (Rhinopias aphanes)
  Image 1: https://upload.wikimedia.org/wikipedia/commons/7/7b/Rhinopias_aphanes.jpg
  Image 2: https://inaturalist-open-data.s3.amazonaws.com/photos/101692352/medium.jpg


In [None]:
import folium
import geopandas as gpd
from shapely.geometry import mapping, box, MultiPolygon

In [None]:
def normalize_location_name(name):
    """Return a lowercase, whitespace-normalized form of a location name.

    Hyphens and underscores become spaces, the standalone words "region",
    "coast", "area" and "zone" are removed wherever they occur (not only as
    suffixes), and runs of whitespace collapse to single spaces.
    """
    lowered = name.strip().lower()
    spaced = lowered.translate(str.maketrans("-_", "  "))
    without_generic_words = re.sub(r"\b(region|coast|area|zone)\b", "", spaced)
    tokens = without_generic_words.split()
    return " ".join(tokens)

In [None]:
import new_regions

In [None]:
# Convert the region bounds into shapely polygons for visualization.
# Import the dict explicitly: after a plain `import new_regions` the name
# refers to the module object, which has no .items(), so this cell would fail
# on a fresh Restart & Run All.
from new_regions import new_regions

marine_zones_polygons = {}
for region, info in new_regions.items():
    norm_region = normalize_location_name(region)
    bounds = info["bounds"]
    if isinstance(bounds, list):
        # NOTE(review): a list is treated as several boxes, each entry being
        # the arguments for shapely's box() — confirm against new_regions.
        polygons = [box(*b) for b in bounds]
        marine_zones_polygons[norm_region] = MultiPolygon(polygons)
    else:
        # Any non-list value is unpacked as the arguments of a single box().
        marine_zones_polygons[norm_region] = box(*bounds)


In [None]:
pip install plotly geopy pandas



In [None]:
from new_regions import new_regions, location_mapping


In [None]:
!pip install rapidfuzz


Collecting rapidfuzz
  Downloading rapidfuzz-3.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Downloading rapidfuzz-3.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m24.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: rapidfuzz
Successfully installed rapidfuzz-3.13.0


In [None]:
from rapidfuzz import process

def fuzzy_match(loc_norm, choices, threshold=90):
    """Return the choice most similar to ``loc_norm`` if it scores high enough.

    Args:
        loc_norm: The (already normalized) string to look up.
        choices: Candidate strings to compare against.
        threshold: Minimum score the best candidate must reach.

    Returns:
        The best-matching choice, or ``None`` when there is no candidate at
        all or the best score is below ``threshold``.
    """
    best = process.extractOne(loc_norm, choices)
    # extractOne yields None when there is nothing to match (e.g. empty
    # choices); unpacking that directly would raise TypeError.
    if best is None:
        return None
    match, score, _ = best
    return match if score >= threshold else None

In [None]:
def get_canonical_name(location):
    """Map a location to its canonical name via ``location_mapping``.

    The location is normalized first; when the normalized form has no entry
    in ``location_mapping`` the normalized form itself (not the raw input)
    is returned.
    """
    normalized = normalize_location_name(location)
    if normalized in location_mapping:
        return location_mapping[normalized]
    return normalized

import folium
import geopandas as gpd
from shapely.geometry import mapping

def _add_geometry_layer(fmap, geometry, label, style, all_bounds):
    """Draw one geometry on ``fmap`` and record its bounding box.

    ``style`` is the constant folium style dict for the layer. ``all_bounds``
    is extended in place with ``[[miny, minx], [maxy, maxx]]``.
    """
    folium.GeoJson(
        data=mapping(geometry),
        # Hand out a copy so every feature owns its own style dict.
        style_function=lambda _feature, _style=style: dict(_style),
        tooltip=folium.Tooltip(label),
    ).add_to(fmap)
    minx, miny, maxx, maxy = geometry.bounds
    all_bounds.append([[miny, minx], [maxy, maxx]])


def create_distribution_map(
    locations,
    scientific_name,
    map_filename,
    eez_shapefile_path="/content/EEZ/EEZ_land_union_v4_202410/EEZ_land_union_v4_202410.shp",
    natural_earth_path="/content/natural_earth_vector/10m_cultural/ne_10m_admin_1_states_provinces.shp",
):
    """Render a folium map highlighting where a species is found.

    Each location is resolved, in order, against:
      1. the EEZ shapefile (fuzzy match on its SOVEREIGN*/TERRITORY* columns),
      2. the Natural Earth admin-1 shapefile (fuzzy match on state or country),
      3. the predefined ``marine_zones_polygons`` (exact normalized name).
    The first source that matches is drawn; locations matching none are
    listed at the end.

    Args:
        locations: Iterable of location names; ``None`` is treated as empty.
        scientific_name: Species name shown in the map title.
        map_filename: Path of the HTML file to write.
        eez_shapefile_path: Path of the EEZ shapefile.
        natural_earth_path: Path of the Natural Earth admin-1 shapefile.

    Returns:
        None. The map is saved to ``map_filename`` and progress is printed.
    """
    from html import escape  # stdlib; imported locally so the cell is self-contained

    eez_style = {"fillColor": "#ff9f00", "color": "#b36000", "weight": 1, "fillOpacity": 0.5}
    admin_style = {"fillColor": "#a1d99b", "color": "#31a354", "weight": 1, "fillOpacity": 0.4}
    marine_style = {"fillColor": "#5a9bd4", "color": "#1f4e79", "weight": 1.5, "fillOpacity": 0.3}

    eez_gdf = gpd.read_file(eez_shapefile_path)
    natural_earth_gdf = gpd.read_file(natural_earth_path)

    # Lower-cased EEZ names, built once instead of once per location. Only
    # columns that actually exist are used, both for the candidate list and
    # for the row filter below.
    eez_name_cols = ["SOVEREIGN1", "SOVEREIGN2", "SOVEREIGN3", "TERRITORY1", "TERRITORY2", "TERRITORY3"]
    eez_lowered = [
        eez_gdf[col].dropna().astype(str).str.lower()
        for col in eez_name_cols
        if col in eez_gdf.columns
    ]
    # Sorted so that ties between equally good matches resolve the same way on every run.
    eez_names = sorted({name for column in eez_lowered for name in column})

    # Normalize Natural Earth names for matching.
    natural_earth_gdf["name_norm"] = natural_earth_gdf["name"].str.lower().fillna("")
    natural_earth_gdf["admin_norm"] = natural_earth_gdf["admin"].str.lower().fillna("")  # Country name
    # De-duplicate (keeping first-seen order) and drop the "" placeholders.
    possible_names = [
        name
        for name in dict.fromkeys(
            natural_earth_gdf["name_norm"].tolist() + natural_earth_gdf["admin_norm"].tolist()
        )
        if name
    ]

    m = folium.Map(location=[0, 0], zoom_start=2, tiles="cartodbpositron")

    unmatched = []
    all_bounds = []  # bounds of every geometry added to the map

    for loc in locations or []:
        canonical = get_canonical_name(loc)
        loc_norm = normalize_location_name(canonical)

        # 1. Try match in EEZ
        best_match = fuzzy_match(loc_norm, eez_names, threshold=90) if eez_names else None
        if best_match:
            eez_mask = None
            for column in eez_lowered:
                # `column` only covers non-null rows; missing rows count as "no hit".
                hits = (column == best_match).reindex(eez_gdf.index, fill_value=False).astype(bool)
                eez_mask = hits if eez_mask is None else (eez_mask | hits)
            matched_rows = eez_gdf[eez_mask]
            if not matched_rows.empty:
                for geometry in matched_rows.geometry:
                    if geometry is not None and not geometry.is_empty:
                        _add_geometry_layer(m, geometry, loc, eez_style, all_bounds)
                print(f"Added EEZ polygons for '{loc}' (matched in shapefile).")
                continue

        # 2. Try match in Natural Earth states/provinces
        best_match = fuzzy_match(loc_norm, possible_names) if possible_names else None
        if best_match:
            matched_states = natural_earth_gdf[
                (natural_earth_gdf["name_norm"] == best_match)
                | (natural_earth_gdf["admin_norm"] == best_match)
            ]
            if not matched_states.empty:
                for geometry in matched_states.geometry:
                    if geometry is not None and not geometry.is_empty:
                        _add_geometry_layer(m, geometry, loc, admin_style, all_bounds)
                print(f"Added Natural Earth polygons for '{loc}' (matched in admin 1 shapefile).")
                continue

        # 3. Marine zones fallback
        marine_poly = marine_zones_polygons.get(loc_norm)
        if marine_poly is not None and not marine_poly.is_empty:
            _add_geometry_layer(m, marine_poly, loc, marine_style, all_bounds)
            print(f"Added polygon for marine region '{loc}' (from predefined zones).")
            continue

        # 4. No match
        print(f"Location '{loc}' not found in EEZ, Natural Earth, or marine zones.")
        unmatched.append(loc)

    # Fit the view to everything drawn. With nothing drawn the map keeps the
    # world view it was created with.
    if all_bounds:
        lat_min = min(b[0][0] for b in all_bounds)
        lon_min = min(b[0][1] for b in all_bounds)
        lat_max = max(b[1][0] for b in all_bounds)
        lon_max = max(b[1][1] for b in all_bounds)
        m.fit_bounds([[lat_min, lon_min], [lat_max, lon_max]])

    # The name originates from model output, so escape it before embedding in HTML.
    title_html = (
        "<h3 align='center' style='font-size:16px'>"
        f"<b>Distribution Map for {escape(str(scientific_name))}</b></h3>"
    )
    m.get_root().html.add_child(folium.Element(title_html))

    m.save(map_filename)
    print(f"Map saved to: {map_filename}")

    if unmatched:
        print("\nUnmatched locations:")
        for u in unmatched:
            print(f" - {u}")

In [None]:
import zipfile
import os

# Unzip the Natural Earth archive into the absolute folder that
# create_distribution_map reads from ("/content/natural_earth_vector/..."),
# rather than a folder relative to the current working directory.
with zipfile.ZipFile("/content/natural_earth_vector.zip", "r") as zip_ref:
    zip_ref.extractall("/content/natural_earth_vector")  # extract to a folder


In [None]:
import re  # already imported by an earlier cell; repeated so this cell stands alone

# Build one distribution map per parsed prediction.
for i, prediction in enumerate(parsed_responses):
    common_name = prediction.get("common_name", "")
    # `or` also covers an explicit JSON null, which .get's default does not.
    scientific_name = prediction.get("scientific_name") or ""
    locations = prediction.get("locations_found_in") or []
    print(f"\nPrediction {i + 1}:")
    print(f"  Common Name: {common_name}")
    print(f"  Scientific Name: {scientific_name}")
    print(f"  Locations: {locations}")
    # Generate distribution map. The name comes from model output, so replace
    # every character that is not a word character, dot or hyphen (spaces,
    # slashes, ...) with "_" before using it in a file name.
    safe_name = re.sub(r"[^\w.-]", "_", scientific_name)
    map_filename = f"distribution_map_{i+1}_{safe_name}.html"
    create_distribution_map(locations, scientific_name, map_filename)
    print(f" Distribution map saved: {map_filename}")


Prediction 1:
  Common Name: Weedy Scorpionfish
  Scientific Name: Rhinopias frondosa
  Locations: ['Indo-Pacific Ocean', 'Indonesia', 'Philippines', 'Australia', 'Japan']
Added polygon for marine region 'Indo-Pacific Ocean' (from predefined zones).
Added EEZ polygons for 'Indonesia' (matched in shapefile).
Added EEZ polygons for 'Philippines' (matched in shapefile).
Added EEZ polygons for 'Australia' (matched in shapefile).
Added EEZ polygons for 'Japan' (matched in shapefile).
Map saved to: distribution_map_1_Rhinopias_frondosa.html
 Distribution map saved: distribution_map_1_Rhinopias_frondosa.html

Prediction 2:
  Common Name: Lacy Scorpionfish
  Scientific Name: Rhinopias aphanes
  Locations: ['Indo-Pacific Ocean', 'Great Barrier Reef', 'Papua New Guinea', 'Indonesia']
Added polygon for marine region 'Indo-Pacific Ocean' (from predefined zones).
Added Natural Earth polygons for 'Great Barrier Reef' (matched in admin 1 shapefile).
Added EEZ polygons for 'Papua New Guinea' (matched