In [None]:
import ast
import csv
import json
import os
import time

import psycopg
import requests
from colorama import Style,Fore
from dotenv import load_dotenv
from tqdm import tqdm

load_dotenv()

In [None]:
def openJson(path):
    """Read the UTF-8 encoded JSON file at ``path`` and return the decoded object."""
    with open(path, mode="r", encoding="utf-8") as handle:
        return json.load(handle)

def saveJson(path,data):
    """Write ``data`` to ``path`` as pretty-printed UTF-8 JSON.

    Parameters:
        path: destination file; its parent directory is created when missing.
        data: any JSON-serializable object.

    Non-ASCII characters are written as-is (``ensure_ascii=False``). A green
    confirmation line is printed once the file has been written and closed.
    """
    parent = os.path.dirname(path)
    if parent:
        # The target folder (e.g. './jsons') may not exist yet; without this
        # open() would raise FileNotFoundError.
        os.makedirs(parent, exist_ok=True)

    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=2)

    # Printed after the with-block so the message is only shown once the
    # content has actually been flushed to disk.
    print(Style.BRIGHT+Fore.GREEN+'\n json saved'+Style.RESET_ALL)

# Update DB with the new tables

In [None]:
# Create the three tables that hold the spatial entities and their links to
# videos and channels.
#
# The connection and cursor are used as context managers: psycopg commits when
# the block succeeds, rolls back when a statement raises, and closes both
# objects either way. IF NOT EXISTS keeps the cell re-runnable.
with psycopg.connect(
    dbname="youtubestay",
    user="postgres",
    password=os.getenv("POSTGRE_PASSWORD"),
    host="localhost",
    port="5432"
) as conn:
    with conn.cursor() as cur:
        # One row per geocoded place.
        cur.execute("""
            CREATE TABLE IF NOT EXISTS entites_spatiales (
                id_entite_spatiale TEXT PRIMARY KEY,
                label TEXT NOT NULL,
                latitude FLOAT NOT NULL,
                longitude FLOAT NOT NULL
            )
        """)

        # Many-to-many link between places and videos.
        cur.execute("""
            CREATE TABLE IF NOT EXISTS entites_spatiales_videos (
                id_entite_spatiale TEXT REFERENCES entites_spatiales(id_entite_spatiale) ON DELETE CASCADE,
                id_video TEXT REFERENCES videos(id_video) ON DELETE CASCADE,
                PRIMARY KEY (id_video, id_entite_spatiale)
            )
        """)

        # Many-to-many link between places and channels.
        cur.execute("""
            CREATE TABLE IF NOT EXISTS entites_spatiales_chaines (
                id_entite_spatiale TEXT REFERENCES entites_spatiales(id_entite_spatiale) ON DELETE CASCADE,
                id_chaine TEXT REFERENCES chaines(id_chaine) ON DELETE CASCADE,
                PRIMARY KEY (id_chaine, id_entite_spatiale)
            )
        """)


# Fill the entites_spatiales_videos (spatial entities ↔ videos) table

## Prepare json

In [None]:
# Load the text fields of every video from the database.
# The context managers close the cursor and the connection even when the
# query fails, so a failed run does not leave a connection open.
with psycopg.connect(
    dbname="youtubestay",
    user="postgres",
    password=os.getenv("POSTGRE_PASSWORD"),
    host="localhost",
    port="5432"
) as conn:
    with conn.cursor() as cur:
        cur.execute("SELECT id_video,titre,description,tags FROM videos")
        rows = cur.fetchall()

# One dict per video, keyed like the selected columns.
videos = [
    {
        "id_video": id_video,
        "titre": titre,
        "description": description,
        "tags": tags
    }
    for id_video, titre, description, tags in rows
]

In [None]:
# Sanity check: number of rows fetched from the videos table.
len(videos)

In [None]:
# Snapshot the fetched videos to disk; runAll() is later called on this same file.
saveJson('./jsons/videosForSpacialAnalysis.json',videos)

## Process

In [None]:
from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate

# System prompt (written in French): asks the model to extract only French
# towns/communes from the given text and to answer with a bare Python list of
# lower-case names. getSpacialEntities() parses that answer as a list literal.
system_template = """
Tu es un extracteur d'entités géographiques françaises.
À partir d’un texte donné, identifie uniquement les **villes**, **communes** situés en France.
Ne prends **pas** en compte :
- les noms de pays (ex: "France"),
- les noms de personnes,
- les noms de chaînes YouTube, de plateformes (ex: YouTube, Tipeee),
- les noms imaginaires ou poétiques.

Retourne une **liste Python**, en minuscules, sans doublons, contenant uniquement des noms de lieux réels en France.
Pas d'explication, donner la reponse en format string.
"""

# User message: the only template variable is {contexte}, the video text.
user_template = "Contexte : {contexte}"

system_message = SystemMessagePromptTemplate.from_template(system_template)
user_message = HumanMessagePromptTemplate.from_template(user_template)

# Two-message prompt (system + human) shared by the LLM chains built below.
chat_prompt = ChatPromptTemplate.from_messages([system_message, user_message])

In [None]:
from langchain_ollama import ChatOllama

# Local Ollama pipeline built on the same prompt.
# NOTE(review): chain_ollama is not referenced by any other cell visible here — confirm it is still needed.
llm_ollama = ChatOllama(model="llama3.2:3b")
chain_ollama =  chat_prompt | llm_ollama


In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI

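Target shape of each video record once processed (the original fields plus an `output` list):

{
    id_video : '',
    titre : '',
    description : '',
    tags : '',
    +
    output : [
            {
            ent : Ent1,
            lat : ...,
            lon : ... },
            {
            ent : Ent2,
            lat : ...,
            lon : ... },
        ...
    ]
}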

In [None]:
# Index of the first video handled by runAll(): it iterates over
# videos[startFrom:], so earlier entries are skipped.
# NOTE(review): presumably used to resume an interrupted run — confirm.
startFrom = 500

In [None]:
def getContext(title,description,tags):
    """Build the text block describing one video.

    Parameters:
        title: video title (str, or None).
        description: video description (str, or None).
        tags: iterable of tag strings; falsy values (None, empty list) add nothing.

    Returns:
        ``title``, a newline, ``description`` and, when tags are present, a
        further newline followed by the tags joined with ", ".
    """
    # A missing title or description (None) is treated as an empty string
    # instead of raising TypeError on the concatenation; the layout is the
    # same as for an empty value.
    videoContext = (title or '') + '\n' + (description or '')
    if tags:
        videoContext += '\n' + ', '.join(tags)
    return videoContext

# Normalized column values, keyed by (csvfile, column) and filled on first use,
# so the reference CSV is parsed once instead of once per checked entity.
_ENTITY_LOOKUP_CACHE = {}

def getEntityVerification(entity,csvfile,column):
    """Tell whether ``entity`` appears in ``column`` of the CSV file ``csvfile``.

    Parameters:
        entity: name to look up; it is compared as given, so callers must pass
            it already stripped and lower-cased.
        csvfile: path of a UTF-8 CSV file with a header row.
        column: header name of the column to search.

    Returns:
        True when a cell of that column, stripped and lower-cased, equals
        ``entity``; False otherwise.

    Raises:
        KeyError: when ``column`` is not a header of the file (and the file
            has at least one data row).

    The column is cached in memory after the first call; changes made to the
    file afterwards are not seen until the cache is cleared.
    """
    cache_key = (csvfile, column)
    known = _ENTITY_LOOKUP_CACHE.get(cache_key)
    if known is None:
        known = set()
        with open(csvfile, newline='', encoding='utf-8') as f:
            for row in csv.DictReader(f):
                value = row[column]
                # DictReader fills the missing cells of a short row with None.
                if value is not None:
                    known.add(value.strip().lower())
        _ENTITY_LOOKUP_CACHE[cache_key] = known
    return entity in known

def getLLMresponse(context,suffix):
    """Send ``context`` to Gemini through the shared chat prompt and return the reply.

    ``suffix`` selects the API key: it is read from the environment variable
    ``GEMINI_API_KEY_<suffix>``. A new client is built on every call.
    """
    gemini_key = os.getenv('GEMINI_API_KEY_'+suffix)
    gemini_model = ChatGoogleGenerativeAI(
        model="gemini-2.0-flash",
        temperature=0,
        api_key=gemini_key,
    )
    pipeline = chat_prompt | gemini_model
    return pipeline.invoke({'contexte':context})
    
def getSpacialEntities(context,suffix):
    """Extract the French communes mentioned in ``context``.

    The text is sent to the LLM (API key chosen by ``suffix``); the reply is
    expected to be a Python list literal of names. Each name is lower-cased,
    stripped and kept only if it is found in the NCCENR column of
    './csvs/v_commune_2025.csv'.

    Returns:
        The verified names, in reply order and without duplicates. An empty
        list is returned when the reply is not a parsable list literal.

    Errors raised by the LLM call or while reading the reference CSV are
    propagated instead of being silently turned into an empty result.
    """
    response = getLLMresponse(context,suffix)

    raw = response.content
    if not isinstance(raw, str):
        return []

    try:
        # literal_eval only accepts Python literals, so text produced by the
        # model is parsed but never executed (eval() would run it as code).
        parsed = ast.literal_eval(raw.strip())
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        return []

    if not isinstance(parsed, list):
        return []

    Entities = []
    for e in parsed:
        # Skip stray non-string items rather than discarding the whole reply.
        if not isinstance(e, str):
            continue
        e_cleaned = e.lower().strip()
        # Normalization can make two items equal; keep the first only.
        if e_cleaned in Entities:
            continue
        if getEntityVerification(e_cleaned,'./csvs/v_commune_2025.csv','NCCENR'):
            Entities.append(e_cleaned)
    return Entities

def getGeocoding(entity, timeout=10):
    """Look up the coordinates of ``entity`` with the Nominatim search API.

    Parameters:
        entity: place name; ", France" is appended to the query.
        timeout: seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the caller indefinitely.

    Returns:
        ``{'lat': float, 'lon': float}`` for the first result, or None when
        there is no result or the request fails (the error is printed).
    """
    # NOTE(review): the public Nominatim service has a usage policy (request
    # rate, identifying User-Agent) — confirm callers stay within it.
    url = "https://nominatim.openstreetmap.org/search"
    params = {
        "q": entity + ", France",
        "format": "json",
        "limit": 1
    }
    headers = {
        "User-Agent": "geo-entity-extractor/1.0"
    }

    try:
        response = requests.get(url, params=params, headers=headers, timeout=timeout)
        response.raise_for_status()
        data = response.json()
        if not data:
            return None
        first = data[0]
        return {'lat': float(first["lat"]),
                'lon': float(first["lon"])}
    except Exception as e:
        # Best effort: one failed lookup must not stop the whole run.
        print(f"Erreur pour l'entité '{entity}': {e}")

    return None

def runAll(jsonfile):
    """Enrich the videos stored in ``jsonfile`` with geocoded communes.

    For every video from index ``startFrom`` (module-level variable) onwards:
    build its text context, ask the LLM for the communes it mentions, geocode
    each of them and, when at least one was geocoded, store the results under
    ``video['output']`` as a list of ``{'lat', 'lon', 'ent'}`` dicts.

    The processed videos are written to './jsons/updatedVideos.json' every
    100 videos and at the end. If the run is interrupted or fails, the videos
    processed so far are written before the error is re-raised.

    The Gemini API key is rotated every 13 videos; after the last key the
    function sleeps 60 seconds and starts again with the first one.

    NOTE(review): the output file only holds the videos of the current run
    (from ``startFrom`` on) and replaces any previous file at that path.
    """
    CHECKPOINT_EVERY = 100   # videos between two intermediate saves
    VIDEOS_PER_KEY = 13      # videos processed with one key before rotating
    COOLDOWN_SECONDS = 60    # pause once every key has been used
    OUTPUT_PATH = "./jsons/updatedVideos.json"

    videos = openJson(jsonfile)
    MyAPIsuffix = ['MONO','NOUR','NOUR2008','TEXTRA','ZEG']
    index = 0  # position in MyAPIsuffix of the key currently in use
    updatedVideos = [] # The new list of videos to be saved

    try:
        for video in tqdm(videos[startFrom:]):
            videoContext = getContext(video['titre'],video['description'],video['tags'])
            videoSpacialEntities = getSpacialEntities(videoContext,MyAPIsuffix[index])

            # Keep only the entities that could be geocoded.
            output = []
            for ent in videoSpacialEntities:
                geocoding = getGeocoding(ent)
                if geocoding:
                    geocoding['ent']=ent
                    output.append(geocoding)
            if output:
                video['output'] = output

            # Updating the new list
            updatedVideos.append(video)

            # Safe saving
            if len(updatedVideos) % CHECKPOINT_EVERY == 0:
                saveJson(OUTPUT_PATH,updatedVideos)

            # API switching
            if len(updatedVideos) % VIDEOS_PER_KEY == 0:
                index += 1
                if index == len(MyAPIsuffix):
                    print(Style.BRIGHT+Fore.BLUE+'\n sleep for 60s'+Style.RESET_ALL)
                    time.sleep(COOLDOWN_SECONDS)
                    index = 0
                print(Style.BRIGHT+Fore.YELLOW+f'\n API KEY switched to {MyAPIsuffix[index]}'+Style.RESET_ALL)
    except BaseException:
        # Also covers KeyboardInterrupt: keep what was processed since the
        # last checkpoint, but never replace an earlier result file with an
        # empty list when nothing was processed.
        if updatedVideos:
            saveJson(OUTPUT_PATH,updatedVideos)
        raise

    # Saving
    saveJson(OUTPUT_PATH,updatedVideos)

- Test

In [None]:
# Sample video metadata (title, description, tags) used to try the helpers on a single record.
title ="Plantation DES FRUITIERS /// Vers l'AUTONOMIE #1 - Episode6"
description ="Visite de la future maison, plantation des fruitiers et évolution du projet. Voici le programme pour le redémarrage de la série.\n\nRejoignez cette chaîne pour bénéficier d'avantages exclusifs :\nhttps://www.youtube.com/channel/UChdZfc0Lhbb-nlPePZWKzWQ/join\n--------------------------------------------------\n🌳MERCI🌳 à toi de faire vivre cette chaîne!\nTu veux soutenir la chaîne?\n🍋 https://fr.tipeee.com/potagerneration/ 🍋\n--------------------------------------------------\n📱 - RESEAUX - 📱\n\nTous mes réseaux sociaux: https://linktr.ee/potagerneration\n--------------------------------------------------\n🌱- LIENS UTILES -🌱\n\n🏡La maison en vente: https://www.stephaneplazaimmobilier.com/immobilier-acheter/4121653/maison-211-m2-a-cleguer \n📺 La vidéo du Jardin d'Emerveille: https://youtu.be/O0ggpcWXiFU?si=RD5DkXVkhMujuQQV\n📜 Calendrier des semis: https://www.potagerneration.com/telecharger/\n--------------------------------------------------\n📺 - SOMMAIRE - 📺\n\n03:13 Visite de la future maison\n07:55 Etat du projet et plans permis construire\n10:07 Le terrain\n14:04 Plantation des fruitiers\n20:04 L' évolution du projet et plans\n--------------------------------------------------\n@Potagerneration  \n#potager #jardinage #jardin #permaculture #autonomie #legumes #fruitier #tutorial \n--------------------------------------------------\nPotager - Jardinage - Potagerneration - Tuto potager - Autonomie - Fruitiers - Fruitier"
tags =[
      "potager",
      "permacultutre",
      "potager bio",
      "débutant potager",
      "potager débutant",
      "culture facile",
      "potagerneration",
      "tuto potager",
      "Autonomie",
      "autonomie",
      "autonomie alimentaire",
      "vivre autonome",
      "vivre en autonomie",
      "devenir autonome",
      "autonome électricité",
      "fruitiers",
      "planter les fruitier",
      "comment planter un fruitier",
      "plantation fruitier",
      "plantations fruitiers",
      "potager blog",
      "plantation pommier",
      "plantation cerisier",
      "plantation prunier",
      "plantation poirier"
    ]
# Assemble the sample fields into the single text block that is sent to the LLM.
videoTestContexte = getContext(title, description, tags)

#print(videoTestContexte)


In [None]:
# Example text containing place names
texte_contenu = """
Lors de mon voyage en Provence, j’ai visité Marseille, le quartier du Panier, Aix-en-Provence 
et un petit village appelé Eygalières. Ensuite, nous sommes allés à Nice et dans le Vieux-Nice.
"""

# Smoke test: extract communes from the sample video context with the 'MONO' API key.
# NOTE(review): texte_contenu, defined just above, is not used by this call — confirm which input was intended.
getSpacialEntities(videoTestContexte,'MONO')

In [None]:
# Check that a lower-cased commune name is found in the NCCENR column of the reference CSV.
getEntityVerification('cléguer','./csvs/v_commune_2025.csv','NCCENR')

In [None]:
# Geocode a single commune through Nominatim; yields a {'lat', 'lon'} dict or None.
getGeocoding('aix-en-provence')

- Run on All

In [None]:
# Full run over videos[startFrom:]; results are written to ./jsons/updatedVideos.json.
# Long-running: every video triggers one Gemini call plus one Nominatim call per verified commune.
runAll("./jsons/videosForSpacialAnalysis.json")

  1%|▏         | 585/42333 [18:52<217:51:08, 18.79s/it]

[1m[33m
 API KEY switched to MONO[0m


  1%|▏         | 598/42333 [19:09<15:52:27,  1.37s/it] 

[1m[33m
 API KEY switched to NOUR[0m


  1%|▏         | 600/42333 [19:10<12:16:19,  1.06s/it]

[1m[32m
 json saved[0m


  1%|▏         | 611/42333 [19:23<12:17:48,  1.06s/it]

[1m[33m
 API KEY switched to NOUR2008[0m


  1%|▏         | 624/42333 [19:37<15:53:52,  1.37s/it]

[1m[33m
 API KEY switched to TEXTRA[0m


  1%|▏         | 625/42333 [29:04<32:20:01,  2.79s/it]


### Plot coordinates

In [None]:
import folium

# Single geocoded commune to display; same keys as the entries stored under 'output'.
location_data = dict(lat=47.9556915, lon=-1.495366, ent="janzé")

# Centre the map on the point itself.
map_obj = folium.Map(
    location=[location_data["lat"], location_data["lon"]],
    zoom_start=13,
)

# One marker, labelled with the commune name both on hover and on click.
folium.Marker(
    location=[location_data["lat"], location_data["lon"]],
    tooltip=location_data["ent"],
    popup=location_data["ent"],
).add_to(map_obj)

# Write the interactive map to a standalone HTML file.
map_obj.save("map_janze.html")
