In [102]:
import time
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

# CONSTS

In [103]:
CHROME_DRIVER_PATH = "./chromedriver-mac/chromedriver"
BRAVE_PATH = "/Applications/Brave Browser.app/Contents/MacOS/Brave Browser"
DOWNLOAD_DIRECTORY = "/Users/mamoudou/Developer/Python/ProjetTrutoré/"
ARCHIVE_DIRECTORY = "/Users/mamoudou/Developer/Python/ProjetTrutoré/archive"
URL = "https://donneespubliques.meteofrance.fr/?fond=produit&id_produit=90&id_rubrique=32"

## Browser Configs

In [104]:
options = Options()
options.binary_location = BRAVE_PATH  

prefs = {
    "download.default_directory": DOWNLOAD_DIRECTORY,
    "download.prompt_for_download": False,
    "download.directory_upgrade": True,
    "safebrowsing.enabled": True 
}
options.add_experimental_option("prefs", prefs)

In [105]:
service = Service(executable_path=CHROME_DRIVER_PATH)

driver = webdriver.Chrome(service=service, options=options)



driver.get(URL)
print("Page Title:", driver.title)
telecharger_button = driver.find_element(By.XPATH, "//input[@type='submit' and @value='Télécharger']")
telecharger_button.click()

time.sleep(5)
driver.quit()

Page Title: Données Publiques de Météo-France - Données SYNOP essentielles OMM


## Fixing the file extension problem

In [106]:
from file_management import change_file_extension

cvs_file = change_file_extension(DOWNLOAD_DIRECTORY)

Le fichier a été renommé : /Users/mamoudou/Developer/Python/ProjetTrutoré/synop.2025010615.txt -> /Users/mamoudou/Developer/Python/ProjetTrutoré/synop.2025010615.csv


# Starting the ELT

In [107]:
import pandas as pd
import numpy as np
df = pd.read_csv(cvs_file, sep=";", dtype={"date": str})

print(df.to_string())

    numer_sta            date    pmer  tend cod_tend   dd    ff           t          td   u     vv  ww  w1  w2    n nbas  hbas  cl  cm  ch    pres niv_bar geop tend24 tn12        tn24 tx12        tx24     tminsol  sw  tw      raf10     rafper  per etat_sol  ht_neige    ssfrai perssfrai        rr1        rr3        rr6       rr12       rr24 phenspe1 phenspe2 phenspe3 phenspe4 nnuage1 ctype1 hnuage1 nnuage2 ctype2 hnuage2 nnuage3 ctype3 hnuage3 nnuage4 ctype4 hnuage4  Unnamed: 59
0        7005  20250106150000   99150   550        1  260   8.2  280.350000  277.650000  83  19970   0  mq  mq   mq   mq    mq  mq  mq  mq   98290      mq   mq    -70   mq          mq   mq          mq  278.450000  mq  mq  13.600000  18.300000  -10       mq  0.000000        mq        mq   0.400000   0.600000   1.600000   1.600000   2.400000       mq       mq       mq       mq      mq     mq      mq      mq     mq      mq      mq     mq      mq      mq     mq      mq          NaN
1        7015  20250106150000   99

# Récupération des informations importantes à savoir :
1. Indicatif OMM station : numéro de station `numer_sta` -> `int`
2. Date (UTC) : `date` -> `datetime`
3. Pression au niveau mer : `pmer` -> `int`
4. Variation de pression en 3 heures : `tend` -> `int`
5. Type de tendance barométrique : `cod_tend` -> `int`
6. Direction du vent moyen 10 mn : `dd` -> `int`
7. Vitesse du vent moyen 10 mn : `ff` -> `float`
8. Température : `t` -> `float`
9. Point de rosée : `td` -> `float`
10. Humidité : `u` -> `int`
11. Visibilité horizontale : `vv` -> `float`
12. Temps présent : `ww` -> `int`
13. Nébulosité totale : `n` -> `float`
14. Nébulosité des nuages de l'étage inférieur : `nbas` -> `int`
15. Hauteur de la base des nuages de l'étage inférieur : `hbas` -> `int`
16. Pression station : `pres` -> `int`
17. Variation de pression en 24 heures : `tend24` -> `int`
18. Température minimale sur N heures : `tn12` -> `float`
19. Température maximale sur N heures : `tx12` -> `float`
20. Température minimale du sol sur 12 heures : `tminsol` -> `float`
21. Rafales sur les 10 dernières minutes : `raf10` -> `float`
22. Rafales sur une période : `rafper` -> `float`
23. Période de mesure des rafales : `per` -> `float`
24. Précipitations dans les N dernières heures : `rr12` -> `float`

In [108]:
df = df[['numer_sta', 'date', "pmer", "tend", "cod_tend", "dd", "ff", "t", "td", "u", "vv", "ww", "n", "nbas", "hbas","pres", "tend24", "tn12", "tx12", "tminsol", "raf10", "rafper", "per", "rr12" ]]


print(df.to_string())


    numer_sta            date    pmer  tend cod_tend   dd    ff           t          td   u     vv  ww    n nbas  hbas    pres tend24 tn12 tx12     tminsol      raf10     rafper  per       rr12
0        7005  20250106150000   99150   550        1  260   8.2  280.350000  277.650000  83  19970   0   mq   mq    mq   98290    -70   mq   mq  278.450000  13.600000  18.300000  -10   1.600000
1        7015  20250106150000   99010   740        1  250  10.8  280.050000  276.150000  76  48900   0   90    7   800   98450   -200   mq   mq  278.350000  15.600000  21.200000  -10   2.400000
2        7020  20250106150000   99260   190        1  260  13.8  281.150000  275.150000  66  10000  15   75    6  1250   99150    430   mq   mq          mq  18.100000  21.000000  -10   0.200000
3        7027  20250106150000   99510   280        3  250   8.6  279.150000  276.350000  82  35460   0  100    4   800   98700    290   mq   mq  280.350000  12.600000  19.400000  -10   3.600000
4        7037  20250106150000 

## Remplacer les valeurs manquantes `mq` par `NaN`

In [109]:
df.replace("mq", np.nan, inplace=True)
print(df.to_string())

    numer_sta            date    pmer  tend cod_tend   dd    ff           t          td    u     vv   ww    n nbas  hbas    pres tend24  tn12  tx12     tminsol      raf10     rafper  per       rr12
0        7005  20250106150000   99150   550        1  260   8.2  280.350000  277.650000   83  19970    0  NaN  NaN   NaN   98290    -70   NaN   NaN  278.450000  13.600000  18.300000  -10   1.600000
1        7015  20250106150000   99010   740        1  250  10.8  280.050000  276.150000   76  48900    0   90    7   800   98450   -200   NaN   NaN  278.350000  15.600000  21.200000  -10   2.400000
2        7020  20250106150000   99260   190        1  260  13.8  281.150000  275.150000   66  10000   15   75    6  1250   99150    430   NaN   NaN         NaN  18.100000  21.000000  -10   0.200000
3        7027  20250106150000   99510   280        3  250   8.6  279.150000  276.350000   82  35460    0  100    4   800   98700    290   NaN   NaN  280.350000  12.600000  19.400000  -10   3.600000
4        7

  df.replace("mq", np.nan, inplace=True)


## Nettoyages de colonnes non renseignées 

1. Informations générales
	-	Numéro de station : `numer_sta` -> int
	-	Date (UTC) : `date` -> datetime
2. Pression
	-	Pression au niveau mer : `pmer` -> int
	-	Variation de pression en 3 heures : `tend` -> int
	-	Type de tendance barométrique : `cod_tend` -> int
	-	Pression station : `pres` -> int
	-	Variation de pression en 24 heures : `tend24` -> int
3. Vent
	-	Direction du vent moyen 10 mn : `dd` -> int
	-	Vitesse du vent moyen 10 mn : `ff` -> float
	-	Rafales sur les 10 dernières minutes : `raf10` -> float
	-	Rafales sur une période : `rafper` -> float
	-	Période de mesure des rafales : `per` -> float
4. Température
	-	Température : `t` -> float
	-	Point de rosée : `td` -> float
	-	Température minimale sur N heures : `tn12` -> float
	-	Température maximale sur N heures : `tx12` -> float
	-	Température minimale du sol sur 12 heures : `tminsol` -> float
5. Humidité
	-	Humidité : `u` -> int
6. Visibilité
	-	Visibilité horizontale : `vv` -> float
7. Nébulosité et nuages
	-	Nébulosité totale : `n` -> float
	-	Nébulosité des nuages de l’étage inférieur : `nbas` -> int
	-	Hauteur de la base des nuages de l’étage inférieur : `hbas` -> int
8. Précipitations
	-	Précipitations dans les N dernières heures : `rr12` -> float
9. Temps présent
	-	Temps présent : `ww` -> int

In [110]:
df[['dd', 'ff', 'raf10', 'rafper', 'rr12', "per"]] = df[['dd', 'ff', 'raf10', 'rafper', 'rr12', "per"]].fillna(0)

moyenne = ["pmer", "tend", "cod_tend", "pres", "tend24", "t", "td", "tn12", "tx12", "tminsol", "u", "vv", "n", "nbas", "hbas", "ww"]
df[moyenne] = df[moyenne].apply(pd.to_numeric, errors='coerce')

for col in moyenne:
    df[col] = df[col].fillna(df[col].mean())

print(df.to_string())

    numer_sta            date           pmer        tend  cod_tend   dd    ff           t       td       u            vv    ww           n  nbas         hbas          pres  tend24  tn12  tx12     tminsol      raf10     rafper  per       rr12
0        7005  20250106150000   99150.000000  550.000000  1.000000  260   8.2  280.350000  277.650  83.000  19970.000000   0.0   93.484848  5.15  1403.947368   98290.00000  -70.00   NaN   NaN  278.450000  13.600000  18.300000  -10   1.600000
1        7015  20250106150000   99010.000000  740.000000  1.000000  250  10.8  280.050000  276.150  76.000  48900.000000   0.0   90.000000  7.00   800.000000   98450.00000 -200.00   NaN   NaN  278.350000  15.600000  21.200000  -10   2.400000
2        7020  20250106150000   99260.000000  190.000000  1.000000  260  13.8  281.150000  275.150  66.000  10000.000000  15.0   75.000000  6.00  1250.000000   99150.00000  430.00   NaN   NaN  280.658333  18.100000  21.000000  -10   0.200000
3        7027  20250106150000   

## Typages des colonnes

In [111]:
df["date"] = pd.to_datetime(df["date"], format='%Y%m%d%H%M%S')
df['date'] = df['date'].dt.strftime("%Y-%m-%d %H:%M:%S")

entier = ["numer_sta", "pmer", "tend", "cod_tend", "dd", "u", "ww", "nbas", "hbas", "pres", "tend24"]
for i in entier:
    print(f"{i} -> {df[i].dtype}")
    df[i] = df[i].astype(int)
    print(f"{i} -> {df[i].dtype}")
    print("")

double = ['dd', 'ff', 'raf10', 'rafper', 'rr12', "per", "t", "td", "tn12", "tx12", "tminsol", "n", "vv"]
for i in double:
    print(f"{i} -> {df[i].dtype}")
    df[i] = df[i].astype(float)
    print(f"{i} -> {df[i].dtype}")
    print("")

numer_sta -> int64
numer_sta -> int64

pmer -> float64
pmer -> int64

tend -> float64
tend -> int64

cod_tend -> float64
cod_tend -> int64

dd -> int64
dd -> int64

u -> float64
u -> int64

ww -> float64
ww -> int64

nbas -> float64
nbas -> int64

hbas -> float64
hbas -> int64

pres -> float64
pres -> int64

tend24 -> float64
tend24 -> int64

dd -> int64
dd -> float64

ff -> float64
ff -> float64

raf10 -> object
raf10 -> float64

rafper -> object
rafper -> float64

rr12 -> object
rr12 -> float64

per -> object
per -> float64

t -> float64
t -> float64

td -> float64
td -> float64

tn12 -> float64
tn12 -> float64

tx12 -> float64
tx12 -> float64

tminsol -> float64
tminsol -> float64

n -> float64
n -> float64

vv -> float64
vv -> float64



In [112]:
print(df.to_string())

    numer_sta                 date    pmer  tend  cod_tend     dd    ff           t       td   u            vv  ww           n  nbas  hbas    pres  tend24  tn12  tx12     tminsol  raf10  rafper   per  rr12
0        7005  2025-01-06 15:00:00   99150   550         1  260.0   8.2  280.350000  277.650  83  19970.000000   0   93.484848     5  1403   98290     -70   NaN   NaN  278.450000   13.6    18.3 -10.0   1.6
1        7015  2025-01-06 15:00:00   99010   740         1  250.0  10.8  280.050000  276.150  76  48900.000000   0   90.000000     7   800   98450    -200   NaN   NaN  278.350000   15.6    21.2 -10.0   2.4
2        7020  2025-01-06 15:00:00   99260   190         1  260.0  13.8  281.150000  275.150  66  10000.000000  15   75.000000     6  1250   99150     430   NaN   NaN  280.658333   18.1    21.0 -10.0   0.2
3        7027  2025-01-06 15:00:00   99510   280         3  250.0   8.6  279.150000  276.350  82  35460.000000   0  100.000000     4   800   98700     290   NaN   NaN  280.3500

## Conversion des températures de Kelvin en Celcuis

In [113]:
celcuis = ["t", "td", "tn12", "tx12", "tminsol"]
for i in celcuis:
    df[i] = df[i] - 273.15

print(df.to_string())

    numer_sta                 date    pmer  tend  cod_tend     dd    ff          t      td   u            vv  ww           n  nbas  hbas    pres  tend24  tn12  tx12    tminsol  raf10  rafper   per  rr12
0        7005  2025-01-06 15:00:00   99150   550         1  260.0   8.2   7.200000   4.500  83  19970.000000   0   93.484848     5  1403   98290     -70   NaN   NaN   5.300000   13.6    18.3 -10.0   1.6
1        7015  2025-01-06 15:00:00   99010   740         1  250.0  10.8   6.900000   3.000  76  48900.000000   0   90.000000     7   800   98450    -200   NaN   NaN   5.200000   15.6    21.2 -10.0   2.4
2        7020  2025-01-06 15:00:00   99260   190         1  260.0  13.8   8.000000   2.000  66  10000.000000  15   75.000000     6  1250   99150     430   NaN   NaN   7.508333   18.1    21.0 -10.0   0.2
3        7027  2025-01-06 15:00:00   99510   280         3  250.0   8.6   6.000000   3.200  82  35460.000000   0  100.000000     4   800   98700     290   NaN   NaN   7.200000   12.6    19

## Arrondir tous les decimaux à 2chiffres après la virgules

In [114]:
df[double] = df[double].round(2)
print(df.to_string())

    numer_sta                 date    pmer  tend  cod_tend     dd    ff      t     td   u        vv  ww       n  nbas  hbas    pres  tend24  tn12  tx12  tminsol  raf10  rafper   per  rr12
0        7005  2025-01-06 15:00:00   99150   550         1  260.0   8.2   7.20   4.50  83  19970.00   0   93.48     5  1403   98290     -70   NaN   NaN     5.30   13.6    18.3 -10.0   1.6
1        7015  2025-01-06 15:00:00   99010   740         1  250.0  10.8   6.90   3.00  76  48900.00   0   90.00     7   800   98450    -200   NaN   NaN     5.20   15.6    21.2 -10.0   2.4
2        7020  2025-01-06 15:00:00   99260   190         1  260.0  13.8   8.00   2.00  66  10000.00  15   75.00     6  1250   99150     430   NaN   NaN     7.51   18.1    21.0 -10.0   0.2
3        7027  2025-01-06 15:00:00   99510   280         3  250.0   8.6   6.00   3.20  82  35460.00   0  100.00     4   800   98700     290   NaN   NaN     7.20   12.6    19.4 -10.0   3.6
4        7037  2025-01-06 15:00:00   99440   450         1  

# Archive file

In [115]:
from file_management import move_file

move_file(cvs_file, ARCHIVE_DIRECTORY)

Le fichier a été déplacé de /Users/mamoudou/Developer/Python/ProjetTrutoré/synop.2025010615.csv à /Users/mamoudou/Developer/Python/ProjetTrutoré/archive/synop.2025010615.csv


# Sending data to API

In [116]:
import json
import requests
from api import send_data

In [117]:
API_URL = "https://127.0.0.1:8080/api/donnees_climatiques" 
dict_data = df.to_dict(orient="records")
for row in dict_data:
    send_data(API_URL, row, requests)

{"numer_sta": 7005, "date": "2025-01-06 15:00:00", "pmer": 99150, "tend": 550, "cod_tend": 1, "dd": 260.0, "ff": 8.2, "t": 7.2, "td": 4.5, "u": 83, "vv": 19970.0, "ww": 0, "n": 93.48, "nbas": 5, "hbas": 1403, "pres": 98290, "tend24": -70, "tn12": NaN, "tx12": NaN, "tminsol": 5.3, "raf10": 13.6, "rafper": 18.3, "per": -10.0, "rr12": 1.6}
Erreur de connexion : HTTPSConnectionPool(host='127.0.0.1', port=8080): Max retries exceeded with url: /api/donnees_climatiques (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x11ab021f0>: Failed to establish a new connection: [Errno 61] Connection refused'))
{"numer_sta": 7015, "date": "2025-01-06 15:00:00", "pmer": 99010, "tend": 740, "cod_tend": 1, "dd": 250.0, "ff": 10.8, "t": 6.9, "td": 3.0, "u": 76, "vv": 48900.0, "ww": 0, "n": 90.0, "nbas": 7, "hbas": 800, "pres": 98450, "tend24": -200, "tn12": NaN, "tx12": NaN, "tminsol": 5.2, "raf10": 15.6, "rafper": 21.2, "per": -10.0, "rr12": 2.4}
Erreur de connexion : HTTPSConne

In [118]:
url_api = "https://127.0.0.1:8000/product" 
donnees = {
    "name": "Essai",
    "description": "Superbe desc",
    "price": 122.2,
    'createdAt': '2024-12-22T16:44:44+00:00',
    'updatedAt': '2024-12-22T16:44:44+00:00'
}

send_data(url_api, donnees, requests)


Requête réussie!


[{'id': 20,
  'name': 'Essai',
  'description': 'Superbe desc',
  'price': 122.2,
  'createdAt': '2024-12-22T16:44:44+00:00',
  'updatedAt': '2024-12-22T16:44:44+00:00'}]