In [104]:
import time
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

# CONSTS

In [105]:
CHROME_DRIVER_PATH = "./chromedriver"
BRAVE_PATH = "/Applications/Brave Browser.app/Contents/MacOS/Brave Browser"
DOWNLOAD_DIRECTORY = "/Users/mamoudou/Developer/Python/ProjetTrutoré/"
ARCHIVE_DIRECTORY = "/Users/mamoudou/Developer/Python/ProjetTrutoré/archive"
URL = "https://donneespubliques.meteofrance.fr/?fond=produit&id_produit=90&id_rubrique=32"

## Browser Configs

In [106]:
options = Options()
options.binary_location = BRAVE_PATH  

prefs = {
    "download.default_directory": DOWNLOAD_DIRECTORY,
    "download.prompt_for_download": False,
    "download.directory_upgrade": True,
    "safebrowsing.enabled": True 
}
options.add_experimental_option("prefs", prefs)

In [107]:
service = Service(executable_path=CHROME_DRIVER_PATH)

driver = webdriver.Chrome(service=service, options=options)



driver.get(URL)
print("Page Title:", driver.title)
telecharger_button = driver.find_element(By.XPATH, "//input[@type='submit' and @value='Télécharger']")
telecharger_button.click()

time.sleep(5)
driver.quit()

Page Title: Données Publiques de Météo-France - Données SYNOP essentielles OMM


## Fixing the file extension problem

In [108]:
from file_management import change_file_extension

cvs_file = change_file_extension(DOWNLOAD_DIRECTORY)

Le fichier a été renommé : /Users/mamoudou/Developer/Python/ProjetTrutoré/synop.2025010421.txt -> /Users/mamoudou/Developer/Python/ProjetTrutoré/synop.2025010421.csv


# Starting the ELT

In [109]:
import pandas as pd
import numpy as np
df = pd.read_csv(cvs_file, sep=";", dtype={"date": str})

print(df.to_string())

    numer_sta            date    pmer  tend cod_tend   dd    ff           t          td    u     vv  ww  w1  w2    n nbas  hbas  cl  cm  ch    pres niv_bar geop tend24        tn12        tn24        tx12        tx24     tminsol  sw  tw      raf10     rafper  per etat_sol  ht_neige    ssfrai perssfrai        rr1        rr3        rr6       rr12       rr24 phenspe1 phenspe2 phenspe3 phenspe4 nnuage1 ctype1 hnuage1 nnuage2 ctype2 hnuage2 nnuage3 ctype3 hnuage3 nnuage4 ctype4 hnuage4  Unnamed: 59
0        7005  20250104210000  100730  -410        8  130   5.9  274.050000  271.850000   85  10730  70  mq  mq   mq   mq    mq  mq  mq  mq   99830      mq   mq  -1720          mq          mq          mq          mq  272.650000  mq  mq  11.300000  11.700000  -10        0  0.000000        mq        mq  -0.100000  -0.100000  -0.100000  -0.100000  -0.100000       mq       mq       mq       mq      mq     mq      mq      mq     mq      mq      mq     mq      mq      mq     mq      mq          NaN
1   

# Récupération des informations importantes à savoir :
1. Indicatif OMM station : numéro de station `numer_sta` -> `int`
2. Date (UTC) : `date` -> `datetime`
3. Pression au niveau mer : `pmer` -> `int`
4. Variation de pression en 3 heures : `tend` -> `int`
5. Type de tendance barométrique : `cod_tend` -> `int`
6. Direction du vent moyen 10 mn : `dd` -> `int`
7. Vitesse du vent moyen 10 mn : `ff` -> `float`
8. Température : `t` -> `float`
9. Point de rosée : `td` -> `float`
10. Humidité : `u` -> `int`
11. Visibilité horizontale : `vv` -> `float`
12. Temps présent : `ww` -> `int`
13. Nébulosité totale : `n` -> `float`
14. Nébulosité des nuages de l'étage inférieur : `nbas` -> `int`
15. Hauteur de la base des nuages de l'étage inférieur : `hbas` -> `int`
16. Pression station : `pres` -> `int`
17. Variation de pression en 24 heures : `tend24` -> `int`
18. Température minimale sur N heures : `tn12` -> `float`
19. Température maximale sur N heures : `tx12` -> `float`
20. Température minimale du sol sur 12 heures : `tminsol` -> `float`
21. Rafales sur les 10 dernières minutes : `raf10` -> `float`
22. Rafales sur une période : `rafper` -> `float`
23. Période de mesure des rafales : `per` -> `float`
24. Précipitations dans les N dernières heures : `rr12` -> `float`

In [110]:
df = df[['numer_sta', 'date', "pmer", "tend", "cod_tend", "dd", "ff", "t", "td", "u", "vv", "ww", "n", "nbas", "hbas","pres", "tend24", "tn12", "tx12", "tminsol", "raf10", "rafper", "per", "rr12" ]]


print(df.to_string())


    numer_sta            date    pmer  tend cod_tend   dd    ff           t          td    u     vv  ww    n nbas  hbas    pres tend24        tn12        tx12     tminsol      raf10     rafper  per       rr12
0        7005  20250104210000  100730  -410        8  130   5.9  274.050000  271.850000   85  10730  70   mq   mq    mq   99830  -1720          mq          mq  272.650000  11.300000  11.700000  -10  -0.100000
1        7015  20250104210000  100930  -400        8  120   4.0  274.550000  271.950000   83  23700   0  100    8  1750  100340  -1450          mq          mq  272.550000   6.400000   8.200000  -10   0.000000
2        7020  20250104210000   99960  -460        8   90   2.2  277.650000  277.550000   99     mq  mq   mq   mq    mq   99850  -2450          mq          mq          mq   6.100000   7.000000  -10   8.500000
3        7027  20250104210000  100310  -410        8  130   8.0  274.550000  274.250000   98   3940  60  100    8   250   99480  -2190          mq          mq  275.

## Remplacer les valeurs manquantes `mq` par `NaN`

In [111]:
df.replace("mq", np.nan, inplace=True)
print(df.to_string())

    numer_sta            date    pmer  tend cod_tend   dd    ff           t          td    u     vv   ww    n nbas  hbas    pres tend24        tn12        tx12     tminsol      raf10     rafper  per       rr12
0        7005  20250104210000  100730  -410        8  130   5.9  274.050000  271.850000   85  10730   70  NaN  NaN   NaN   99830  -1720         NaN         NaN  272.650000  11.300000  11.700000  -10  -0.100000
1        7015  20250104210000  100930  -400        8  120   4.0  274.550000  271.950000   83  23700    0  100    8  1750  100340  -1450         NaN         NaN  272.550000   6.400000   8.200000  -10   0.000000
2        7020  20250104210000   99960  -460        8   90   2.2  277.650000  277.550000   99    NaN  NaN  NaN  NaN   NaN   99850  -2450         NaN         NaN         NaN   6.100000   7.000000  -10   8.500000
3        7027  20250104210000  100310  -410        8  130   8.0  274.550000  274.250000   98   3940   60  100    8   250   99480  -2190         NaN         NaN 

## Nettoyages de colonnes non renseignées 

1. Informations générales
	-	Numéro de station : `numer_sta` -> int
	-	Date (UTC) : `date` -> datetime
2. Pression
	-	Pression au niveau mer : `pmer` -> int
	-	Variation de pression en 3 heures : `tend` -> int
	-	Type de tendance barométrique : `cod_tend` -> int
	-	Pression station : `pres` -> int
	-	Variation de pression en 24 heures : `tend24` -> int
3. Vent
	-	Direction du vent moyen 10 mn : `dd` -> int
	-	Vitesse du vent moyen 10 mn : `ff` -> float
	-	Rafales sur les 10 dernières minutes : `raf10` -> float
	-	Rafales sur une période : `rafper` -> float
	-	Période de mesure des rafales : `per` -> float
4. Température
	-	Température : `t` -> float
	-	Point de rosée : `td` -> float
	-	Température minimale sur N heures : `tn12` -> float
	-	Température maximale sur N heures : `tx12` -> float
	-	Température minimale du sol sur 12 heures : `tminsol` -> float
5. Humidité
	-	Humidité : `u` -> int
6. Visibilité
	-	Visibilité horizontale : `vv` -> float
7. Nébulosité et nuages
	-	Nébulosité totale : `n` -> float
	-	Nébulosité des nuages de l’étage inférieur : `nbas` -> int
	-	Hauteur de la base des nuages de l’étage inférieur : `hbas` -> int
8. Précipitations
	-	Précipitations dans les N dernières heures : `rr12` -> float
9. Temps présent
	-	Temps présent : `ww` -> int

In [112]:
df[['dd', 'ff', 'raf10', 'rafper', 'rr12', "per"]] = df[['dd', 'ff', 'raf10', 'rafper', 'rr12', "per"]].fillna(0)

moyenne = ["pmer", "tend", "cod_tend", "pres", "tend24", "t", "td", "tn12", "tx12", "tminsol", "u", "vv", "n", "nbas", "hbas", "ww"]
df[moyenne] = df[moyenne].apply(pd.to_numeric, errors='coerce')

for col in moyenne:
    df[col] = df[col].fillna(df[col].mean())

print(df.to_string())

    numer_sta            date           pmer       tend  cod_tend   dd    ff           t         td           u            vv        ww           n      nbas         hbas           pres       tend24    tn12    tx12     tminsol      raf10     rafper  per       rr12
0        7005  20250104210000  100730.000000 -410.00000  8.000000  130   5.9  274.050000  271.85000   85.000000  10730.000000  70.00000   97.028571  6.384615  1045.277778   99830.000000 -1720.000000  273.75  275.85  272.650000  11.300000  11.700000  -10  -0.100000
1        7015  20250104210000  100930.000000 -400.00000  8.000000  120   4.0  274.550000  271.95000   83.000000  23700.000000   0.00000  100.000000  8.000000  1750.000000  100340.000000 -1450.000000  273.75  275.85  272.550000   6.400000   8.200000  -10   0.000000
2        7020  20250104210000   99960.000000 -460.00000  8.000000   90   2.2  277.650000  277.55000   99.000000  15597.906977  29.95122   97.028571  6.384615  1045.277778   99850.000000 -2450.000000  273.7

## Typages des colonnes

In [113]:
df["date"] = pd.to_datetime(df["date"], format='%Y%m%d%H%M%S')
df['date'] = df['date'].dt.strftime("%Y-%m-%d %H:%M:%S")

entier = ["numer_sta", "pmer", "tend", "cod_tend", "dd", "u", "ww", "nbas", "hbas", "pres", "tend24"]
for i in entier:
    print(f"{i} -> {df[i].dtype}")
    df[i] = df[i].astype(int)
    print(f"{i} -> {df[i].dtype}")
    print("")

double = ['dd', 'ff', 'raf10', 'rafper', 'rr12', "per", "t", "td", "tn12", "tx12", "tminsol", "n", "vv"]
for i in double:
    print(f"{i} -> {df[i].dtype}")
    df[i] = df[i].astype(float)
    print(f"{i} -> {df[i].dtype}")
    print("")

numer_sta -> float64
numer_sta -> int64

pmer -> int64
pmer -> int64

tend -> int64
tend -> int64

cod_tend -> int64
cod_tend -> int64

dd -> int64
dd -> int64

u -> int64
u -> int64

ww -> int64
ww -> int64

nbas -> int64
nbas -> int64

hbas -> int64
hbas -> int64

pres -> int64
pres -> int64

tend24 -> int64
tend24 -> int64

dd -> int64
dd -> int64

ff -> int64
ff -> int64

raf10 -> int64
raf10 -> int64

rafper -> int64
rafper -> int64

rr12 -> int64
rr12 -> int64

per -> int64
per -> int64

t -> int64
t -> int64

td -> int64
td -> int64

tn12 -> int64
tn12 -> int64

tx12 -> int64
tx12 -> int64

tminsol -> int64
tminsol -> int64

n -> int64
n -> int64

vv -> int64
vv -> int64



In [114]:
print(df.to_string())

    numer_sta                 date    pmer  tend  cod_tend     dd    ff           t         td    u            vv  ww           n  nbas  hbas    pres  tend24    tn12    tx12     tminsol  raf10  rafper   per  rr12
0        7005  2025-01-04 21:00:00  100730  -410         8  130.0   5.9  274.050000  271.85000   85  10730.000000  70   97.028571     6  1045   99830   -1720  273.75  275.85  272.650000   11.3    11.7 -10.0  -0.1
1        7015  2025-01-04 21:00:00  100930  -400         8  120.0   4.0  274.550000  271.95000   83  23700.000000   0  100.000000     8  1750  100340   -1450  273.75  275.85  272.550000    6.4     8.2 -10.0   0.0
2        7020  2025-01-04 21:00:00   99960  -460         8   90.0   2.2  277.650000  277.55000   99  15597.906977  29   97.028571     6  1045   99850   -2450  273.75  275.85  277.036111    6.1     7.0 -10.0   8.5
3        7027  2025-01-04 21:00:00  100310  -410         8  130.0   8.0  274.550000  274.25000   98   3940.000000  60  100.000000     8   250   9948

## Conversion des températures de Kelvin en Celcuis

In [115]:
celcuis = ["t", "td", "tn12", "tx12", "tminsol"]
for i in celcuis:
    df[i] = df[i] - 273.15

print(df.to_string())

    numer_sta                 date    pmer  tend  cod_tend     dd    ff          t        td    u            vv  ww           n  nbas  hbas    pres  tend24  tn12  tx12    tminsol  raf10  rafper   per  rr12
0        7005  2025-01-04 21:00:00  100730  -410         8  130.0   5.9   0.900000  -1.30000   85  10730.000000  70   97.028571     6  1045   99830   -1720   0.6   2.7  -0.500000   11.3    11.7 -10.0  -0.1
1        7015  2025-01-04 21:00:00  100930  -400         8  120.0   4.0   1.400000  -1.20000   83  23700.000000   0  100.000000     8  1750  100340   -1450   0.6   2.7  -0.600000    6.4     8.2 -10.0   0.0
2        7020  2025-01-04 21:00:00   99960  -460         8   90.0   2.2   4.500000   4.40000   99  15597.906977  29   97.028571     6  1045   99850   -2450   0.6   2.7   3.886111    6.1     7.0 -10.0   8.5
3        7027  2025-01-04 21:00:00  100310  -410         8  130.0   8.0   1.400000   1.10000   98   3940.000000  60  100.000000     8   250   99480   -2190   0.6   2.7   2.7000

## Arrondir tous les decimaux à 2chiffres apes la virgules

In [116]:
df[double] = df[double].round(2)
print(df.to_string())

    numer_sta                 date    pmer  tend  cod_tend     dd    ff      t     td    u        vv  ww       n  nbas  hbas    pres  tend24  tn12  tx12  tminsol  raf10  rafper   per  rr12
0        7005  2025-01-04 21:00:00  100730  -410         8  130.0   5.9   0.90  -1.30   85  10730.00  70   97.03     6  1045   99830   -1720   0.6   2.7    -0.50   11.3    11.7 -10.0  -0.1
1        7015  2025-01-04 21:00:00  100930  -400         8  120.0   4.0   1.40  -1.20   83  23700.00   0  100.00     8  1750  100340   -1450   0.6   2.7    -0.60    6.4     8.2 -10.0   0.0
2        7020  2025-01-04 21:00:00   99960  -460         8   90.0   2.2   4.50   4.40   99  15597.91  29   97.03     6  1045   99850   -2450   0.6   2.7     3.89    6.1     7.0 -10.0   8.5
3        7027  2025-01-04 21:00:00  100310  -410         8  130.0   8.0   1.40   1.10   98   3940.00  60  100.00     8   250   99480   -2190   0.6   2.7     2.70   10.2    11.5 -10.0   8.2
4        7037  2025-01-04 21:00:00  100690  -350       

# Archive file

In [117]:
from file_management import move_file

move_file(cvs_file, ARCHIVE_DIRECTORY)

Le fichier a été déplacé de /Users/mamoudou/Developer/Python/ProjetTrutoré/synop.2025010421.csv à /Users/mamoudou/Developer/Python/ProjetTrutoré/archive/synop.2025010421.csv


## Converting DF to JSON and then to readable dict

In [118]:
from datetime import datetime

In [119]:
import pprint   
import json

json_data = df.to_json(orient='records')
dict_data = json.loads(json_data)

pprint.pprint(dict_data)

[{'cod_tend': 8,
  'date': '2025-01-04 21:00:00',
  'dd': 130.0,
  'ff': 5.9,
  'hbas': 1045,
  'n': 97.03,
  'nbas': 6,
  'numer_sta': 7005,
  'per': -10.0,
  'pmer': 100730,
  'pres': 99830,
  'raf10': 11.3,
  'rafper': 11.7,
  'rr12': -0.1,
  't': 0.9,
  'td': -1.3,
  'tend': -410,
  'tend24': -1720,
  'tminsol': -0.5,
  'tn12': 0.6,
  'tx12': 2.7,
  'u': 85,
  'vv': 10730.0,
  'ww': 70},
 {'cod_tend': 8,
  'date': '2025-01-04 21:00:00',
  'dd': 120.0,
  'ff': 4.0,
  'hbas': 1750,
  'n': 100.0,
  'nbas': 8,
  'numer_sta': 7015,
  'per': -10.0,
  'pmer': 100930,
  'pres': 100340,
  'raf10': 6.4,
  'rafper': 8.2,
  'rr12': 0.0,
  't': 1.4,
  'td': -1.2,
  'tend': -400,
  'tend24': -1450,
  'tminsol': -0.6,
  'tn12': 0.6,
  'tx12': 2.7,
  'u': 83,
  'vv': 23700.0,
  'ww': 0},
 {'cod_tend': 8,
  'date': '2025-01-04 21:00:00',
  'dd': 90.0,
  'ff': 2.2,
  'hbas': 1045,
  'n': 97.03,
  'nbas': 6,
  'numer_sta': 7020,
  'per': -10.0,
  'pmer': 99960,
  'pres': 99850,
  'raf10': 6.1,
  'raf

In [120]:
first = dict_data[0]
js = json.dumps(first)

print(js)

{"numer_sta": 7005, "date": "2025-01-04 21:00:00", "pmer": 100730, "tend": -410, "cod_tend": 8, "dd": 130.0, "ff": 5.9, "t": 0.9, "td": -1.3, "u": 85, "vv": 10730.0, "ww": 70, "n": 97.03, "nbas": 6, "hbas": 1045, "pres": 99830, "tend24": -1720, "tn12": 0.6, "tx12": 2.7, "tminsol": -0.5, "raf10": 11.3, "rafper": 11.7, "per": -10.0, "rr12": -0.1}


In [121]:
import requests


In [122]:
from api import send_data

url_api = "https://127.0.0.1:8000/product" 
url_api = "https://127.0.0.1:8080/api/donnees_climatiques" 
donnees = {
    "name": "Produit modifié",
    "description": "Superbe desc",
    "price": 122.2,
    'createdAt': '2024-12-22T16:44:44+00:00',
    'updatedAt': '2024-12-22T16:44:44+00:00'
}

reponse = send_data(url_api, donnees, requests)


Erreur de connexion : HTTPSConnectionPool(host='127.0.0.1', port=8080): Max retries exceeded with url: /api/donnees_climatiques (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x11a833be0>: Failed to establish a new connection: [Errno 61] Connection refused'))


In [123]:
def recupByID(id):
    return int

def recupTout():
    return list

