In [95]:
import requests
import bs4

# Page fetched by the cells below: the infoclimat.fr real-time observations page
# for Toulouse-Blagnac (07630 is presumably the station identifier — TODO confirm).
url = "https://www.infoclimat.fr/observations-meteo/temps-reel/toulouse-blagnac/07630.html"

def get_html(url, timeout=10):
  """Download a page and return its body as text.

  Args:
    url: Address of the page to fetch.
    timeout: Seconds to wait for the server before giving up. Without a
      timeout, requests.get can block indefinitely on an unresponsive host.

  Returns:
    The response body as a str, decoded by requests.

  Raises:
    requests.HTTPError: If the server answers with a 4xx/5xx status.
    requests.RequestException: On connection errors or timeouts.
  """
  response = requests.get(url, timeout=timeout)
  # Fail here rather than handing an error page to the HTML parser.
  response.raise_for_status()
  return response.text

def parse_html(html):
  """Build a BeautifulSoup tree from an HTML string with the "html.parser" backend."""
  return bs4.BeautifulSoup(html, features="html.parser")

def get_tables(soup):
  """Return every <table> element found by soup.find_all."""
  return soup.find_all("table")

def get_table_releves(tables):
  """Return the first table whose id is "resptable-releves", or None if there is none."""
  wanted_id = "resptable-releves"
  matching = (candidate for candidate in tables if candidate.get("id") == wanted_id)
  return next(matching, None)

def get_rows(table):
  """Return all <tr> elements found inside the given table."""
  return table.find_all("tr")

def get_cells(row):
  """Return the row's cells, header cells (<th>) as well as data cells (<td>)."""
  cell_tags = ["td", "th"]
  return row.find_all(cell_tags)

def get_data(cells):
  """Return the cleaned text of each cell in a row.

  Cells whose text contains "add_circle_outline" are left out (presumably an
  icon-only cell — TODO confirm against the page markup). For the remaining
  cells, newlines become spaces, a space is appended after every "km/h", and
  "hPa=" is shortened to "hPa".
  """
  texts = (cell.text for cell in cells)
  return [
    text.replace("\n", " ").replace("km/h", "km/h ").replace("hPa=", "hPa")
    for text in texts
    if "add_circle_outline" not in text
  ]

def get_data_from_table(table):
  """Return the table as a list of rows, each row being a list of cleaned cell texts."""
  return [get_data(get_cells(row)) for row in get_rows(table)]

html = get_html(url)
soup = parse_html(html)
tables = get_tables(soup)
table_releves = get_table_releves(tables)
# get_table_releves returns None when no table carries the expected id; stop
# here with a clear message instead of failing later with an AttributeError.
if table_releves is None:
  raise ValueError(f"No table with id 'resptable-releves' found at {url}")
# Skip the first row (presumably the header row, since get_cells also collects <th> cells).
data = get_data_from_table(table_releves)[1:]


In [99]:
# Convert the scraped rows to a pandas DataFrame.
import pandas as pd

df = pd.DataFrame(data)
# Columns: Heure locale, Temperature, Temps, Pluie, Vent, Humidite, Bio-meteo, Pt. de rosee, Pression, Visibilite
df.columns = ["Heure locale","Temperature", "Temps", "Pluie", "Vent", "Humidite", "Bio-meteo", "Pt. de rosee", "Pression", "Visibilite"]
# Split "mean °C min → max" into three float columns. The optional leading "-"
# keeps the sign of sub-zero readings, which an unsigned pattern would drop or miss.
df[["Temperature_Mean", "Temperature_Min", "Temperature_Max"]] = df["Temperature"].str.extract(r"(-?[\d\.]+) °C (-?[\d\.]+) → (-?[\d\.]+)").astype(float)

# Remove the last row; .copy() makes the result an independent frame so the
# column insertions below never act on a slice of the original.
df = df.iloc[:-1].copy()

# Hour of each observation, taken from the digits before "h" (e.g. "01h" -> 1).
hours = df["Heure locale"].str.extract(r"(\d+)h", expand=False).astype(int)

# Rows run from the most recent observation backwards, so every "00h" row marks a
# day boundary: each row after it belongs to one more day in the past.
midnight = hours.eq(0).astype(int)
days_back = midnight.cumsum() - midnight  # number of "00h" rows strictly before each row

# Evaluate "today" once so every row shares the same reference date.
# NOTE(review): assumes the kernel clock is in the station's local timezone — confirm.
today = pd.Timestamp.now().normalize()
timestamps = pd.to_timedelta(hours, unit="h") - pd.to_timedelta(days_back, unit="D") + today

# "Day" holds the full observation timestamp, "Time" its HH:MM part.
df.insert(0, "Day", timestamps)
df.insert(1, "Time", timestamps.dt.strftime("%H:%M"))

In [100]:
# Save the frame to "toulouse-blagnac.csv" (relative path), leaving out the index column.
df.to_csv("toulouse-blagnac.csv", index=False)
# Show the frame as the cell output.
df

Unnamed: 0,Day,Time,Heure locale,Temperature,Temps,Pluie,Vent,Humidite,Bio-meteo,Pt. de rosee,Pression,Visibilite,Temperature_Mean,Temperature_Min,Temperature_Max
0,2025-02-14 01:00:00,01:00,01h,9.7 °C 9.1 → 9.7,,0 mm/1h,11 km/h raf.16.2,85%,8.0,7.3 °C,1020.3hPa,18 km,9.7,9.1,9.7
1,2025-02-14 00:00:00,00:00,00h,9.2 °C 9.1 → 9.3,,0 mm/1h,7 km/h raf.12.6,86%,8.2,7 °C,1020.4hPa,13 km,9.2,9.1,9.3
2,2025-02-13 23:00:00,23:00,23h,9.0 °C 9 → 9.3,,0 mm/1h,7 km/h raf.10.8,87%,7.9,7 °C,1020.6hPa,9 km,9.0,9.0,9.3
3,2025-02-13 22:00:00,22:00,22h,9.3 °C 9.2 → 9.5,,0 mm/1h,4 km/h raf.7.2,86%,9.3,7.1 °C,1020.3hPa,9 km,9.3,9.2,9.5
4,2025-02-13 21:00:00,21:00,21h,9.4 °C 9.4 → 10.4,,0 mm/1h,4 km/h raf.5.8,83%,9.4,6.7 °C,1020.3hPa,17 km,9.4,9.4,10.4
5,2025-02-13 20:00:00,20:00,20h,10.4 °C 10.1 → 10.7,,0 mm/1h,4 km/h raf.7.6,79%,,6.9 °C,1020.4hPa,28 km,10.4,10.1,10.7
6,2025-02-13 19:00:00,19:00,19h,10.6 °C 10.6 → 11.6,,0 mm/1h,4 km/h raf.9,77%,,6.7 °C,1019.8hPa,35 km,10.6,10.6,11.6
7,2025-02-13 18:00:00,18:00,18h,11.7 °C 11.7 → 12.2,,0 mm/1h,7 km/h raf.12.2,69%,50.0,6.2 °C,1019.7hPa,60 km,11.7,11.7,12.2
8,2025-02-13 17:00:00,17:00,17h,12.2 °C 12.1 → 13.3,,0 mm/1h,7 km/h raf.20.2,69%,133.0,6.7 °C,1019.7hPa,60 km,12.2,12.1,13.3
9,2025-02-13 16:00:00,16:00,16h,13.0 °C 12.2 → 13.2,,0 mm/1h,11 km/h raf.18.4,65%,267.0,6.6 °C,1019.6hPa,50 km,13.0,12.2,13.2


In [101]:
# Drop the raw text columns that are not needed further on.
# Reassigning instead of inplace=True, together with errors="ignore", lets this
# cell be re-run without raising KeyError once the columns are already gone.
df = df.drop(columns=["Temperature","Heure locale", "Temps", "Bio-meteo","Pluie","Pt. de rosee","Pression"], errors="ignore")
df

Unnamed: 0,Day,Time,Vent,Humidite,Visibilite,Temperature_Mean,Temperature_Min,Temperature_Max
0,2025-02-14 01:00:00,01:00,11 km/h raf.16.2,85%,18 km,9.7,9.1,9.7
1,2025-02-14 00:00:00,00:00,7 km/h raf.12.6,86%,13 km,9.2,9.1,9.3
2,2025-02-13 23:00:00,23:00,7 km/h raf.10.8,87%,9 km,9.0,9.0,9.3
3,2025-02-13 22:00:00,22:00,4 km/h raf.7.2,86%,9 km,9.3,9.2,9.5
4,2025-02-13 21:00:00,21:00,4 km/h raf.5.8,83%,17 km,9.4,9.4,10.4
5,2025-02-13 20:00:00,20:00,4 km/h raf.7.6,79%,28 km,10.4,10.1,10.7
6,2025-02-13 19:00:00,19:00,4 km/h raf.9,77%,35 km,10.6,10.6,11.6
7,2025-02-13 18:00:00,18:00,7 km/h raf.12.2,69%,60 km,11.7,11.7,12.2
8,2025-02-13 17:00:00,17:00,7 km/h raf.20.2,69%,60 km,12.2,12.1,13.3
9,2025-02-13 16:00:00,16:00,11 km/h raf.18.4,65%,50 km,13.0,12.2,13.2


# Simpler approach: let pandas.read_html parse the table directly

In [34]:
from io import StringIO

url = "https://www.infoclimat.fr/observations-meteo/temps-reel/toulouse-blagnac/07630.html"
html = get_html(url)
# Passing literal HTML to read_html is deprecated, so wrap it in a file-like object.
# The table is selected by its id (the same one get_table_releves looks for) rather
# than by its position on the page, which would break if the page layout changed.
df = pd.read_html(StringIO(html), attrs={"id": "resptable-releves"})[0]
# "X" names the extra column this parse yields between the hour and the temperature
# (presumably the icon cell that get_data skips — TODO confirm); it is dropped below.
df.columns = ["Heure locale","X","Temperature", "Temps", "Pluie", "Vent", "Humidite", "Bio-meteo", "Pt. de rosee", "Pression", "Visibilite"]
df = df.drop(columns=["X"])
df

  df = pd.read_html(html)[1]


Unnamed: 0,Heure locale,Temperature,Temps,Pluie,Vent,Humidite,Bio-meteo,Pt. de rosee,Pression,Visibilite
0,01h,9.7 °C 9.1 → 9.7,,0 mm/1h,10 km/h raf.16.2,85%,,7.3 °C,1020.3hPa =,10 km
1,00h,9.2 °C 9.1 → 9.3,,0 mm/1h,7 km/h raf.12.6,86%,8.2,7 °C,1020.4hPa,13 km
2,23h,9.0 °C 9 → 9.3,,0 mm/1h,7 km/h raf.10.8,87%,7.9,7 °C,1020.6hPa,9 km
3,22h,9.3 °C 9.2 → 9.5,,0 mm/1h,4 km/h raf.7.2,86%,9.3,7.1 °C,1020.3hPa,9 km
4,21h,9.4 °C 9.4 → 10.4,,0 mm/1h,4 km/h raf.5.8,83%,9.4,6.7 °C,1020.3hPa,17 km
5,20h,10.4 °C 10.1 → 10.7,,0 mm/1h,4 km/h raf.7.6,79%,,6.9 °C,1020.4hPa,28 km
6,19h,10.6 °C 10.6 → 11.6,,0 mm/1h,4 km/h raf.9,77%,,6.7 °C,1019.8hPa,35 km
7,18h,11.7 °C 11.7 → 12.2,,0 mm/1h,7 km/h raf.12.2,69%,50,6.2 °C,1019.7hPa =,60 km
8,17h,12.2 °C 12.1 → 13.3,,0 mm/1h,7 km/h raf.20.2,69%,133,6.7 °C,1019.7hPa,60 km
9,16h,13.0 °C 12.2 → 13.2,,0 mm/1h,11 km/h raf.18.4,65%,267,6.6 °C,1019.6hPa,50 km
