1. Web scraping manually + function

In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
from lat_lon_parser import parse
from sqlalchemy import create_engine 
import mysql.connector

#Berlin

In [2]:
url = "https://en.wikipedia.org/wiki/Berlin"

response = requests.get(url)

soup_berlin = BeautifulSoup(response.content, 'html.parser')

country_berlin = soup_berlin.find('td', class_='infobox-data').get_text(strip=True)

lng_berlin = soup_berlin.find('span', class_='longitude').get_text(strip=True)
ltd_berlin = soup_berlin.find('span', class_='latitude').get_text(strip=True)

#Hamburg

In [3]:
url = "https://en.wikipedia.org/wiki/Hamburg"

response = requests.get(url)

soup_hamburg = BeautifulSoup(response.content, 'html.parser')

country_hamburg = soup_hamburg.find('td', class_='infobox-data').get_text(strip=True)

lng_hamburg = soup_hamburg.find('span', class_='longitude').get_text(strip=True)
ltd_hamburg = soup_hamburg.find('span', class_='latitude').get_text(strip=True)


#Munich

In [4]:
url = "https://en.wikipedia.org/wiki/Munich"

response = requests.get(url)

soup_munich = BeautifulSoup(response.content, 'html.parser')

country_munich = soup_munich.find('td', class_='infobox-data').get_text(strip=True)

lng_munich = soup_munich.find('span', class_='longitude').get_text(strip=True)
ltd_munich = soup_munich.find('span', class_='latitude').get_text(strip=True)

In [5]:
#Function to replace steps above and to be able to use it for any other city
def cities_dataframe(cities):
  cities_data = []

  for city in cities:
    city_data = {}

    # city
    city_data["City"] = city

    # create the soup
    url = f"https://www.wikipedia.org/wiki/{city}"
    response = requests.get(url)
    city_soup = BeautifulSoup(response.content, 'html.parser')

    # country
    city_data["Country"] = city_soup.find(class_="infobox-data").get_text()

    # population
    city_population = city_soup.find(string="Population").find_next("td").get_text()
    city_population_clean = int(city_population.replace(",", ""))
    city_data["Population"] = city_population_clean

    # data retrieved
    city_data["Year_Data_Retrieved"] = city_soup.find(string="Population").find_next().get_text()[2:6]

    # latitude and longitude
    city_data["Latitude"] = city_soup.find(class_="latitude").get_text()
    city_data["Longitude"] = city_soup.find(class_="longitude").get_text()
    city_data["Latitude"] = parse(city_data["Latitude"])
    city_data["Longitude"] = parse(city_data["Longitude"])

    # append this city's data to the cities list
    cities_data.append(city_data)

  return pd.DataFrame(cities_data)

In [None]:
# call the function
list_of_cities = ["Berlin", "Hamburg", "Munich"]

cities_df = cities_dataframe(list_of_cities)
cities_df

2.Setting up environment in mysql WORKBENCH

In [None]:
###TO BE RUN IN MYSQL WORKBENCH ####
/***************************
Setting up the environment
***************************/

-- Drop the database if it already exists
DROP DATABASE IF EXISTS gans

-- Create the database
CREATE DATABASE gans;

-- Use the database
USE gans;



CREATE TABLE cities (
    city_id INT AUTO_INCREMENT, -- Automatically generated ID for each city
    city_name VARCHAR(255) NOT NULL, 
    country VARCHAR(255),
    longitude DECIMAL(9,6) NOT NULL,
	latitude DECIMAL(9,6) NOT NULL,
    PRIMARY KEY (city_id) -- Primary key to uniquely identify each city
);


CREATE TABLE population (
    Population_id INT AUTO_INCREMENT,
    Population INT NOT NULL,
    Year_Data_Retrieved VARCHAR(255),
    City_id INT,
    PRIMARY KEY (Population_id),
    FOREIGN KEY (City_id) REFERENCES cities(City_id)
);

CREATE TABLE Weather (
    Weather_id INT AUTO_INCREMENT,
    City_id INT,
    City_name VARCHAR(255),
    Datetime DATETIME,
    Temp DECIMAL(5,2),
    Feels_like DECIMAL(5,2),
    Humidity INT,
    Weather_main VARCHAR(255),
    Weather_desc VARCHAR (255),
    Wind_speed DECIMAL(5,2),
    Clouds INT,
    Rain_3h DECIMAL(5,2),
    PRIMARY KEY (Weather_id),
    FOREIGN KEY (City_id) REFERENCES cities(City_id)
);

In [None]:
3. Seperating DATAFRAME into two tables

3.0

In [9]:
cities_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 6 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   City                 3 non-null      object 
 1   Country              3 non-null      object 
 2   Population           3 non-null      int64  
 3   Year_Data_Retrieved  3 non-null      object 
 4   Latitude             3 non-null      float64
 5   Longitude            3 non-null      float64
dtypes: float64(2), int64(1), object(3)
memory usage: 276.0+ bytes


In [78]:
# Create the "cities df" for the relational data base

cities_to_db = cities_df[["City", "Country", "Latitude", "Longitude"]]
cities_to_db

Unnamed: 0,City,Country,Latitude,Longitude
0,Berlin,Germany,52.52,13.405
1,Hamburg,Germany,53.55,10.0
2,Munich,Germany,48.1375,11.575


In [79]:
cities_to_db.rename(columns={"City": "City_name"}, inplace=True)
cities_to_db

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cities_to_db.rename(columns={"City": "City_name"}, inplace=True)


Unnamed: 0,City_name,Country,Latitude,Longitude
0,Berlin,Germany,52.52,13.405
1,Hamburg,Germany,53.55,10.0
2,Munich,Germany,48.1375,11.575


In [80]:
# Create the "population df" for the relational data base

population_to_db = cities_df[["Population", "Year_Data_Retrieved"]]
population_to_db

Unnamed: 0,Population,Year_Data_Retrieved
0,3596999,2022
1,1964021,2023
2,1510378,2023


4. Sending Information to SQL and from

In [None]:
#Info for connection between mySQL Workbench and VS code
schema = "gans"
host = "127.0.0.1" #localhost
user = "root"
password = "INSERTPASSWORDHERE" #root password which was set up initially when installing 
port = 3306

connection_string = f'mysql+pymysql://{user}:{password}@{host}:{port}/{schema}'

In [None]:
# Push the "cities_to_db" to the empty "cities" table in the MySQL data base

cities_to_db.to_sql('Cities',
                  if_exists='append',
                  con=connection_string,
                  index=False)

3

In [81]:
#double checking if it worked by reading it back into VS code
cities_from_sql = pd.read_sql("Cities", con=connection_string)
cities_from_sql

Unnamed: 0,City_id,City_Name,Country,Longitude,Latitude
0,1,Berlin,Germany,13.405,52.52
1,2,Hamburg,Germany,10.0,53.55
2,3,Munich,Germany,11.575,48.1375


In [85]:
# Getting the "City_id" to the population df--> needed for relational database connections

population_to_db["City_id"] = cities_from_sql["City_id"]
population_to_db

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  population_to_db["City_id"] = cities_from_sql["City_id"]


Unnamed: 0,Population,Year_Data_Retrieved,City_id
0,3596999,2022,1
1,1964021,2023,2
2,1510378,2023,3


In [86]:
population_to_db.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 3 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   Population           3 non-null      int64 
 1   Year_Data_Retrieved  3 non-null      object
 2   City_id              3 non-null      int64 
dtypes: int64(2), object(1)
memory usage: 204.0+ bytes


In [28]:
# Push the "population_to_db" to the empty "population" table in the MySQL data base

population_to_db.to_sql('population',
                  if_exists='append',
                  con=connection_string,
                  index=False)

3

5. Setting up open weather API JSON file to decide which we want to keep

In [125]:
# Berlin # testing first with one city to make sure it works and to determine what to keep from JSON
latitude = 52.520008
longitude = 13.404954

API_key = "89b4a954f2f955292f6b3136080e5184"

url = (f"https://api.openweathermap.org/data/2.5/forecast?lat={latitude}&lon={longitude}&appid={API_key}&units=metric")

response = requests.get(url)
json_berlin = response.json()

json_berlin

{'cod': '200',
 'message': 0,
 'cnt': 40,
 'list': [{'dt': 1744718400,
   'main': {'temp': 21.33,
    'feels_like': 20.93,
    'temp_min': 21.33,
    'temp_max': 21.47,
    'pressure': 1004,
    'sea_level': 1004,
    'grnd_level': 997,
    'humidity': 54,
    'temp_kf': -0.14},
   'weather': [{'id': 801,
     'main': 'Clouds',
     'description': 'few clouds',
     'icon': '02d'}],
   'clouds': {'all': 16},
   'wind': {'speed': 6.35, 'deg': 132, 'gust': 9.07},
   'visibility': 10000,
   'pop': 0,
   'sys': {'pod': 'd'},
   'dt_txt': '2025-04-15 12:00:00'},
  {'dt': 1744729200,
   'main': {'temp': 19.59,
    'feels_like': 19.2,
    'temp_min': 18.75,
    'temp_max': 19.59,
    'pressure': 1003,
    'sea_level': 1003,
    'grnd_level': 997,
    'humidity': 61,
    'temp_kf': 0.84},
   'weather': [{'id': 803,
     'main': 'Clouds',
     'description': 'broken clouds',
     'icon': '04d'}],
   'clouds': {'all': 67},
   'wind': {'speed': 5.97, 'deg': 125, 'gust': 9.83},
   'visibility': 10

In [None]:
json["list"] #vizualising what to keep from the info 


#BERLIN PUSHING WEATHER DATA TO SQL WORKBENCH

In [None]:
weather_data = json_berlin['list']  # Extract forecast list

# Extract only the relevant fields
filtered_data = []
for entry in weather_data:
    filtered_entry = {
        'datetime': entry['dt_txt'],
        'temp': entry['main']['temp'],
        'feels_like': entry['main']['feels_like'],
        'humidity': entry['main']['humidity'],
        'weather_main': entry['weather'][0]['main'],
        'weather_desc': entry['weather'][0]['description'],
        'wind_speed': entry['wind']['speed'],
        'clouds': entry['clouds']['all'],
        'rain_3h': entry.get('rain', {}).get('3h', 0.0) #fallback if it doesnt rain is 0.0
    }
    filtered_data.append(filtered_entry)

# Convert to DataFrame
df_berlin = pd.DataFrame(filtered_data)

#Convert datetime to datetime object
df_berlin['datetime'] = pd.to_datetime(df_berlin['datetime'])

df_berlin.columns = df_berlin.columns.str.capitalize()  #capitalizing every column name

In [134]:
cities = pd.read_sql("Cities", con=connection_string) #reading back from sql
cities

Unnamed: 0,City_id,City_Name,Country,Longitude,Latitude
0,1,Berlin,Germany,13.405,52.52
1,2,Hamburg,Germany,10.0,53.55
2,3,Munich,Germany,11.575,48.1375


In [135]:
# Create SQLAlchemy engine
engine = create_engine(connection_string)

# Step 1: Fetch City_ID for Berlin from the Cities table
conn = mysql.connector.connect(
    host=host,
    user=user,
    password=password,
    database=schema,
    port=port
)

cursor = conn.cursor()

# Fetch the City_ID for Berlin
cursor.execute("SELECT City_ID FROM Cities WHERE City_Name = 'Berlin'")
city_id = cursor.fetchone()[0]  # Gets the first row, fetches the first column (City_ID)

# Close the cursor and connection
cursor.close()
conn.close()

In [136]:
# Getting the "City_id" to the weather df--> needed for relational database connections
df_berlin['City_ID'] = city_id
df_berlin

Unnamed: 0,Datetime,Temp,Feels_like,Humidity,Weather_main,Weather_desc,Wind_speed,Clouds,Rain_3h,City_ID
0,2025-04-15 12:00:00,21.33,20.93,54,Clouds,few clouds,6.35,16,0.0,1
1,2025-04-15 15:00:00,19.59,19.2,61,Clouds,broken clouds,5.97,67,0.0,1
2,2025-04-15 18:00:00,15.22,14.65,71,Clouds,overcast clouds,4.14,100,0.0,1
3,2025-04-15 21:00:00,13.69,13.18,79,Clouds,overcast clouds,3.29,98,0.0,1
4,2025-04-16 00:00:00,13.74,13.39,85,Clouds,overcast clouds,1.74,99,0.0,1
5,2025-04-16 03:00:00,13.25,12.88,86,Clouds,overcast clouds,2.01,99,0.0,1
6,2025-04-16 06:00:00,13.88,13.41,80,Clouds,broken clouds,2.5,69,0.0,1
7,2025-04-16 09:00:00,19.08,18.58,59,Clear,clear sky,2.12,2,0.0,1
8,2025-04-16 12:00:00,22.95,22.4,42,Clear,clear sky,1.98,2,0.0,1
9,2025-04-16 15:00:00,24.0,23.45,38,Clouds,few clouds,1.23,11,0.0,1


In [124]:
#pushing weather data for Berlin to DB
df_berlin.to_sql('Weather',
                  if_exists='append',
                  con=connection_string,
                  index=False)
                

40

#HAMBURG PUSHING WEATHER DATA TO SQL WORKBENCH

In [None]:
#HAMBURG
latitude = 10.000000
longitude = 10.000000

API_key = "89b4a954f2f955292f6b3136080e5184"

url = (f"https://api.openweathermap.org/data/2.5/forecast?lat={latitude}&lon={longitude}&appid={API_key}&units=metric")

response = requests.get(url)
json_hamburg = response.json()

json_hamburg

In [139]:
weather_data = json_hamburg['list']  # Extract forecast list

# Extract only the relevant fields
filtered_data = []
for entry in weather_data:
    filtered_entry = {
        'datetime': entry['dt_txt'],
        'temp': entry['main']['temp'],
        'feels_like': entry['main']['feels_like'],
        'humidity': entry['main']['humidity'],
        'weather_main': entry['weather'][0]['main'],
        'weather_desc': entry['weather'][0]['description'],
        'wind_speed': entry['wind']['speed'],
        'clouds': entry['clouds']['all'],
        'rain_3h': entry.get('rain', {}).get('3h', 0.0) #fallback if it doesnt rain is 0.0
    }
    filtered_data.append(filtered_entry)

# Convert to DataFrame
df_hamburg = pd.DataFrame(filtered_data)

#Convert datetime to datetime object
df_hamburg['datetime'] = pd.to_datetime(df_hamburg['datetime'])

df_hamburg.columns = df_hamburg.columns.str.capitalize()  #capitalizing every column name

In [141]:
# Create SQLAlchemy engine
engine = create_engine(connection_string)

# Step 1: Fetch City_ID for Hamburg from the Cities table
conn = mysql.connector.connect(
    host=host,
    user=user,
    password=password,
    database=schema,
    port=port
)

cursor = conn.cursor()

# Fetch the City_ID for Hamburg
cursor.execute("SELECT City_ID FROM Cities WHERE City_Name = 'Hamburg'")
city_id = cursor.fetchone()[0]  # Gets the first row, fetches the first column (City_ID)

# Close the cursor and connection
cursor.close()
conn.close()


In [None]:
# Getting the "City_id" to the weather df--> needed for relational database connections
df_hamburg['City_ID'] = city_id
df_hamburg

In [143]:
#pushing weather data for Hamburg to DB
df_hamburg.to_sql('Weather',
                  if_exists='append',
                  con=connection_string,
                  index=False)

40

#MUNICH PUSHING WEATHER DATA TO SQL WORKBENCH

In [None]:
#MUNICH
latitude = 48.137500
longitude = 11.575000

API_key = "89b4a954f2f955292f6b3136080e5184"

url = (f"https://api.openweathermap.org/data/2.5/forecast?lat={latitude}&lon={longitude}&appid={API_key}&units=metric")

response = requests.get(url)
json_munich = response.json()

json_munich

In [146]:
weather_data = json_munich['list']  # Extract forecast list

# Extract only the relevant fields
filtered_data = []
for entry in weather_data:
    filtered_entry = {
        'datetime': entry['dt_txt'],
        'temp': entry['main']['temp'],
        'feels_like': entry['main']['feels_like'],
        'humidity': entry['main']['humidity'],
        'weather_main': entry['weather'][0]['main'],
        'weather_desc': entry['weather'][0]['description'],
        'wind_speed': entry['wind']['speed'],
        'clouds': entry['clouds']['all'],
        'rain_3h': entry.get('rain', {}).get('3h', 0.0) #fallback if it doesnt rain is 0.0
    }
    filtered_data.append(filtered_entry)

# Convert to DataFrame
df_munich = pd.DataFrame(filtered_data)

#Convert datetime to datetime object
df_munich['datetime'] = pd.to_datetime(df_munich['datetime'])

df_munich.columns = df_munich.columns.str.capitalize()  #capitalizing every column name

In [147]:
# Create SQLAlchemy engine
engine = create_engine(connection_string)

# Step 1: Fetch City_ID for Munich from the Cities table
conn = mysql.connector.connect(
    host=host,
    user=user,
    password=password,
    database=schema,
    port=port
)

cursor = conn.cursor()

# Fetch the City_ID for Munich
cursor.execute("SELECT City_ID FROM Cities WHERE City_Name = 'Munich'")
city_id = cursor.fetchone()[0]  # Gets the first row, fetches the first column (City_ID)

# Close the cursor and connection
cursor.close()
conn.close()

In [None]:
# Getting the "City_id" to the weather df--> needed for relational database connections
df_munich['City_ID'] = city_id
df_munich

In [149]:
#pushing weather data for Munich to DB
df_munich.to_sql('Weather',
                  if_exists='append',
                  con=connection_string,
                  index=False)

40

6. Creating the function to retrieve city data from my SQL workbench and to add weather data for cities

# Step 1: Load data
cities_df = pd.read_sql("SELECT * FROM cities", con=engine)

# Step 2: Call API for each city
for city in cities_df:
    # do stuff

# Step 3: Clean & prepare data
# Step 4: Store in DB


Step-by-Step	       Functional Thinking
Connect to DB	    ➡️ create_connection_string()
Read cities table	➡️ fetch_cities_data()
Call API per city	➡️ fetch_weather_data()
Save weather data	➡️ store_weather_data()
Combine everything	➡️ retrieve_and_store_weather()

In [None]:
# 🔧 CONFIG
schema = "gans"
host = "127.0.0.1"
user = "root"
password = "INSERTPASSWORDHERE"
port = 3306
API_key = "89b4a954f2f955292f6b3136080e5184"

# ✅ Step 1: Define database connection

# Create connection string for SQLAlchemy
connection_string = f'mysql+mysqlconnector://{user}:{password}@{host}:{port}/{schema}'
engine = create_engine(connection_string)


# ✅ Step 2: Load city data from database
cities_df = pd.read_sql("SELECT * FROM cities", con=engine)


# API key for weather data
API_key = "89b4a954f2f955292f6b3136080e5184"

# Prepare list to hold all weather entries
weather_data = []

# ✅ Step 3: Loop over cities and fetch weather data
for _, city in cities_df.iterrows():
    latitude = city['Latitude']
    longitude = city['Longitude']
    city_id = city['City_id'] 

    # Make API call
    url = (
        f"https://api.openweathermap.org/data/2.5/forecast?lat={latitude}"
        f"&lon={longitude}&appid={API_key}&units=metric"
    )
    response = requests.get(url)
    data = response.json()

    # ✅ Step 4: Process weather entries for each city
    for entry in data['list']:
        weather_entry = {
            'city_id': city_id,
            'datetime': entry['dt_txt'],
            'temp': entry['main']['temp'],
            'feels_like': entry['main']['feels_like'],
            'humidity': entry['main']['humidity'],
            'weather_main': entry['weather'][0]['main'],
            'weather_desc': entry['weather'][0]['description'],
            'wind_speed': entry['wind']['speed'],
            'clouds': entry['clouds']['all'],
            'rain_3h': entry.get('rain', {}).get('3h', 0.0)
        }
        weather_data.append(weather_entry)

# ✅ Step 5: Create DataFrame
df = pd.DataFrame(weather_data)

# Convert datetime column
df['datetime'] = pd.to_datetime(df['datetime'])

# Capitalize column names
df.columns = df.columns.str.capitalize()

# ✅ Optional: Save to database
df.to_sql('weather', con=engine, if_exists='append', index=False)


120

In [199]:
#testing it by first running in SQL:
#TRUNCATE TABLE weather; #removes table but not the structure and schema of it

retrieve_and_send_data()





#EXAMPLE SOLUTIONS BELOW
_________________________________________________________________________________________________________________________
_________________________________________________________________________________________________________________________
_________________________________________________________________________________________________________________________
_________________________________________________________________________________________________________________________
_________________________________________________________________________________________________________________________


In [187]:
def retrieve_and_send_data():
  connection_string = create_connection_string()
  cities_df = fetch_cities_data(connection_string)
  weather_df = fetch_weather_data(cities_df)
  store_weather_data(weather_df, connection_string)
  return "Data has been updated"

def create_connection_string():
  schema = "gans"
  host = "127.0.0.1"
  user = "root"
  password = "YOUR_PASSWORD_HERE"
  port = 3306
  return f'mysql+pymysql://{user}:{password}@{host}:{port}/{schema}'

def fetch_cities_data(connection_string):
  return pd.read_sql("cities", con=connection_string)

def fetch_weather_data(cities_df):
  berlin_timezone = timezone('Europe/Berlin')
  API_key = 'YOUR_API_HERE'
  weather_items = []

  for _, city in cities_df.iterrows():
      latitude = city["latitude"]
      longitude = city["longitude"]
      city_id = city["city_id"]

      url = (f"https://api.openweathermap.org/data/2.5/forecast?lat={latitude}&lon={longitude}&appid={API_key}&units=metric")
      response = requests.get(url)
      weather_data = response.json()

      retrieval_time = datetime.now(berlin_timezone).strftime("%Y-%m-%d %H:%M:%S")

      for item in weather_data["list"]:
          weather_item = {
              "city_id": city_id,
              "forecast_time": item.get("dt_txt"),
              "temperature": item["main"].get("temp"),
              "forecast": item["weather"][0].get("main"),
              "rain_in_last_3h": item.get("rain", {}).get("3h", 0),
              "wind_speed": item["wind"].get("speed"),
              "data_retrieved_at": retrieval_time
          }
          weather_items.append(weather_item)

  weather_df = pd.DataFrame(weather_items)
  weather_df["forecast_time"] = pd.to_datetime(weather_df["forecast_time"])
  weather_df["data_retrieved_at"] = pd.to_datetime(weather_df["data_retrieved_at"])

  return weather_df

def store_weather_data(weather_df, connection_string):
  weather_df.to_sql('weather',
                    if_exists='append',
                    con=connection_string,
                    index=False)

In [None]:
######FROM CHATPGT BUT WORKING
# 🔧 CONFIG
schema = "gans"
host = "127.0.0.1"
user = "root"
password = "INSERTPASSWORDHERE"
port = 3306
API_key = "89b4a954f2f955292f6b3136080e5184"


# ✅ STEP 1: Create SQLAlchemy engine
def create_connection_string():
    return f"mysql+mysqlconnector://{user}:{password}@{host}:{port}/{schema}"

# ✅ STEP 2: Fetch cities data from SQL
def fetch_cities_data(connection_string):
    engine = create_engine(connection_string)
    query = "SELECT * FROM cities"
    return pd.read_sql(query, con=engine)

# ✅ STEP 3: Fetch weather data from API for each city
def fetch_weather_data(cities_df):
    weather_data = []

    for _, city in cities_df.iterrows():
        latitude = city['Latitude']
        longitude = city['Longitude']
        city_id = city['City_id']

        url = (
            f"https://api.openweathermap.org/data/2.5/forecast?"
            f"lat={latitude}&lon={longitude}&appid={API_key}&units=metric"
        )
        response = requests.get(url)
        data = response.json()

        for entry in data['list']:
            weather_data.append({
                'city_id': city_id,
                'datetime': entry['dt_txt'],
                'temp': entry['main']['temp'],
                'feels_like': entry['main']['feels_like'],
                'humidity': entry['main']['humidity'],
                'weather_main': entry['weather'][0]['main'],
                'weather_desc': entry['weather'][0]['description'],
                'wind_speed': entry['wind']['speed'],
                'clouds': entry['clouds']['all'],
                'rain_3h': entry.get('rain', {}).get('3h', 0.0)
            })

    return pd.DataFrame(weather_data)

# ✅ STEP 4: Store the data in your SQL DB
def store_weather_data(df, connection_string):
    df['datetime'] = pd.to_datetime(df['datetime'])
    df.columns = df.columns.str.capitalize()

    engine = create_engine(connection_string)
    df.to_sql('weather', con=engine, if_exists='append', index=False)

# ✅ STEP 5: The main function to run all steps
def retrieve_and_send_data():
    connection_string = create_connection_string()
    cities_df = fetch_cities_data(connection_string)
    weather_df = fetch_weather_data(cities_df)
    store_weather_data(weather_df, connection_string)


In [195]:
retrieve_and_send_data()

7. SENDING IT TO CLOUD  --> basically the same steps as above but on the cloud 

In [None]:
#Info for connection between mySQL Workbench and VS code
schema = "gans_cloud"
host = "xxxxx" #host from Google cloud account (public IP)
user = "root"
password = "INSERTPASSWORDHERE"
port = 3306

connection_string = f'mysql+pymysql://{user}:{password}@{host}:{port}/{schema}'

In [203]:
# Push the "cities_to_db" to the empty "cities" table in the MySQL data base

cities_to_db.to_sql('Cities',
                  if_exists='append',
                  con=connection_string,
                  index=False)

3