In [1]:
from datetime import datetime
from urllib.parse import quote_plus

import pandas as pd
import requests
from bs4 import BeautifulSoup as bs

import my_passwords as ps
from my_passwords import API_key_weather

In [2]:
# Connection settings for the local MySQL server
schema = "gans"
host = "127.0.0.1"
user = "root"
password = ps.my_password
port = 3306

# URL-encode the password: characters such as "@", ":" or "/" would otherwise
# be parsed as part of the URL structure and break the connection string.
connection_string = f'mysql+pymysql://{user}:{quote_plus(password)}@{host}:{port}/{schema}'

In [3]:
# Load the whole `cities` table from the database into a DataFrame
cities_from_sql = pd.read_sql(sql="cities", con=connection_string)
cities_from_sql

Unnamed: 0,city_id,city,country_code
0,1,Berlin,DE
1,2,Hamburg,DE
2,3,Munich,DE
3,4,Stuttgart,DE


In [4]:
# Keep only the city names; they are used as the query term for the weather API
cities = cities_from_sql.loc[:, "city"]
cities

0       Berlin
1      Hamburg
2       Munich
3    Stuttgart
Name: city, dtype: object

First Method: Using URL, header and querystring

In [5]:
# Get the first forecast temperature for Berlin.
# The API key is sent in a request header; the remaining options go in the query string.
city_name = "Berlin"
url = "https://api.openweathermap.org/data/2.5/forecast"  # API URL
header = {"X-Api-Key": API_key_weather}  # API key
# We need the city name, and "metric" units to get the temperature in Celsius
querystring = {"q": city_name, "units": "metric"}
weather = requests.request("GET", url, headers=header, params=querystring, timeout=30)  # Send request
weather.raise_for_status()  # Surface a bad key / unknown city as an HTTP error, not a KeyError below
weather_json = weather.json()  # Get the response in a JSON format
weather_json["list"][0]["main"]["temp"]  # Extract the first value of temperature in the list


8.62

Second Method: Put everything in the URL

In [6]:
# Get the first forecast temperature for Berlin, this time with every option in the URL.
city_name = "Berlin"
# Use HTTPS: the API key is part of the URL here and must not travel unencrypted.
url = f"https://api.openweathermap.org/data/2.5/forecast?q={city_name}&appid={API_key_weather}&units=metric"
weather = requests.request("GET", url, timeout=30)  # Send request
weather.raise_for_status()  # Surface a bad key / unknown city as an HTTP error, not a KeyError below
weather_json = weather.json()  # Get the response in a JSON format
weather_json["list"][0]["main"]["temp"]  # Extract the first value of temperature in the list


8.62

get_weather_info() Function

In [7]:
def get_weather_info(cities, api_key=None):
  """Download the OpenWeatherMap forecast for each city and return one DataFrame.

  NOTE: a later cell in this notebook defines another ``get_weather_info``
  (it takes the cities DataFrame and returns ``city_id`` instead of the city
  name). Once that cell has run, it replaces this function in the kernel.

  Parameters
  ----------
  cities : iterable of str
      City names; each one is sent as the ``q`` parameter of a forecast request.
  api_key : str, optional
      OpenWeatherMap API key. When omitted, ``ps.API_key_weather`` from the
      local ``my_passwords`` module is used, so no key lives in the notebook.

  Returns
  -------
  pandas.DataFrame
      One row per forecast entry with the columns ``city``, ``country_code``,
      ``weather_time``, ``data_collected_time``, ``temperature``,
      ``weather_outlook``, ``weather_description``, ``wind_speed``,
      ``chance_rain``, ``rain`` and ``snow``. The two time columns are
      datetimes; ``rain`` and ``snow`` are floats, 0 when the entry has none.

  Raises
  ------
  requests.HTTPError
      If the API answers with an error status (for example an invalid key or
      an unknown city).
  """
  if api_key is None:
    api_key = ps.API_key_weather

  # These do not change between cities, so build them once.
  url = "https://api.openweathermap.org/data/2.5/forecast"
  header = {"X-Api-Key": api_key}
  columns = ["city", "country_code", "weather_time", "data_collected_time",
             "temperature", "weather_outlook", "weather_description",
             "wind_speed", "chance_rain", "rain", "snow"]

  rows = []
  for city_name in cities:
    querystring = {"q": city_name, "units": "metric"}
    weather = requests.request("GET", url, headers=header, params=querystring, timeout=30)
    weather.raise_for_status()
    weather_json = weather.json()
    # One collection timestamp per city, truncated to whole seconds.
    collected_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    for w in weather_json["list"]:
      rows.append({
        "city": weather_json["city"]["name"],
        "country_code": weather_json["city"]["country"],
        "weather_time": w["dt_txt"],
        "data_collected_time": collected_at,
        "temperature": w["main"]["temp"],
        "weather_outlook": w["weather"][0]["main"],
        "weather_description": w["weather"][0]["description"],
        "wind_speed": w["wind"]["speed"],
        "chance_rain": w["pop"],
        # "rain" / "snow" are only present when precipitation is forecast.
        "rain": (w.get("rain") or {}).get("3h", 0),
        "snow": (w.get("snow") or {}).get("3h", 0),
      })

  weather_df = pd.DataFrame(rows, columns=columns)
  weather_df["weather_time"] = pd.to_datetime(weather_df["weather_time"])
  weather_df["data_collected_time"] = pd.to_datetime(weather_df["data_collected_time"])
  # Cast both columns so their dtype does not depend on whether any entry had precipitation.
  weather_df[["rain", "snow"]] = weather_df[["rain", "snow"]].astype(float)
  # The API reports Frankfurt under its full name; shorten it to "Frankfurt".
  weather_df.loc[weather_df["city"] == "Frankfurt am Main", "city"] = "Frankfurt"

  return weather_df

In [8]:
# Fetch the forecast for every city name in `cities`
weather_df=get_weather_info(cities)

In [9]:
# Display the collected forecast rows
weather_df

Unnamed: 0,city,country_code,weather_time,data_collected_time,temperature,weather_outlook,weather_description,wind_speed,chance_rain,rain,snow
0,Berlin,DE,2024-03-18 15:00:00,2024-03-18 15:40:25,8.62,Clear,clear sky,2.93,0.00,0.00,0.0
1,Berlin,DE,2024-03-18 18:00:00,2024-03-18 15:40:25,7.55,Clouds,scattered clouds,2.73,0.00,0.00,0.0
2,Berlin,DE,2024-03-18 21:00:00,2024-03-18 15:40:25,5.19,Clouds,scattered clouds,2.77,0.00,0.00,0.0
3,Berlin,DE,2024-03-19 00:00:00,2024-03-18 15:40:25,1.98,Clouds,broken clouds,2.54,0.00,0.00,0.0
4,Berlin,DE,2024-03-19 03:00:00,2024-03-18 15:40:25,0.92,Clouds,few clouds,2.41,0.00,0.00,0.0
...,...,...,...,...,...,...,...,...,...,...,...
155,Stuttgart,DE,2024-03-23 00:00:00,2024-03-18 15:40:26,9.71,Clouds,scattered clouds,1.93,0.00,0.00,0.0
156,Stuttgart,DE,2024-03-23 03:00:00,2024-03-18 15:40:26,8.92,Clouds,few clouds,2.05,0.00,0.00,0.0
157,Stuttgart,DE,2024-03-23 06:00:00,2024-03-18 15:40:26,10.20,Rain,light rain,4.96,0.84,0.96,0.0
158,Stuttgart,DE,2024-03-23 09:00:00,2024-03-18 15:40:26,7.35,Rain,light rain,8.10,1.00,2.01,0.0


Data cleaning

In [10]:
# Inspect column dtypes and non-null counts before writing to the database
weather_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 160 entries, 0 to 159
Data columns (total 11 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   city                 160 non-null    object        
 1   country_code         160 non-null    object        
 2   weather_time         160 non-null    datetime64[ns]
 3   data_collected_time  160 non-null    datetime64[ns]
 4   temperature          160 non-null    float64       
 5   weather_outlook      160 non-null    object        
 6   weather_description  160 non-null    object        
 7   wind_speed           160 non-null    float64       
 8   chance_rain          160 non-null    float64       
 9   rain                 160 non-null    float64       
 10  snow                 160 non-null    float64       
dtypes: datetime64[ns](2), float64(5), object(4)
memory usage: 13.9+ KB


In [11]:
# Make sure the time columns are datetimes and `snow` is a float.
# get_weather_info() already applies these conversions before returning,
# so this cell leaves a freshly fetched frame unchanged.
for time_column in ("weather_time", "data_collected_time"):
    weather_df[time_column] = pd.to_datetime(weather_df[time_column])
weather_df["snow"] = weather_df["snow"].astype(float)


Connect to SQL

In [12]:
from my_passwords import my_password

In [13]:
# Connection settings for the local MySQL server
schema = "gans"
host = "127.0.0.1"
user = "root"
password = my_password
port = 3306

# URL-encode the password: characters such as "@", ":" or "/" would otherwise
# be parsed as part of the URL structure and break the connection string.
connection_string = f'mysql+pymysql://{user}:{quote_plus(password)}@{host}:{port}/{schema}'

In [14]:
# Reload the `cities` table; its `city_id` column is what links forecasts to cities
cities_from_sql = pd.read_sql(sql="cities", con=connection_string)
cities_from_sql

Unnamed: 0,city_id,city,country_code
0,1,Berlin,DE
1,2,Hamburg,DE
2,3,Munich,DE
3,4,Stuttgart,DE


In [15]:
def get_weather_info(cities_df, api_key=None):
  """Download the OpenWeatherMap forecast for every row of ``cities_df``.

  Each forecast row carries the ``city_id`` of the input row it was requested
  for, so the result can be stored next to the ``cities`` table.

  Parameters
  ----------
  cities_df : pandas.DataFrame
      Must contain the columns ``city`` (sent as the ``q`` parameter of the
      forecast request) and ``city_id``.
  api_key : str, optional
      OpenWeatherMap API key. When omitted, ``ps.API_key_weather`` from the
      local ``my_passwords`` module is used, so no key lives in the notebook.

  Returns
  -------
  pandas.DataFrame
      One row per forecast entry with the columns ``city_id``,
      ``weather_time``, ``data_collected_time``, ``temperature``,
      ``weather_outlook``, ``weather_description``, ``wind_speed``,
      ``chance_rain``, ``rain`` and ``snow``. The two time columns are
      datetimes; ``rain`` and ``snow`` are floats, 0 when the entry has none.

  Raises
  ------
  requests.HTTPError
      If the API answers with an error status (for example an invalid key or
      an unknown city).
  """
  if api_key is None:
    api_key = ps.API_key_weather

  # These do not change between cities, so build them once.
  url = "https://api.openweathermap.org/data/2.5/forecast"
  header = {"X-Api-Key": api_key}
  columns = ["city_id", "weather_time", "data_collected_time", "temperature",
             "weather_outlook", "weather_description", "wind_speed",
             "chance_rain", "rain", "snow"]

  rows = []
  # Walk id and name together: looking the id up by name again would give every
  # row that shares a city name the id of the first match.
  for city_id, city_name in zip(cities_df["city_id"], cities_df["city"]):
    querystring = {"q": city_name, "units": "metric"}
    weather = requests.request("GET", url, headers=header, params=querystring, timeout=30)
    weather.raise_for_status()
    weather_json = weather.json()
    # One collection timestamp per city, truncated to whole seconds.
    collected_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    for w in weather_json["list"]:
      rows.append({
        "city_id": city_id,
        "weather_time": w["dt_txt"],
        "data_collected_time": collected_at,
        "temperature": w["main"]["temp"],
        "weather_outlook": w["weather"][0]["main"],
        "weather_description": w["weather"][0]["description"],
        "wind_speed": w["wind"]["speed"],
        "chance_rain": w["pop"],
        # "rain" / "snow" are only present when precipitation is forecast.
        "rain": (w.get("rain") or {}).get("3h", 0),
        "snow": (w.get("snow") or {}).get("3h", 0),
      })

  weather_df = pd.DataFrame(rows, columns=columns)
  weather_df["weather_time"] = pd.to_datetime(weather_df["weather_time"])
  weather_df["data_collected_time"] = pd.to_datetime(weather_df["data_collected_time"])
  # Cast both columns so their dtype does not depend on whether any entry had precipitation.
  weather_df[["rain", "snow"]] = weather_df[["rain", "snow"]].astype(float)

  return weather_df

In [16]:
# Fetch the forecast for every city in the `cities` table, keyed by city_id
weather_infos_df=get_weather_info(cities_from_sql)

In [17]:
# Display the rows that are about to be written to the database
weather_infos_df

Unnamed: 0,city_id,weather_time,data_collected_time,temperature,weather_outlook,weather_description,wind_speed,chance_rain,rain,snow
0,1,2024-03-18 15:00:00,2024-03-18 15:40:27,8.62,Clear,clear sky,2.93,0.00,0.00,0.0
1,1,2024-03-18 18:00:00,2024-03-18 15:40:27,7.55,Clouds,scattered clouds,2.73,0.00,0.00,0.0
2,1,2024-03-18 21:00:00,2024-03-18 15:40:27,5.19,Clouds,scattered clouds,2.77,0.00,0.00,0.0
3,1,2024-03-19 00:00:00,2024-03-18 15:40:27,1.98,Clouds,broken clouds,2.54,0.00,0.00,0.0
4,1,2024-03-19 03:00:00,2024-03-18 15:40:27,0.92,Clouds,few clouds,2.41,0.00,0.00,0.0
...,...,...,...,...,...,...,...,...,...,...
155,4,2024-03-23 00:00:00,2024-03-18 15:40:27,9.71,Clouds,scattered clouds,1.93,0.00,0.00,0.0
156,4,2024-03-23 03:00:00,2024-03-18 15:40:27,8.92,Clouds,few clouds,2.05,0.00,0.00,0.0
157,4,2024-03-23 06:00:00,2024-03-18 15:40:27,10.20,Rain,light rain,4.96,0.84,0.96,0.0
158,4,2024-03-23 09:00:00,2024-03-18 15:40:27,7.35,Rain,light rain,8.10,1.00,2.01,0.0


In [18]:
# Append the forecast rows to the `weather_infos` table.
# With if_exists="append" every run adds rows to what the table already holds,
# so re-running this cell inserts the same forecast again.
weather_infos_df.to_sql(
    name="weather_infos",
    con=connection_string,
    if_exists="append",
    index=False,
)

160

In [19]:
# Read the table back to check what was actually stored.
# NOTE(review): in the recorded output the numeric values come back as whole
# numbers (e.g. 9.0 where 8.62 was written), so the table's numeric columns
# probably do not keep decimals - check the `weather_infos` schema.
weather_infos_from_sql = pd.read_sql(sql="weather_infos", con=connection_string)
weather_infos_from_sql

Unnamed: 0,weather_id,city_id,weather_time,data_collected_time,temperature,weather_outlook,weather_description,wind_speed,chance_rain,rain,snow
0,1,1,2024-03-18 15:00:00,2024-03-18 15:40:27,9.0,Clear,clear sky,3.0,0.0,0.0,0.0
1,2,1,2024-03-18 18:00:00,2024-03-18 15:40:27,8.0,Clouds,scattered clouds,3.0,0.0,0.0,0.0
2,3,1,2024-03-18 21:00:00,2024-03-18 15:40:27,5.0,Clouds,scattered clouds,3.0,0.0,0.0,0.0
3,4,1,2024-03-19 00:00:00,2024-03-18 15:40:27,2.0,Clouds,broken clouds,3.0,0.0,0.0,0.0
4,5,1,2024-03-19 03:00:00,2024-03-18 15:40:27,1.0,Clouds,few clouds,2.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...
155,156,4,2024-03-23 00:00:00,2024-03-18 15:40:27,10.0,Clouds,scattered clouds,2.0,0.0,0.0,0.0
156,157,4,2024-03-23 03:00:00,2024-03-18 15:40:27,9.0,Clouds,few clouds,2.0,0.0,0.0,0.0
157,158,4,2024-03-23 06:00:00,2024-03-18 15:40:27,10.0,Rain,light rain,5.0,1.0,1.0,0.0
158,159,4,2024-03-23 09:00:00,2024-03-18 15:40:27,7.0,Rain,light rain,8.0,1.0,2.0,0.0
