# <span style="color:#ff5f27">📝 Imports

In [24]:
import pandas as pd

from bs4 import BeautifulSoup
import requests 
import re
from datetime import date

import time

# <span style="color:#ff5f27"> 🏙️ Cities

In [2]:
locations_dict = {
    'London': 'https://www.yr.no/en/forecast/daily-table/2-2643743/United%20Kingdom/England/Greater%20London/London',
    'Paris': 'https://www.yr.no/en/forecast/daily-table/2-2988507/France/Île-de-France%20Region/Paris/Paris',
    'Stockholm': 'https://www.yr.no/en/forecast/daily-table/2-2673730/Sweden/Stockholm/Stockholm%20Municipality/Stockholm',
    'New York': 'https://www.yr.no/en/forecast/daily-table/2-5128581/United%20States/New%20York/New%20York',
    'Los Angeles': 'https://www.yr.no/en/forecast/daily-table/2-5368361/United%20States/California/Los%20Angeles/LA',
    'Singapore': 'https://www.yr.no/en/forecast/daily-table/2-1880252/Singapore/Singapore',
    'Sydney': 'https://www.yr.no/en/forecast/daily-table/2-2147714/Australia/New%20South%20Wales/Sydney',
    'Hong Kong': 'https://www.yr.no/en/forecast/daily-table/2-1819729/Hong%20Kong/Central%20and%20Western/Hong%20Kong',
    'Rome': 'https://www.yr.no/en/forecast/daily-table/2-3169070/Italy/Lazio/Province%20of%20Rome/Rome',
    'Kyiv': 'https://www.yr.no/en/forecast/daily-table/2-703448/Ukraine/Kyiv%20City/Kyiv',
}

len(locations_dict.keys())

10

# <span style="color:#ff5f27"> 🔮 Web Scraping

In [3]:
def parse_city(city_name, locations_dict=locations_dict):
    
    url = locations_dict[city_name]
    page = requests.get(url)
    
    assert page.status_code==200, f"Respond status: {page.status_code}"

    soup = BeautifulSoup(page.text, "html.parser")
    
    weather_condition_html = soup.findAll('div', class_='weather-symbol')[0]
    weather_condition = re.search(r'alt\=.\w+', str(weather_condition_html)).group().split('"')[-1]
    
    # API crushes when temperature is 0
    try:
        temperature_html = soup.findAll('span', class_='temperature temperature--warm')[0]
        temperature = int(re.search(r'\d+', str(temperature_html)).group())
    except:
        temperature = 0
    
    feels_like_html = soup.findAll('div', class_='feels-like-text')[0]
    feels_like = int(re.search(r'[-+]?\d+', str(feels_like_html)).group())   
    
    precipitation_html = soup.findAll('span', class_='now-hero__next-hour-precipitation-value')[0]
    precipitation = float(re.search(r'\d+', str(precipitation_html)).group()) 
    
    wind_speed_html = soup.findAll('span', class_='nrk-sr')[2]
    wind_speed = float(re.search(r'\d+', str(wind_speed_html)).group())
    
    max_min_temperature_html = soup.findAll('span', class_='min-max-temperature')[0]
    max_temperature, min_temperature = map(int,re.findall(r'[-+]?\d+', str(max_min_temperature_html)))
    
    return [
        city_name,
        date.today(),
        weather_condition,
        temperature,
        max_temperature,
        min_temperature,
        feels_like,
        precipitation,
        wind_speed
    ]

In [4]:
def parse_cities(cities):
    
    col_names = [
        'city',
        'date',
        'weather_condition',
        'temperature',
        'max_temperature',
        'min_temperature',
        'feels_like',
        'precipitation',
        'wind_speed'
    ]
    
    data = pd.DataFrame(columns=col_names)

    for city in cities:
        data.loc[len(data)] = parse_city(city)
        
    return data

In [5]:
data = parse_cities([*locations_dict.keys()])
data

Unnamed: 0,city,date,weather_condition,temperature,max_temperature,min_temperature,feels_like,precipitation,wind_speed
0,London,2023-02-02,partly,5,12,5,1,0.0,5.0
1,Paris,2023-02-02,cloudy,8,10,8,5,0.0,5.0
2,Stockholm,2023-02-02,cloudy,0,-1,-4,-7,0.0,4.0
3,New York,2023-02-02,partly,0,4,-4,-4,0.0,1.0
4,Los Angeles,2023-02-02,clear,7,18,6,4,0.0,3.0
5,Singapore,2023-02-02,light,27,27,25,31,0.0,4.0
6,Sydney,2023-02-02,fair,25,26,24,25,0.0,11.0
7,Hong Kong,2023-02-02,clear,19,19,16,19,0.0,9.0
8,Rome,2023-02-02,clear,5,14,5,2,0.0,4.0
9,Kyiv,2023-02-02,cloudy,2,3,-1,-2,0.0,4.0


---

# <span style="color:#ff5f27"> 🧙🏼‍♂️ Historical Data

In [22]:
def parse_weather_data(city, start_date, end_date, API_KEY='ED44UXM6D4UEH84BCHMG6UKYG'):
    formatted_url = f"https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline/{city.lower()}/{start_date}/{end_date}?unitGroup=metric&include=days&key={API_KEY}&contentType=csv"
    return pd.read_csv(formatted_url)


def get_weather_data(city, start_date, end_date):
    API_KEY = 'ED44UXM6D4UEH84BCHMG6UKYG'

    data = parse_weather_data(city, start_date, end_date, API_KEY)
    
    data['name'] = city
    
    data.rename(
        columns={
            'name':'city_name',
            'datetime': 'date',
            'icon': 'weather_condition',
            'temp': 'temperature',
            'tempmax': 'temperature_max',
            'tempmin': 'temperature_min',
            'precip': 'precipitation',
            'windspeed': 'wind_speed',
        },
        inplace=True
    )

    return data[[
            'city_name',
            'date',
            'weather_condition',
            'temperature',
            'temperature_max',
            'temperature_min',
            'feelslike',
            'precipitation',
            'wind_speed'
        ]]

In [20]:
def parse_historical_data(cities, start_date, end_date=None):

    if not end_date:
        end_date = start_date
        
    weather_data = pd.DataFrame()
    
    for city in cities:
        time.sleep(45)
        data = get_weather_data(city,start_date,end_date)
        weather_data = pd.concat([weather_data, data])
    
    return weather_data

In [23]:
from datetime import datetime

date_today = datetime.now().strftime("%Y-%m-%d")
cities = [*locations_dict.keys()]

data = parse_historical_data(['London'],'2022-01-01','2022-02-01')
data

Unnamed: 0,city_name,date,weather_condition,temperature,temperature_max,temperature_min,feelslike,precipitation,wind_speed
0,London,2022-01-01,rain,13.8,16.1,13.1,13.8,0.015,0.2
1,London,2022-01-02,rain,11.8,13.3,9.8,11.8,1.406,0.2
2,London,2022-01-03,partly-cloudy-day,9.9,11.0,9.1,9.9,0.0,0.2
3,London,2022-01-04,rain,5.7,9.0,2.1,5.4,4.39,16.7
4,London,2022-01-05,clear-day,3.4,6.5,1.1,3.4,0.0,0.1
5,London,2022-01-06,rain,3.0,7.3,-1.9,3.0,1.193,3.4
6,London,2022-01-07,rain,4.4,6.3,1.1,4.4,0.407,0.2
7,London,2022-01-08,rain,6.4,10.0,1.3,6.2,6.995,8.7
8,London,2022-01-09,rain,4.6,7.7,2.3,4.5,0.001,14.8
9,London,2022-01-10,rain,5.9,9.1,1.6,5.9,0.199,6.6


---