In [4]:
# Import required libraries
import pandas as pd
from datetime import datetime
from meteostat import Hourly, Point 
from google.colab import files

In [5]:
url = 'https://drive.google.com/file/d/1qd4EMFjFShK-11yhdkJ3CI69IwL61L8i/view?usp=share_link' 
path = 'https://drive.google.com/uc?export=download&id='+url.split('/')[-2]
belgium_cities = pd.read_csv(path)

In [6]:
# Filter Belgium cities
belgium_cities = belgium_cities.loc[belgium_cities['country'] == 'Belgium']
belgium_cities

Unnamed: 0,city,city_ascii,lat,lng,country,iso2,iso3,admin_name,capital,population,id
421,Brussels,Brussels,50.8353,4.3314,Belgium,BE,BEL,Brussels-Capital Region,primary,1743000.0,1056469830
1338,Antwerp,Antwerp,51.2211,4.3997,Belgium,BE,BEL,Flanders,minor,529247.0,1056168623
2298,Gent,Gent,51.0536,3.7253,Belgium,BE,BEL,Flanders,minor,262219.0,1056062897
2804,Charleroi,Charleroi,50.4167,4.4442,Belgium,BE,BEL,Wallonia,minor,201816.0,1056263311
2859,Liège,Liege,50.6397,5.5706,Belgium,BE,BEL,Wallonia,minor,196623.0,1056513284
...,...,...,...,...,...,...,...,...,...,...,...
41530,Chiny,Chiny,49.7383,5.3433,Belgium,BE,BEL,Wallonia,,5175.0,1056542075
41579,Libin,Libin,49.9828,5.2578,Belgium,BE,BEL,Wallonia,,5164.0,1056311682
41630,Ohey,Ohey,50.4353,5.1217,Belgium,BE,BEL,Wallonia,,5090.0,1056175337
41632,Havelange,Havelange,50.3833,5.2500,Belgium,BE,BEL,Wallonia,,5130.0,1056076488


In [7]:
# Create a dictionary of all cities with their latitudes and longitudes
var_dict = {}
for i in range(belgium_cities.shape[0]): 
    var_dict[i + 1] = list(belgium_cities[['city', 'lat', 'lng']].iloc[i])
# Display the second city's data from the dictionary
var_dict[2]

['Antwerp', 51.2211, 4.3997]

In [10]:
# Set time period for getting the data from the meteostat library
start = datetime(2022, 1, 1)
end = datetime(2023, 1, 1, 23, 59)

In [11]:
# Get hourly weather data for the first city in the dictionary using the Hourly object of the meteostat library
df_1 = Hourly(Point(var_dict[1][1], var_dict[1][2]), start, end)
df_1 = df_1.fetch()
# Add city name column to the dataframe
df_1['city'] = var_dict[1][0]
df_1



Unnamed: 0_level_0,temp,dwpt,rhum,prcp,snow,wdir,wspd,wpgt,pres,tsun,coco,city
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2022-01-01 00:00:00,13.0,10.7,86.0,0.0,,220.0,18.0,25.9,1024.5,,4.0,Brussels
2022-01-01 01:00:00,13.0,10.2,83.0,0.0,,210.0,14.4,25.9,1024.7,,4.0,Brussels
2022-01-01 02:00:00,12.9,9.9,82.0,0.0,,210.0,14.4,25.9,1024.9,,4.0,Brussels
2022-01-01 03:00:00,12.3,9.9,85.0,0.0,,180.0,10.8,25.9,1024.4,,4.0,Brussels
2022-01-01 04:00:00,11.8,9.5,86.0,0.0,,180.0,10.8,24.1,1024.6,,4.0,Brussels
...,...,...,...,...,...,...,...,...,...,...,...,...
2023-01-01 19:00:00,10.6,10.0,96.0,0.9,,170.0,21.6,27.8,1013.0,,8.0,Brussels
2023-01-01 20:00:00,10.6,9.8,95.0,1.3,,150.0,21.6,27.8,1011.7,,9.0,Brussels
2023-01-01 21:00:00,11.4,10.3,93.0,0.5,,190.0,14.4,29.6,1011.6,,8.0,Brussels
2023-01-01 22:00:00,11.6,10.3,92.0,0.1,,180.0,18.0,33.3,1011.1,,7.0,Brussels


In [12]:
# Get hourly weather data for the i-th city in the dictionary (i = 1)
i=1
df_temp = Hourly(Point(var_dict[i][1], var_dict[i][2]), start, end)
df_temp
     

<meteostat.interface.hourly.Hourly at 0x7f2e2a9635b0>

In [13]:
# Create an empty dataframe to store weather data for all cities
df = pd.DataFrame()
# Loop through all cities in the dictionary and append their weather data to the dataframe
for i in range(1, len(var_dict)):
    df_temp = pd.DataFrame() 
    df_temp = Hourly(Point(var_dict[i][1], var_dict[i][2]), start, end)
    df_temp = df_temp.fetch()
    df_temp['city'] = var_dict[i][0]
    df_temp['time'] = df_temp.index
    df_1 = df_1.append(df_temp, ignore_index=True)



In [23]:
df_1

Unnamed: 0,temp,dwpt,rhum,prcp,snow,wdir,wspd,wpgt,pres,tsun,coco,city,time
8784,13.0,10.7,86.0,0.0,,220.0,18.0,25.9,1024.5,,4.0,Brussels,2022-01-01 00:00:00
8785,13.0,10.2,83.0,0.0,,210.0,14.4,25.9,1024.7,,4.0,Brussels,2022-01-01 01:00:00
8786,12.9,9.9,82.0,0.0,,210.0,14.4,25.9,1024.9,,4.0,Brussels,2022-01-01 02:00:00
8787,12.3,9.9,85.0,0.0,,180.0,10.8,25.9,1024.4,,4.0,Brussels,2022-01-01 03:00:00
8788,11.8,9.5,86.0,0.0,,180.0,10.8,24.1,1024.6,,4.0,Brussels,2022-01-01 04:00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4110907,10.1,8.4,89.0,0.0,,160.0,10.8,35.2,1015.6,,3.0,Havelange,2023-01-01 19:00:00
4110908,11.6,9.0,84.0,0.0,,180.0,18.0,35.2,1014.7,,3.0,Havelange,2023-01-01 20:00:00
4110909,12.2,7.9,75.0,0.0,,180.0,21.6,38.9,1014.5,,3.0,Havelange,2023-01-01 21:00:00
4110910,12.4,7.7,73.0,0.0,,190.0,18.0,38.9,1014.4,,3.0,Havelange,2023-01-01 22:00:00


In [24]:
df_1.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4102128 entries, 8784 to 4110911
Data columns (total 13 columns):
 #   Column  Dtype         
---  ------  -----         
 0   temp    float64       
 1   dwpt    float64       
 2   rhum    float64       
 3   prcp    float64       
 4   snow    float64       
 5   wdir    float64       
 6   wspd    float64       
 7   wpgt    float64       
 8   pres    float64       
 9   tsun    float64       
 10  coco    float64       
 11  city    object        
 12  time    datetime64[ns]
dtypes: datetime64[ns](1), float64(11), object(1)
memory usage: 438.2+ MB


In [22]:
df_1.dropna(subset=('time'), how='any', inplace=True)

In [25]:
new = df_1[df_1['time'].dt.year == 2022]

In [33]:
new = new[new['time'].dt.month == 2]

In [34]:
new.shape

(313824, 13)

In [35]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [36]:
# save dataframe to a CSV file
new.to_csv('/content/drive/MyDrive/Berlin_weather.csv', index=False)