In [1]:
#install adapter
!pip install azure-storage-blob
!pip install boto3



In [2]:
import json
import os
from io import StringIO

import boto3
import pandas as pd
from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient

In [3]:
import openmeteo_requests

import requests_cache
import pandas as pd
from retry_requests import retry

# Open-Meteo client: HTTP session cached under '.cache' with expire_after=-1
# (no expiry per requests-cache -- confirm if the cache should ever refresh),
# wrapped so that failed requests are retried (retries=5, backoff_factor=0.2).
cache_session = requests_cache.CachedSession(".cache", expire_after=-1)
retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
openmeteo = openmeteo_requests.Client(session=retry_session)

# Archive endpoint and query parameters.
# Every weather variable needed later must appear in "hourly"; the order of
# that list matters because the values are read back by position further down.
url = "https://archive-api.open-meteo.com/v1/archive"
params = dict(
    latitude=52.52,
    longitude=13.41,
    start_date="2023-07-01",
    end_date="2023-12-31",
    hourly=["temperature_2m", "relative_humidity_2m", "precipitation", "wind_speed_100m"],
)
responses = openmeteo.weather_api(url, params=params)

# Only the first entry of `responses` is processed; loop over `responses`
# to handle several locations or weather models.
response = responses[0]

# Summary of the location metadata returned by the API, one item per line.
print(
    f"Coordinates {response.Latitude()}°N {response.Longitude()}°E",
    f"Elevation {response.Elevation()} m asl",
    f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}",
    f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s",
    sep="\n",
)

# Hourly section of the response. Variables are addressed by position, so the
# unpacking order below must match the order of the "hourly" list in the request:
# 0 = temperature_2m, 1 = relative_humidity_2m, 2 = precipitation, 3 = wind_speed_100m.
hourly = response.Hourly()
(
    hourly_temperature_2m,
    hourly_relative_humidity_2m,
    hourly_precipitation,
    hourly_wind_speed_100m,
) = (hourly.Variables(position).ValuesAsNumpy() for position in range(4))

# Column mapping for the DataFrame. The "date" column is a UTC range that starts
# at Time(), stops before TimeEnd() (inclusive="left") and steps by Interval()
# seconds; Time() and TimeEnd() are interpreted as epoch seconds.
hourly_data = {
    "date": pd.date_range(
        start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
        end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
        freq=pd.Timedelta(seconds=hourly.Interval()),
        inclusive="left",
    ),
    "temperature_2m": hourly_temperature_2m,
    "relative_humidity_2m": hourly_relative_humidity_2m,
    "precipitation": hourly_precipitation,
    "wind_speed_100m": hourly_wind_speed_100m,
}

hourly_dataframe = pd.DataFrame(data=hourly_data)
print(hourly_dataframe)

Coordinates 52.5483283996582°N 13.407821655273438°E
Elevation 38.0 m asl
Timezone None None
Timezone difference to GMT+0 0 s
                          date  temperature_2m  relative_humidity_2m  \
0    2023-07-01 00:00:00+00:00       16.758501             89.997665   
1    2023-07-01 01:00:00+00:00       16.658501             90.570862   
2    2023-07-01 02:00:00+00:00       15.408501             96.527077   
3    2023-07-01 03:00:00+00:00       15.208500             91.059433   
4    2023-07-01 04:00:00+00:00       15.758500             92.280869   
...                        ...             ...                   ...   
4411 2023-12-31 19:00:00+00:00        6.308500             83.442131   
4412 2023-12-31 20:00:00+00:00        6.658500             84.960327   
4413 2023-12-31 21:00:00+00:00        6.058500             87.312302   
4414 2023-12-31 22:00:00+00:00        5.208500             88.159035   
4415 2023-12-31 23:00:00+00:00        5.158500             86.918320   

      prec

In [4]:
# Print the hourly weather DataFrame again (same print as at the end of the previous cell)
print(hourly_dataframe)

                          date  temperature_2m  relative_humidity_2m  \
0    2023-07-01 00:00:00+00:00       16.758501             89.997665   
1    2023-07-01 01:00:00+00:00       16.658501             90.570862   
2    2023-07-01 02:00:00+00:00       15.408501             96.527077   
3    2023-07-01 03:00:00+00:00       15.208500             91.059433   
4    2023-07-01 04:00:00+00:00       15.758500             92.280869   
...                        ...             ...                   ...   
4411 2023-12-31 19:00:00+00:00        6.308500             83.442131   
4412 2023-12-31 20:00:00+00:00        6.658500             84.960327   
4413 2023-12-31 21:00:00+00:00        6.058500             87.312302   
4414 2023-12-31 22:00:00+00:00        5.208500             88.159035   
4415 2023-12-31 23:00:00+00:00        5.158500             86.918320   

      precipitation  wind_speed_100m  
0               0.0        13.551501  
1               0.0        13.493999  
2               0.

In [5]:
# Check for missing data: info() reports the non-null count and dtype of every column
hourly_dataframe.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4416 entries, 0 to 4415
Data columns (total 5 columns):
 #   Column                Non-Null Count  Dtype              
---  ------                --------------  -----              
 0   date                  4416 non-null   datetime64[ns, UTC]
 1   temperature_2m        4416 non-null   float32            
 2   relative_humidity_2m  4416 non-null   float32            
 3   precipitation         4416 non-null   float32            
 4   wind_speed_100m       4416 non-null   float32            
dtypes: datetime64[ns, UTC](1), float32(4)
memory usage: 103.6 KB


In [8]:
# Put the data into cloud storage: upload the hourly weather DataFrame to
# Azure Blob Storage as a CSV blob (created, or overwritten if it already exists).

# SECURITY: the connection string contains the storage account key, so it must
# not be stored in the notebook. It is read from the environment instead.
# The key that was previously hardcoded in this cell should be rotated.
CONNECTION_STRING_AZURE_STORAGE = os.environ.get("AZURE_STORAGE_CONNECTION_STRING")
if not CONNECTION_STRING_AZURE_STORAGE:
    raise RuntimeError(
        "Set the AZURE_STORAGE_CONNECTION_STRING environment variable "
        "to the Azure Storage connection string before running this cell."
    )
CONTAINER_AZURE = 'tsaweather'
blob_name = "weatherdata Jul1-Dec31.csv"

# Convert DataFrame to CSV text; to_csv returns a string when no target is given
data = hourly_dataframe.to_csv(index=False)

# Create the BlobServiceClient object
blob_service_client = BlobServiceClient.from_connection_string(CONNECTION_STRING_AZURE_STORAGE)

# Get a blob client using the container name and blob name
blob_client = blob_service_client.get_blob_client(container=CONTAINER_AZURE, blob=blob_name)

# Upload the CSV data, replacing any existing blob of the same name
blob_client.upload_blob(data, overwrite=True)

print(f"Uploaded {blob_name} to Azure Blob Storage in container {CONTAINER_AZURE}.")

Uploaded weatherdata Jul1-Dec31.csv to Azure Blob Storage in container tsaweather.
