In [44]:
import pandas as pd
import itertools

In [45]:
df=pd.read_csv("../../Dataset/abu_dhabi.csv")

In [46]:
df['lat_first3'] = df['lat'].apply(lambda x: str(x).replace('.', '')[:4])

# Find unique first 3-digit groups
unique_lat = df['lat_first3'].unique()
count_unique_first3 = len(unique_lat)

print("Unique first 3-digit groups:", unique_lat)
print("Number of unique groups:", count_unique_first3)

Unique first 3-digit groups: ['2462' '2461' '2460' '2459' '2458' '2457' '2456' '2455' '2454' '2453'
 '2452' '2451' '2450' '2449' '2448' '2447' '2446' '2445' '2444' '2443'
 '2442' '2441' '2440' '2439' '2438' '2437' '2436' '2435' '2434' '2433'
 '2432' '2431' '2430' '2429' '2428' '2427' '2426' '2425' '2424' '2423'
 '2422' '2421' '2420' '2419']
Number of unique groups: 44


In [47]:
df['lng_first3'] = df['lng'].apply(lambda x: str(x).replace('.', '')[:4])

# Find unique first 3-digit groups
unique_lng = df['lng_first3'].unique()
count_unique_first3 = len(unique_lng)

print("Unique first 3-digit groups:", unique_lng)
print("Number of unique groups:", count_unique_first3)

Unique first 3-digit groups: ['5470' '5466' '5469' '5459' '5458' '5468' '5463' '5467' '5464' '5444'
 '5445' '5465' '5462' '5443' '5450' '5442' '5438' '5439' '5440' '5447'
 '5436' '5437' '5441' '5449' '5435' '5452' '5460' '5461' '5446' '5456'
 '5451' '5434' '5432' '5433' '5431' '5448' '5429' '5430' '5455' '5453'
 '5457' '5454' '5425' '5424']
Number of unique groups: 44


In [48]:
final_points = list(itertools.product(unique_lat, unique_lng))

# Turn into a DataFrame
df_final_points = pd.DataFrame(final_points, columns=['lat', 'lng'])

print(df_final_points)

       lat   lng
0     2462  5470
1     2462  5466
2     2462  5469
3     2462  5459
4     2462  5458
...    ...   ...
1931  2419  5453
1932  2419  5457
1933  2419  5454
1934  2419  5425
1935  2419  5424

[1936 rows x 2 columns]


In [49]:
# Function to insert a '.' after first two digits
def insert_dot(x):
    x_str = str(int(x))  # ensure it's string and integer
    return float(x_str[:2] + '.' + x_str[2:])

# Apply to both columns
df_final_points['lat'] = df_final_points['lat'].apply(insert_dot)
df_final_points['lng'] = df_final_points['lng'].apply(insert_dot)

print(df_final_points)

        lat    lng
0     24.62  54.70
1     24.62  54.66
2     24.62  54.69
3     24.62  54.59
4     24.62  54.58
...     ...    ...
1931  24.19  54.53
1932  24.19  54.57
1933  24.19  54.54
1934  24.19  54.25
1935  24.19  54.24

[1936 rows x 2 columns]


# API Calls

In [50]:
import openmeteo_requests

import requests_cache
import pandas as pd
from retry_requests import retry

# Setup the Open-Meteo API client with cache and retry on error
cache_session = requests_cache.CachedSession('.cache', expire_after = -1)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)

In [51]:
import pandas as pd

# Prepare
url = "https://archive-api.open-meteo.com/v1/archive"
final_rows = []

# Loop through points
for idx, row in df_final_points.iloc[1:].iterrows():
    if idx%100==0:
        print(idx)
    lat = row['lat']
    lng = row['lng']

    # Prepare params for API call
    params = {
        "latitude": [lat],
        "longitude": [lng],
        "start_date": "2025-04-10",
        "end_date": "2025-04-24",
        "hourly": "temperature_2m"
    }

    try:
        # Fetch data
        response = openmeteo.weather_api(url, params=params)[0]

        # Build hourly dataframe
        hourly = response.Hourly()
        dates = pd.date_range(
            start = pd.to_datetime(hourly.Time(), unit="s", utc=True),
            end = pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
            freq = pd.Timedelta(seconds=hourly.Interval()),
            inclusive = "left"
        )
        hourly_dataframe = pd.DataFrame({
            "date": dates,
            "temperature_2m": hourly.Variables(0).ValuesAsNumpy()
        })

        # Filter and compute average at 18:00
        average_temperature_18 = hourly_dataframe.loc[
            hourly_dataframe['date'].dt.hour == 18, 'temperature_2m'
        ].mean()

        # Save the results into list
        final_rows.append({
            "lat": lat,
            "lng": lng,
            "avg_temp_18": average_temperature_18
        })

    except Exception as e:
        print(f"Error processing point ({lat}, {lng}): {e}")
        continue

# Create final DataFrame
final_df = pd.DataFrame(final_rows)

print(final_df)

100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
        lat    lng  avg_temp_18
0     24.62  54.66    26.030836
1     24.62  54.69    25.683830
2     24.62  54.59    26.037336
3     24.62  54.58    26.043837
4     24.62  54.68    26.011335
...     ...    ...          ...
1930  24.19  54.53    27.350668
1931  24.19  54.57    27.370167
1932  24.19  54.54    27.402668
1933  24.19  54.25    26.604000
1934  24.19  54.24    26.604000

[1935 rows x 3 columns]


In [52]:
final_df.to_csv('weather_abu_dhabi.csv')