<a href="https://colab.research.google.com/github/Afroza2/Strativ-AB-Travel-Management/blob/main/Dhaka_Weather_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Checking the Python version**

In [1]:
# Print the interpreter version so the environment used for this run is recorded in the cell output.
!python3 --version

Python 3.10.12


# Installing the required packages

In [4]:
!pip install openmeteo_requests
!pip install pandas
!pip install requests_cache
!pip install retry_requests

Collecting openmeteo_requests
  Downloading openmeteo_requests-1.1.0-py3-none-any.whl (5.5 kB)
Collecting openmeteo-sdk>=1.4.0 (from openmeteo_requests)
  Downloading openmeteo_sdk-1.5.0-py3-none-any.whl (12 kB)
Installing collected packages: openmeteo-sdk, openmeteo_requests
Successfully installed openmeteo-sdk-1.5.0 openmeteo_requests-1.1.0
Collecting requests_cache
  Downloading requests_cache-1.1.0-py3-none-any.whl (60 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.1/60.1 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
Collecting cattrs>=22.2 (from requests_cache)
  Downloading cattrs-23.1.2-py3-none-any.whl (50 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.8/50.8 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
Collecting url-normalize>=1.4 (from requests_cache)
  Downloading url_normalize-1.4.3-py2.py3-none-any.whl (6.8 kB)
Installing collected packages: url-normalize, cattrs, requests_cache
Successfully installed cattrs-23.1.2 requests

# Fetching hourly Dhaka temperature data from 1940 onward and saving it in a DataFrame

In [6]:
import openmeteo_requests
import requests_cache
import pandas as pd
from retry_requests import retry

# Open-Meteo client backed by an on-disk HTTP cache (entries expire after one hour)
# and automatic retries (5 attempts, backoff factor 0.2), so re-running this cell
# does not re-download the whole archive.
cache_session = requests_cache.CachedSession('.cache', expire_after = 3600)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)

url = "https://archive-api.open-meteo.com/v1/archive"

# Request parameters: hourly 2 m air temperature for Dhaka over the full range.
# No "timezone" is sent, so timestamps are presumably GMT/UTC (the recorded run
# reports a 0 s offset) — convert explicitly if local Dhaka time is needed.
params = {
    "latitude": 23.8103,   # Dhaka latitude
    "longitude": 90.4125,  # Dhaka longitude
    "start_date": "1940-01-01",
    "end_date": "2023-11-10",
    "hourly": "temperature_2m",
}
responses = openmeteo.weather_api(url, params=params)

# A single location was requested, so only the first response is used.
response = responses[0]
# Latitude is measured north/south and longitude east/west.
print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
print(f"Elevation {response.Elevation()} m asl")
print(f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}")
print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

# Variable index 0 corresponds to the only requested hourly variable, temperature_2m.
hourly = response.Hourly()
hourly_temperature_2m = hourly.Variables(0).ValuesAsNumpy()

if hourly_temperature_2m.size == 0:
    print("No temperature data available for the specified date range.")
    # Still define the frame (empty, same columns) so later cells do not fail
    # with a NameError when the API returns nothing.
    hourly_data = {
        "date": pd.DatetimeIndex([]),
        "temperature_2m": hourly_temperature_2m,
    }
else:
    # Rebuild the timestamp axis from the response metadata: start (inclusive),
    # end (exclusive) and the sampling interval in seconds.
    hourly_data = {
        "date": pd.date_range(
            start = pd.to_datetime(hourly.Time(), unit = "s"),
            end = pd.to_datetime(hourly.TimeEnd(), unit = "s"),
            freq = pd.Timedelta(seconds = hourly.Interval()),
            inclusive = "left",
        ),
        "temperature_2m": hourly_temperature_2m,
    }

hourly_dataframe = pd.DataFrame(data = hourly_data)

if hourly_dataframe['temperature_2m'].isna().any():
    print("Null values found in temperature data. Replacing with appropriate value.")
    # Forward-fill gaps with the last observed value. Assigning the result back
    # (instead of fillna(method=..., inplace=True) on a column selection) avoids the
    # deprecated `method=` argument and works under pandas copy-on-write.
    # NOTE(review): ffill also repeats the last reading across a missing tail at the
    # end of the range, and leaves leading NaNs untouched — verify this is acceptable.
    hourly_dataframe['temperature_2m'] = hourly_dataframe['temperature_2m'].ffill()

# Show a summary of the frame whether or not any values had to be filled.
print(hourly_dataframe)

# Optional: uncomment to persist the data for later runs.
# hourly_dataframe.to_csv('weather_data.csv', index=False)








Coordinates 23.796133041381836°E 90.38054656982422°N
Elevation 19.0 m asl
Timezone None None
Timezone difference to GMT+0 0 s
Null values found in temperature data. Replacing with appropriate value.
                      date  temperature_2m
0      1940-01-01 00:00:00       14.178500
1      1940-01-01 01:00:00       14.778501
2      1940-01-01 02:00:00       16.078499
3      1940-01-01 03:00:00       19.978498
4      1940-01-01 04:00:00       23.078499
...                    ...             ...
735115 2023-11-10 19:00:00       18.348000
735116 2023-11-10 20:00:00       18.348000
735117 2023-11-10 21:00:00       18.348000
735118 2023-11-10 22:00:00       18.348000
735119 2023-11-10 23:00:00       18.348000

[735120 rows x 2 columns]


# Checking the dataframe and its distribution

In [8]:
# Preview the first 100 hourly rows; as the cell's last expression the frame is rendered as a table.
hourly_dataframe.head(n=100)

Unnamed: 0,date,temperature_2m
0,1940-01-01 00:00:00,14.178500
1,1940-01-01 01:00:00,14.778501
2,1940-01-01 02:00:00,16.078499
3,1940-01-01 03:00:00,19.978498
4,1940-01-01 04:00:00,23.078499
...,...,...
95,1940-01-04 23:00:00,11.928500
96,1940-01-05 00:00:00,11.428500
97,1940-01-05 01:00:00,11.378500
98,1940-01-05 02:00:00,15.028501
