### 1. Data Collection and Preprocessing

In [28]:
pip install azure-maps-search --pre

Collecting azure-maps-search
  Downloading azure_maps_search-1.0.0b2-py3-none-any.whl.metadata (18 kB)
Collecting msrest>=0.6.21 (from azure-maps-search)
  Downloading msrest-0.7.1-py3-none-any.whl.metadata (21 kB)
Collecting azure-common~=1.1 (from azure-maps-search)
  Downloading azure_common-1.1.28-py2.py3-none-any.whl.metadata (5.0 kB)
Collecting azure-mgmt-core<2.0.0,>=1.3.0 (from azure-maps-search)
  Downloading azure_mgmt_core-1.4.0-py3-none-any.whl.metadata (4.1 kB)
Collecting azure-core<2.0.0,>=1.26.2 (from azure-mgmt-core<2.0.0,>=1.3.0->azure-maps-search)
  Downloading azure_core-1.30.2-py3-none-any.whl.metadata (37 kB)
Collecting isodate>=0.6.0 (from msrest>=0.6.21->azure-maps-search)
  Downloading isodate-0.6.1-py2.py3-none-any.whl.metadata (9.6 kB)
Collecting requests-oauthlib>=0.5.0 (from msrest>=0.6.21->azure-maps-search)
  Downloading requests_oauthlib-2.0.0-py2.py3-none-any.whl.metadata (11 kB)
Collecting oauthlib>=3.0.0 (from requests-oauthlib>=0.5.0->msrest>=0.6.21->

In [130]:
import os
import requests
import pandas as pd
from datetime import datetime, timedelta
from azure.core.credentials import AzureKeyCredential
from azure.maps.search import MapsSearchClient
import time

# The Azure Maps subscription key is read from the environment so the secret is never
# stored in the notebook or its history. Export AZURE_MAPS_SUBSCRIPTION_KEY before
# starting the kernel. Any key that was previously committed in this cell should be
# considered compromised and rotated in the Azure portal.
SUBSCRIPTION_KEY = os.environ.get("AZURE_MAPS_SUBSCRIPTION_KEY", "")
if not SUBSCRIPTION_KEY:
    print("AZURE_MAPS_SUBSCRIPTION_KEY is not set; Azure Maps requests will be rejected.")

def search_address(query):
    """Geocode a free-form address with Azure Maps and return its coordinates.

    Args:
        query: Address or place name passed to ``MapsSearchClient.search_address``.

    Returns:
        ``(lat, lon)`` taken from the first search result, or ``(None, None)``
        when the service returns no results.
    """
    maps_search_client = MapsSearchClient(credential=AzureKeyCredential(SUBSCRIPTION_KEY))
    result = maps_search_client.search_address(query=query)

    # Treat a missing (None) result list the same as an empty one instead of
    # letting len(None) raise a TypeError.
    matches = result.results or []
    if not matches:
        print("No address found")
        return None, None

    position = matches[0].position
    lat, lon = position.lat, position.lon
    print(f"Coordinate: {lat}, {lon}")
    return lat, lon

def fetch_historical_weather_data(lat, lon, start_date, end_date,
                                  max_retries=10, retry_delay=10, timeout=30):
    """Fetch daily historical weather actuals from Azure Maps for one date range.

    Args:
        lat: Latitude of the location.
        lon: Longitude of the location.
        start_date: First day of the range (object with ``strftime``).
        end_date: Last day of the range (object with ``strftime``).
        max_retries: Maximum number of request attempts before giving up.
        retry_delay: Seconds to wait between attempts.
        timeout: Per-request timeout in seconds passed to ``requests.get`` so a
            stalled connection cannot block the notebook indefinitely.

    Returns:
        The decoded JSON response on HTTP 200, otherwise ``None`` (after the
        attempts are exhausted or on a response that retrying cannot fix).
    """
    params = {
        'api-version': '1.1',
        'query': f"{lat},{lon}",
        'startDate': start_date.strftime('%Y-%m-%d'),
        'endDate': end_date.strftime('%Y-%m-%d'),
        'subscription-key': SUBSCRIPTION_KEY,
    }
    url = "https://atlas.microsoft.com/weather/historical/actuals/daily/json"

    for attempt in range(1, max_retries + 1):
        try:
            response = requests.get(url, params=params, timeout=timeout)
        except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as e:
            # Network-level failures are transient; fall through to the retry wait.
            print(f"{type(e).__name__}: {e}. Retrying...")
        else:
            if response.status_code == 200:
                data = response.json()
                print(f"Fetched data for {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}")
                return data

            print(f"Error fetching data: {response.status_code}, {response.text}")
            # Only timeouts, throttling and server-side errors can succeed on a retry;
            # other statuses (bad request, bad key, ...) will fail identically each time.
            retryable = response.status_code in (408, 429) or response.status_code >= 500
            if not retryable:
                print("Response is not retryable. Skipping this date range.")
                return None

        if attempt < max_retries:
            print(f"Retry attempt {attempt} in {retry_delay} seconds...")
            time.sleep(retry_delay)

    print(f"Failed to fetch data after {max_retries} retries. Skipping this date range.")
    return None

def preprocess_data(raw_data, location_name):
    """Flatten the nested daily records of an API response into a DataFrame.

    Args:
        raw_data: Mapping with a ``'results'`` sequence of per-day records.
        location_name: Label stored in the ``location_name`` column of every row.

    Returns:
        A ``pandas.DataFrame`` with one row per entry in ``raw_data['results']``.
    """
    # Output column -> chain of keys to follow inside each daily record.
    value_paths = (
        ('temperature_min', ('temperature', 'minimum', 'value')),
        ('temperature_max', ('temperature', 'maximum', 'value')),
        ('temperature_avg', ('temperature', 'average', 'value')),
        ('precipitation', ('precipitation', 'value')),
        ('snowfall', ('snowfall', 'value')),
        ('snow_depth', ('snowDepth', 'value')),
        ('heating_degree_days', ('degreeDaySummary', 'heating', 'value')),
        ('cooling_degree_days', ('degreeDaySummary', 'cooling', 'value')),
    )

    def build_row(entry):
        row = {
            'location_name': location_name,
            # Parse the date string into a timezone-aware datetime.
            'timestamp': datetime.strptime(entry['date'], '%Y-%m-%dT%H:%M:%S%z'),
        }
        for column, path in value_paths:
            node = entry
            for key in path:
                node = node[key]
            row[column] = node
        return row

    rows = [build_row(entry) for entry in raw_data['results']]
    return pd.DataFrame(rows)

def file_exists_for_date_range(start_date, end_date):
    """Report whether the CSV for the given date range is already on disk.

    The file name is built from both dates formatted as ``YYYYMMDD`` and is
    looked up relative to the current working directory.
    """
    first_stamp = start_date.strftime("%Y%m%d")
    last_stamp = end_date.strftime("%Y%m%d")
    candidate = f'historical_weather_data_{first_stamp}_{last_stamp}.csv'
    return os.path.exists(candidate)

def iter_date_windows(latest, earliest, window_days=30):
    """Yield ``(start, end)`` date windows walking backwards from ``latest``.

    Each window spans at most ``window_days`` days between its start and end
    (``window_days + 1`` calendar days inclusive), consecutive windows do not
    overlap, and the final window is truncated at ``earliest`` rather than
    stretched to reach it, so no window exceeds the per-request range limit.

    Args:
        latest: Most recent day to cover (datetime).
        earliest: Oldest day to cover (datetime).
        window_days: Maximum distance in days between a window's start and end.
    """
    window_end = latest
    while window_end >= earliest:
        window_start = max(window_end - timedelta(days=window_days), earliest)
        yield window_start, window_end
        window_end = window_start - timedelta(days=1)


if __name__ == "__main__":
    location = "San Francisco, CA"  # Example location
    lat, lon = search_address(location)

    # Compare against None explicitly: 0.0 is a valid coordinate but is falsy.
    if lat is not None and lon is not None:
        today = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
        end_date = today - timedelta(days=1)  # Most recent complete day
        ten_years_ago = today - timedelta(days=3652)  # Approximately 10 years ago

        for window_start, window_end in iter_date_windows(end_date, ten_years_ago):
            range_label = f"{window_start.strftime('%Y-%m-%d')} to {window_end.strftime('%Y-%m-%d')}"

            if file_exists_for_date_range(window_start, window_end):
                print(f"Data for {range_label} already exists. Skipping.")
                continue

            raw_data = fetch_historical_weather_data(lat, lon, window_start, window_end)

            # Skip failed fetches and responses without any daily records so that
            # no empty CSV is written (which would also mark the range as done).
            if raw_data and raw_data.get('results'):
                weather_df = preprocess_data(raw_data, location)
                file_name = f'historical_weather_data_{window_start.strftime("%Y%m%d")}_{window_end.strftime("%Y%m%d")}.csv'
                weather_df.to_csv(file_name, index=False)
                print(f"Weather data saved to {file_name}")
            elif raw_data is not None:
                print(f"No daily records returned for {range_label}.")


Coordinate: 37.78008, -122.42016
ConnectionError: ('Connection aborted.', ConnectionResetError(10054, 'An existing connection was forcibly closed by the remote host', None, 10054, None)). Retrying...
Retry attempt 1 in 10 seconds...
ConnectionError: ('Connection aborted.', ConnectionResetError(10054, 'An existing connection was forcibly closed by the remote host', None, 10054, None)). Retrying...
Retry attempt 2 in 10 seconds...
Fetched data for 2024-06-06 to 2024-07-06
Weather data saved to historical_weather_data_20240606_20240706.csv
ConnectionError: ('Connection aborted.', ConnectionResetError(10054, 'An existing connection was forcibly closed by the remote host', None, 10054, None)). Retrying...
Retry attempt 1 in 10 seconds...
Fetched data for 2024-05-06 to 2024-06-05
Weather data saved to historical_weather_data_20240506_20240605.csv
Fetched data for 2024-04-05 to 2024-05-05
Weather data saved to historical_weather_data_20240405_20240505.csv
Fetched data for 2024-03-05 to 2024-0