**Imports**

In [16]:
#imports
import requests
import json
import re
import pandas as pd
from utilities import flatten_nested_dict
import os
import re
import configparser
from dotenv import load_dotenv

**Configs**

In [17]:
#pandas config for display
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

#base path
base_path = os.getcwd()

#load variables from a local .env file (if one exists) into the process
#environment so that os.getenv below can see them on a fresh kernel
load_dotenv()

#the api key comes from the environment; the locations come from the config file
config = configparser.ConfigParser(allow_no_value=True)
config.read('cfg.ini')
API_KEY = os.getenv('API_KEY')
if not API_KEY:
    print("Warning: API_KEY is not set in the environment or a .env file; weather API requests are expected to fail")
#the option names of the [Locations] section are used as the location names
#(configparser lower-cases option names by default)
locations = config.options('Locations')

#path for base directory for the weather api response files
path_to_response_file_directory = os.path.join(base_path, f"weather_api_response_files{os.sep}")
print("path_to_response_file_directory:", path_to_response_file_directory)

path_to_response_file_directory: c:\Users\risha\WeatherETL\weather_api_response_files\


**Functions**

In [18]:
#get geo codes for locations through api
def get_codes_for_location(location:list|str)->pd.DataFrame:
    geo_codes_location_dict = {
        'name': [],
        'lat': [],
        'lon': [],
        'country': [],  
        'state': []
    }
    if(type(location) == str):
        location = [location]
    for each in location: 
        geo_coding_api_url = f"http://api.openweathermap.org/geo/1.0/direct?q={each}&limit=5&appid={API_KEY}"

        geo_code_response = requests.get(geo_coding_api_url)

        if geo_code_response.status_code != 200:
            print("Error")
            return None
        else:
            geo_code_response = geo_code_response.content
            #manual decoding for byte literal response and converting it into a dict .decode didn't work
            new_geo_code_response = eval(re.sub("^b[']|[']$", "", str(geo_code_response).replace(r"\x", "")))
            geo_codes_location_dict['name'].append(new_geo_code_response[0]['name'])
            geo_codes_location_dict['lat'].append(new_geo_code_response[0]['lat'])
            geo_codes_location_dict['lon'].append(new_geo_code_response[0]['lon'])
            geo_codes_location_dict['country'].append(new_geo_code_response[0]['country'])
            geo_codes_location_dict['state'].append(new_geo_code_response[0]['state'])
        
        geo_codes_df = pd.DataFrame(geo_codes_location_dict)
    return geo_codes_df

def get_weather_data_and_write_to_csv(geo_codes_df: pd.DataFrame)->pd.DataFrame:
    """Fetch current weather for each geocoded location and append it to a CSV.

    For every row of ``geo_codes_df`` one request is sent to the
    OpenWeatherMap current-weather endpoint. The response is flattened with
    ``flatten_nested_dict``, reduced to a fixed set of columns and appended
    to ``<response dir>/<country>/<state>/weather_data_<name>.csv`` using
    ``|`` as the separator. The header row is written only when the file does
    not exist yet.

    Args:
        geo_codes_df: DataFrame with the columns ``name``, ``lat``, ``lon``,
            ``country`` and ``state``. ``None`` is accepted and treated as
            "nothing to do".

    Returns:
        A DataFrame holding the rows that were written, one per location, in
        the fixed column order. It is empty when nothing was written.
        Locations whose request returns a non-200 status are skipped with a
        printed message.
    """
    select_cols = ['coord_lon', 'coord_lat', 'weather_0_main', 'weather_0_description', 'main_temp','main_feels_like','main_temp_min','main_temp_max'\
            ,'main_pressure','main_humidity','main_sea_level','main_grnd_level','visibility','wind_speed','wind_deg','wind_gust','clouds_all'\
            , 'dt','sys_country','sys_sunrise','sys_sunset','timezone','name']

    # The geocoding step signals failure by returning None.
    if geo_codes_df is None:
        return pd.DataFrame(columns=select_cols)

    #ensure base path exists
    os.makedirs(path_to_response_file_directory, exist_ok= True)

    written_records = []
    for each in geo_codes_df.itertuples():
        # weather data api request; `params` takes care of URL encoding
        weather_data_api_response = requests.get(
            "https://api.openweathermap.org/data/2.5/weather",
            params={'lat': each.lat, 'lon': each.lon, 'appid': API_KEY},
            timeout=30,
        )
        if weather_data_api_response.status_code != 200:
            print(f"Error: weather request for '{each.name}' returned status {weather_data_api_response.status_code}, skipping")
            continue

        # Parse as JSON rather than eval-ing text received from the network.
        weather_data = weather_data_api_response.json()

        # flatten and create df
        df_weather_data = pd.DataFrame(flatten_nested_dict(weather_data), index= [0])
        # reindex (instead of df[select_cols]) keeps a stable column layout:
        # fields missing from this response become empty cells rather than
        # raising KeyError.
        df_weather_data_select_cols = df_weather_data.reindex(columns=select_cols)

        #ensure file path exists for writing the df
        # A missing state (None/NaN) drops that directory level.
        state_dir = each.state if isinstance(each.state, str) else ''
        # The trailing '' keeps the trailing separator on the directory path.
        full_path = os.path.join(path_to_response_file_directory, each.country, state_dir, '')
        os.makedirs(full_path, exist_ok= True)

        file_name = f"weather_data_{each.name.lower()}.csv"
        csv_path = os.path.join(full_path, file_name)
        # save the df into a file
        print(f"Inserting raw data to path: {full_path}", f"file name: {file_name}")
        # Write to CSV, include header only if the file doesn't exist
        df_weather_data_select_cols.to_csv(csv_path, sep= '|', index= False, mode = 'a', header= not os.path.isfile(csv_path))

        written_records.append(df_weather_data_select_cols.iloc[0].to_dict())

    return pd.DataFrame(written_records, columns=select_cols)


**Main**

In [19]:
# Run the pipeline for every configured location: geocode it, then fetch its
# current weather and append it to the matching CSV file.
if __name__ == "__main__":
    for loc in locations:
        geo_codes_df = get_codes_for_location(location= loc)
        # get_codes_for_location returns None when the geocoding request
        # fails; skip that location instead of crashing in the next step.
        if geo_codes_df is None or geo_codes_df.empty:
            print(f"Skipping '{loc}': no geo codes available")
            continue
        path_to_data = get_weather_data_and_write_to_csv(geo_codes_df)


Inserting raw data to path: c:\Users\risha\WeatherETL\weather_api_response_files\IN\Delhi\ file name: weather_data_delhi.csv
Inserting raw data to path: c:\Users\risha\WeatherETL\weather_api_response_files\IN\Maharashtra\ file name: weather_data_mumbai.csv
Inserting raw data to path: c:\Users\risha\WeatherETL\weather_api_response_files\US\Texas\ file name: weather_data_dallas.csv
Inserting raw data to path: c:\Users\risha\WeatherETL\weather_api_response_files\US\New York\ file name: weather_data_new york.csv
Inserting raw data to path: c:\Users\risha\WeatherETL\weather_api_response_files\IN\Tamil Nadu\ file name: weather_data_chennai.csv
Inserting raw data to path: c:\Users\risha\WeatherETL\weather_api_response_files\PH\Camarines Sur\ file name: weather_data_goa.csv
Inserting raw data to path: c:\Users\risha\WeatherETL\weather_api_response_files\US\Florida\ file name: weather_data_miami.csv
Inserting raw data to path: c:\Users\risha\WeatherETL\weather_api_response_files\US\Missouri\ fi