In [2]:
import pandas as pd
import requests
from datetime import datetime
import pytz
import os

### __Get and store weather data for an AWS Lambda function__

In [18]:
# Connection settings for the "gans" MySQL database hosted on AWS RDS.
#
# Every value can be overridden through an environment variable so that real
# credentials never have to be typed into (and saved with) the notebook.
# When a variable is not set, the original placeholder value is used.
# NOTE: a password containing characters such as '@' or '/' must be
# URL-escaped before it is placed into the connection URL below.

schema = os.environ.get('GANS_DB_SCHEMA', 'gans')
host = os.environ.get('GANS_DB_HOST', 'wbs-project4-db.cdf7pal02v3u.eu-central-1.rds.amazonaws.com')
user = os.environ.get('GANS_DB_USER', 'admin')
password = os.environ.get('GANS_DB_PASSWORD', 'Your RDS Instance password')
port = int(os.environ.get('GANS_DB_PORT', 3306))
# SQLAlchemy-style URL (pymysql driver) consumed by pandas' SQL readers.
con = f"mysql+pymysql://{user}:{password}@{host}:{port}/{schema}"

In [9]:
### CITY_DATA
# Optional helper cell: reads the `cities` table from the SQL database and
# builds the `cities` list, which is useful for checking the weather function's output.
# The code is currently disabled by wrapping it in a triple-quoted string;
# remove the surrounding quotes to run it.

'''
city_data = pd.read_sql_table(
    'cities', con, index_col=None, coerce_float=True, parse_dates=None, columns=None, chunksize=None
                             )  #pandas.read_sql_table(table_name, con, schema=None, index_col=None, coerce_float=True, parse_dates=None, columns=None, chunksize=None)
city_data.rename(columns={'city_name':'city'},inplace=True)

cities = city_data["city"].to_list()
cities
'''

In [14]:
# Create a function that captures the full process (details in weather.ipynb).
# This function can then be copied into the Lambda handler function in AWS.

def get_weather_norm(cities=None):
    """Download weather forecasts for a list of cities and return them as one table.

    For every city the OpenWeatherMap ``/data/2.5/forecast`` endpoint is queried
    (metric units), the JSON answer is flattened with ``pd.json_normalize`` and
    the per-city frames are concatenated. The result is merged with the
    ``cities`` table of the SQL database (read through the module-level ``con``)
    to attach the ``city_id`` of each city.

    Parameters
    ----------
    cities : list of str, str or None, optional
        City names to query. ``None`` (the default) means "every city stored
        in the ``cities`` table". A single string is treated as one city.

    Returns
    -------
    pandas.DataFrame or int
        One row per forecast entry with the columns ``outlook``,
        ``forecast_time``, ``temperature``, ``temp_feels_like``, ``clouds``,
        ``rain``, ``snow``, ``wind_speed``, ``humidity``, ``city``,
        ``information_retrieved_at`` and ``city_id``.
        ``-1`` is returned when the initial test request fails or when no
        forecast could be retrieved for any city.
    """
    # Timestamp showing when the data was retrieved. Cloud machines are not
    # necessarily located in the user's country, so the time is taken
    # explicitly in the Europe/Berlin timezone instead of the system's one.
    tz = pytz.timezone('Europe/Berlin')
    retrieved_at = datetime.now(tz).strftime("%d/%m/%Y %H:%M:%S")

    # city_data is static data held in the SQL db; it supplies the default
    # city list and the city_id used in the merge further down.
    city_data = pd.read_sql_table('cities', con=con).rename(columns={'city_name': 'city'})
    if cities is None:
        cities = city_data["city"].to_list()
    elif isinstance(cities, str):
        # Guard against iterating over the characters of a single name.
        cities = [cities]

    # NOTE(review): prefer supplying the key through OPENWEATHER_API_KEY; the
    # literal fallback only keeps existing runs working and should be rotated.
    api_key = os.environ.get('OPENWEATHER_API_KEY', '80c5ac07e66f09d7ed9ab87d34d61b91')
    forecast_url = "https://api.openweathermap.org/data/2.5/forecast"
    base_params = {'appid': api_key, 'units': 'metric'}
    timeout_seconds = 10  # never let a stalled connection block the caller forever

    # One test request decides whether the API is reachable / the key is valid.
    test = requests.get(forecast_url, params={'q': 'Berlin', **base_params}, timeout=timeout_seconds)
    print(test.status_code)
    if not 200 <= test.status_code <= 299:
        return -1

    df_list = []
    for city in cities:
        # `params` lets requests escape the city name (spaces, '&', umlauts, ...).
        response = requests.get(forecast_url, params={'q': city, **base_params}, timeout=timeout_seconds)
        if not 200 <= response.status_code <= 299:
            # e.g. unknown city: the body has no 'list' key, so skip this city.
            print(f"{city}: {response.status_code}")
            continue

        payload = response.json()
        # errors='ignore' fills meta keys that are absent in an entry
        # (such as rain/snow) with NaN instead of raising.
        forecast_df = pd.json_normalize(payload['list'],
                                        record_path=['weather'],
                                        meta=['dt_txt',
                                              ['main', 'temp'],
                                              ['main', 'feels_like'],
                                              ['clouds', 'all'],
                                              ['rain', '3h'],
                                              ['snow', '3h'],
                                              ['wind', 'speed'],
                                              ['main', 'humidity']],
                                        errors='ignore')
        forecast_df['city'] = city
        forecast_df = (
            forecast_df
            .drop(columns=['id', 'icon', 'description'])
            .rename(columns={'main': 'outlook',
                             'dt_txt': 'forecast_time',
                             'main.temp': 'temperature',
                             'main.feels_like': 'temp_feels_like',
                             'clouds.all': 'clouds',
                             'rain.3h': 'rain',
                             'snow.3h': 'snow',
                             'wind.speed': 'wind_speed',
                             'main.humidity': 'humidity'})
        )
        forecast_df['information_retrieved_at'] = retrieved_at
        df_list.append(forecast_df)

    if not df_list:
        # pd.concat raises on an empty list; report failure like the test request does.
        print("no forecast data retrieved")
        return -1

    weather_output = pd.concat(df_list, ignore_index=True)

    # obtain city_id for weather_conditions table in sql db from city_data
    weather_output = weather_output.merge(city_data[["city_id", "city"]], how="left", on="city")

    # set data types
    weather_output = weather_output.astype({'temperature': float,
                                            'temp_feels_like': float,
                                            'clouds': float,
                                            'rain': float,
                                            'wind_speed': float,
                                            'snow': float,
                                            'humidity': float})
    weather_output["forecast_time"] = pd.to_datetime(weather_output["forecast_time"])

    return weather_output  # drop return when using in lambda function AWS

In [146]:
#get_weather_norm(['Cork', 'London'])

In [None]:
# Run the full pipeline. get_weather_norm loads the city list from the
# `cities` table of the database itself, so no list has to be prepared in the
# notebook first (this keeps the cell runnable on a fresh kernel).

weather_data = get_weather_norm(None)
#weather_data

In [None]:
#weather_data.head()

In [17]:
# Check the data type of every column in the collected forecast data.
weather_data.dtypes

outlook                             object
forecast_time               datetime64[ns]
temperature                        float64
temp_feels_like                    float64
clouds                             float64
rain                               float64
snow                               float64
wind_speed                         float64
humidity                           float64
city                                object
information_retrieved_at            object
city_id                             object
dtype: object

In [87]:
# Save the forecast data as weather_data.csv in the current working directory
# (with the default settings the DataFrame index is written as the first column).
weather_data.to_csv("weather_data.csv")