In [79]:
#imports
import time
import logging
import pandas as pd
import boto3
import os
from io import StringIO, BytesIO
import json


In [80]:
# Set up S3 access for the weather bucket.
# Credentials come from the environment; a missing key or secret raises KeyError.
session = boto3.Session(
    aws_access_key_id=os.environ['AWS_ACCESS_KEY_ID'],
    aws_secret_access_key=os.environ['AWS_SECRET_ACCESS_KEY'],
    # Temporary (STS) credentials are only valid together with their session
    # token, so forward it when it is set. When it is absent this is None,
    # which is the same as not passing the argument at all.
    aws_session_token=os.environ.get('AWS_SESSION_TOKEN'),
)
s3 = session.resource(service_name='s3')
bucket_name = 'de-city-weather'
bucket = s3.Bucket(bucket_name)
client = session.client('s3')

# List the object keys under the 'data_files' prefix.
# Keys ending in '/' are zero-byte "folder" placeholders (e.g. created through
# the S3 console). They carry no data and would break the JSON read of
# files[0], so they are skipped.
files = [
    obj.key
    for obj in bucket.objects.filter(Prefix='data_files')
    if not obj.key.endswith('/')
]

In [81]:
# Empty frame created up front; nothing is added to it in this cell.
full_df = pd.DataFrame()

# Fetch the first listed object from the bucket and read its raw bytes.
obj = s3.Object(bucket_name, files[0])
response = obj.get()
body = response.get('Body')
file_content = body.read()

# Parse the bytes as JSON into a DataFrame.
with BytesIO(file_content) as bio:
    df = pd.read_json(bio)

# Transpose so that each row is one city (see the preview below).
weather_df = df.transpose()
weather_df.head()


Unnamed: 0,dt,sunrise,sunset,temp,feels_like,pressure,humidity,dew_point,uvi,clouds,visibility,wind_speed,wind_deg,wind_gust
Athens,1716155000.0,1716175000.0,1716226000.0,21.91,22.04,1011.0,72.0,16.64,0.0,83.0,10000.0,2.24,125.0,4.47
Madrid,1716155000.0,1716094000.0,1716147000.0,14.01,13.03,1012.0,60.0,6.38,0.0,0.0,10000.0,3.09,340.0,
Copenhagen,1716155000.0,1716087000.0,1716146000.0,13.8,13.3,1013.0,79.0,10.23,0.0,1.0,10000.0,2.06,250.0,
Dublin,1716155000.0,1716092000.0,1716150000.0,13.2,12.85,1017.0,87.0,11.09,0.0,0.0,10000.0,3.09,90.0,
Paris,1716155000.0,1716091000.0,1716147000.0,16.14,15.82,1012.0,77.0,12.11,0.0,0.0,10000.0,2.57,340.0,


In [82]:
# List the column labels of the transposed frame.
weather_df.columns

Index(['dt', 'sunrise', 'sunset', 'temp', 'feels_like', 'pressure', 'humidity',
       'dew_point', 'uvi', 'clouds', 'visibility', 'wind_speed', 'wind_deg',
       'wind_gust'],
      dtype='object')

In [83]:
# Drop the measurements that are not used further down.
# errors='ignore' skips labels that are not present instead of raising
# KeyError. That matters for sparse fields (wind_gust is blank for most cities
# in the preview above, so a file may lack it entirely) and it also makes
# re-running this cell harmless.
unused_columns = [
    'feels_like', 'pressure', 'dew_point', 'clouds',
    'visibility', 'wind_speed', 'wind_deg', 'wind_gust',
]
weather_df = weather_df.drop(columns=unused_columns, errors='ignore')
weather_df.head()

Unnamed: 0,dt,sunrise,sunset,temp,humidity,uvi
Athens,1716155000.0,1716175000.0,1716226000.0,21.91,72.0,0.0
Madrid,1716155000.0,1716094000.0,1716147000.0,14.01,60.0,0.0
Copenhagen,1716155000.0,1716087000.0,1716146000.0,13.8,79.0,0.0
Dublin,1716155000.0,1716092000.0,1716150000.0,13.2,87.0,0.0
Paris,1716155000.0,1716091000.0,1716147000.0,16.14,77.0,0.0


In [84]:
# Convert the epoch-second columns into calendar values.
# Each column is parsed on its own rather than through a stack()/unstack()
# round-trip: stack() drops missing values by default, so a column without any
# valid timestamp would disappear from the unstacked result and the assignment
# back would fail. Parsed directly, missing values simply become NaT.
# unit='s' reads the numbers as seconds since the Unix epoch, so the resulting
# dates and times are in UTC, not in each city's local time.
parsed = {
    col: pd.to_datetime(weather_df[col], unit='s')
    for col in ('dt', 'sunrise', 'sunset')
}
weather_df['dt'] = parsed['dt'].dt.date            # observation date only
weather_df['sunrise'] = parsed['sunrise'].dt.time  # time of day only
weather_df['sunset'] = parsed['sunset'].dt.time    # time of day only
weather_df.head()

Unnamed: 0,dt,sunrise,sunset,temp,humidity,uvi
Athens,2024-05-19,03:10:49,17:32:40,21.91,72.0,0.0
Madrid,2024-05-19,04:54:53,19:28:00,14.01,60.0,0.0
Copenhagen,2024-05-19,02:51:07,19:21:34,13.8,79.0,0.0
Dublin,2024-05-19,04:18:38,20:24:42,13.2,87.0,0.0
Paris,2024-05-19,04:03:38,19:30:50,16.14,77.0,0.0


In [85]:
# Add a Fahrenheit version of the temperature (F = C * 1.8 + 32), rounded to
# two decimals. 'temp' holds Celsius values; it is renamed to temp_celsius
# later in the notebook.
fahrenheit = weather_df['temp'] * 1.8 + 32
weather_df['temp_imperial'] = round(fahrenheit, 2)
weather_df.head()

Unnamed: 0,dt,sunrise,sunset,temp,humidity,uvi,temp_imperial
Athens,2024-05-19,03:10:49,17:32:40,21.91,72.0,0.0,71.44
Madrid,2024-05-19,04:54:53,19:28:00,14.01,60.0,0.0,57.22
Copenhagen,2024-05-19,02:51:07,19:21:34,13.8,79.0,0.0,56.84
Dublin,2024-05-19,04:18:38,20:24:42,13.2,87.0,0.0,55.76
Paris,2024-05-19,04:03:38,19:30:50,16.14,77.0,0.0,61.05


In [86]:
# Move the city-name index into an ordinary column; reset_index() names that
# column 'index' (visible in the listing below).
weather_df = weather_df.reset_index()
# Show the column labels after the reset.
weather_df.columns

Index(['index', 'dt', 'sunrise', 'sunset', 'temp', 'humidity', 'uvi',
       'temp_imperial'],
      dtype='object')

In [87]:
# Give the columns descriptive names; labels not listed here stay as they are.
column_names = {
    "index": "city",
    "dt": "date",
    "temp": "temp_celsius",
    "uvi": "uv_index",
}
weather_df = weather_df.rename(columns=column_names)
weather_df

Unnamed: 0,city,date,sunrise,sunset,temp_celsius,humidity,uv_index,temp_imperial
0,Athens,2024-05-19,03:10:49,17:32:40,21.91,72.0,0.0,71.44
1,Madrid,2024-05-19,04:54:53,19:28:00,14.01,60.0,0.0,57.22
2,Copenhagen,2024-05-19,02:51:07,19:21:34,13.8,79.0,0.0,56.84
3,Dublin,2024-05-19,04:18:38,20:24:42,13.2,87.0,0.0,55.76
4,Paris,2024-05-19,04:03:38,19:30:50,16.14,77.0,0.0,61.05
5,London,2024-05-19,04:02:39,19:51:37,15.17,82.0,0.0,59.31
6,Berlin,2024-05-19,03:04:05,19:02:03,16.87,87.0,0.0,62.37
7,Geneva,2024-05-19,10:45:59,01:32:20,22.13,37.0,1.68,71.83
8,Prague,2024-05-19,03:10:31,18:47:22,12.65,86.0,0.0,54.77
9,Warsaw,2024-05-19,02:34:57,18:30:14,15.57,71.0,0.0,60.03


In [88]:
# Put the columns in their final order, with the two temperature columns
# next to each other.
column_order = [
    'city', 'date', 'sunrise', 'sunset',
    'temp_celsius', 'temp_imperial', 'humidity', 'uv_index',
]
weather_df = weather_df[column_order]
weather_df

Unnamed: 0,city,date,sunrise,sunset,temp_celsius,temp_imperial,humidity,uv_index
0,Athens,2024-05-19,03:10:49,17:32:40,21.91,71.44,72.0,0.0
1,Madrid,2024-05-19,04:54:53,19:28:00,14.01,57.22,60.0,0.0
2,Copenhagen,2024-05-19,02:51:07,19:21:34,13.8,56.84,79.0,0.0
3,Dublin,2024-05-19,04:18:38,20:24:42,13.2,55.76,87.0,0.0
4,Paris,2024-05-19,04:03:38,19:30:50,16.14,61.05,77.0,0.0
5,London,2024-05-19,04:02:39,19:51:37,15.17,59.31,82.0,0.0
6,Berlin,2024-05-19,03:04:05,19:02:03,16.87,62.37,87.0,0.0
7,Geneva,2024-05-19,10:45:59,01:32:20,22.13,71.83,37.0,1.68
8,Prague,2024-05-19,03:10:31,18:47:22,12.65,54.77,86.0,0.0
9,Warsaw,2024-05-19,02:34:57,18:30:14,15.57,60.03,71.0,0.0
