In [20]:
from collections import Counter

import boto3
import pandas as pd

In [21]:
# Open a DynamoDB resource in us-west-2 and get a handle on the raw weather table.
dynamo_client = boto3.resource('dynamodb', region_name='us-west-2')
table = dynamo_client.Table('raw_weather_data')

In [22]:
# Read every item from the table.
# A single Scan call returns at most one page of results (DynamoDB caps a page
# at 1 MB), so keep requesting pages for as long as the previous one carries a
# LastEvaluatedKey. `response` stays bound to the first page.
response = table.scan()
weather_conditions = list(response['Items'])

page = response
while 'LastEvaluatedKey' in page:
    page = table.scan(ExclusiveStartKey=page['LastEvaluatedKey'])
    weather_conditions.extend(page['Items'])

# Preview a few records only; dumping every item floods the notebook output.
weather_conditions[:3]


[{'feels_like': '6.84',
  'max_temp': '9.54',
  'name': 'Liège',
  'temperature': '8.65',
  'dt': Decimal('1744015275'),
  'longitude': '5.5675',
  'commune': 'Liège',
  'min_temp': '8',
  'humidity': '39',
  'description': 'clear sky',
  'latitude': '50.6337',
  'cloud_pct': '0',
  'weather': 'Clear',
  'wind_speed': Decimal('11.12')},
 {'feels_like': '6.84',
  'max_temp': '9.54',
  'name': 'Liège',
  'temperature': '8.65',
  'dt': Decimal('1744016103'),
  'longitude': '5.5675',
  'commune': 'Liège',
  'min_temp': '8',
  'humidity': '39',
  'description': 'clear sky',
  'latitude': '50.6337',
  'cloud_pct': '0',
  'weather': 'Clear',
  'wind_speed': Decimal('11.12')},
 {'feels_like': '6.84',
  'max_temp': '9.54',
  'name': 'Liège',
  'temperature': '8.65',
  'dt': Decimal('1744016403'),
  'longitude': '5.5675',
  'commune': 'Liège',
  'min_temp': '8',
  'humidity': '39',
  'description': 'clear sky',
  'latitude': '50.6337',
  'cloud_pct': '0',
  'weather': 'Clear',
  'wind_speed': De

In [23]:
# Load the scanned items into a DataFrame for analysis.
df = pd.DataFrame(weather_conditions)

In [24]:
# Sanity check: (number of rows, number of columns) of the loaded data.
df.shape

(1625, 14)

In [25]:
# How many records were collected for each value of the 'name' column.
df['name'].value_counts()

name
Brussels     169
Bruges       167
Liège        166
Charleroi    164
Leuven       164
Antwerp      162
Ghent        161
Mons         161
Namur        157
Ostend       154
Name: count, dtype: int64

In [26]:
# Convert the measurement columns to numeric dtypes.
# Target dtype for each column; each column is converted in place on `df`.
numeric_dtypes = {
    'temperature': float,
    'min_temp': float,
    'max_temp': float,
    'feels_like': float,
    'longitude': float,
    'latitude': float,
    'humidity': int,
    'cloud_pct': int,
    'wind_speed': float,
}

for column, dtype in numeric_dtypes.items():
    df[column] = df[column].astype(dtype)

In [27]:
# Exploratory data analysis: mean temperature per city, rounded to one decimal.

grouped_df = df.groupby('name')
city_temperatures = grouped_df['temperature']
new_df = city_temperatures.mean().round(1)

print('Average temperatures:')
new_df

Average temperatures:


name
Antwerp      9.2
Bruges       9.1
Brussels     8.5
Charleroi    8.0
Ghent        9.3
Leuven       8.5
Liège        8.9
Mons         8.3
Namur        8.5
Ostend       8.8
Name: temperature, dtype: float64

In [28]:
# City with the highest mean temperature (rounded to 2 decimals).
# NOTE(review): the message says "at the moment", but the value is the mean over
# every record in `grouped_df`, not the latest reading.
warmest_city = (
    grouped_df['temperature']
    .mean()
    .round(2)
    .sort_values(ascending=False)
    .head(1)
)
warmest_name = warmest_city.index[0]
warmest_temp = warmest_city.values[0]
print(f'The warmest city at the moment is: {warmest_name} with {warmest_temp} degrees celcius.')



The warmest city at the moment is: Ghent with 9.3 degrees celcius.


In [29]:
# City with the lowest mean temperature (rounded to 2 decimals).
# NOTE(review): the message says "at the moment", but the value is the mean over
# every record in `grouped_df`, not the latest reading.
chilliest_city = (
    grouped_df['temperature']
    .mean()
    .round(2)
    .sort_values()
    .head(1)
)
chilliest_name = chilliest_city.index[0]
chilliest_temp = chilliest_city.values[0]
print(f'The chilliest city at the moment is: {chilliest_name} with {chilliest_temp} degrees celcius.')


The chilliest city at the moment is: Charleroi with 8.01 degrees celcius.


In [None]:
# Double check how many entries there are per city, independently of pandas.
#
# Counter tallies each record's 'name' under one consistent key. The previous
# hand-rolled version looked counts up under the raw value but stored them
# under its string form, which miscounts any non-string name. Records without
# a 'name' attribute are skipped, as before, and cities keep first-seen order.
scores = Counter(
    item['name'] for item in weather_conditions if 'name' in item
)

# Each entry prints as a (city, count) tuple.
for city_count in scores.items():
    print(city_count)
print(f'\n Total records: {len(weather_conditions)}')


('Liège', 166)
('Antwerp', 162)
('Ghent', 161)
('Namur', 157)
('Brussels', 169)
('Ostend', 154)
('Bruges', 167)
('Charleroi', 164)
('Mons', 161)
('Leuven', 164)

 Total records: 1625
