In [1]:
import datetime as dt
import json
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import random
import requests
import seaborn as sns
import sqlite3

## Cities

The following code uses the public OpenStreetMap Nominatim API to look up the latitude and longitude of each city.

In [2]:
cities = ['Austin', 'Cleveland', 'New York', 'San Francisco', 'Seattle']
cities_data = []

# Request settings that do not change between cities are built once, outside the loop.
url = 'https://nominatim.openstreetmap.org/search'
headers = {'User-Agent': 'ColabNotebook/1.0'}  # Identify this client to the API.
REQUEST_TIMEOUT = 10  # Seconds; without a timeout a stalled server would hang the cell indefinitely.

for city in cities:
    # Pass the query via `params` so that requests URL-encodes names such as 'New York'.
    params = {'q': city, 'format': 'json', 'limit': 1}
    try:
        response = requests.get(url, params=params, headers=headers, timeout=REQUEST_TIMEOUT)
        response.raise_for_status() # Raise an HTTPError for bad responses (4xx or 5xx).
        data = response.json()
        if data:
            # Only the first match is used; its coordinates are converted with float().
            cities_data.append({
                'city': city,
                'latitude': float(data[0]['lat']),
                'longitude': float(data[0]['lon'])
            })
        else:
            print(f'No data for {city}')
    except requests.exceptions.RequestException as e:
        print(f'Error fetching data for {city}: {e}')
    except (KeyError, IndexError, TypeError, ValueError) as e:
        # The payload did not have the expected [{'lat': ..., 'lon': ...}] shape;
        # skip this city instead of aborting the whole loop.
        print(f'Unexpected response for {city}: {e!r}')

cities_data

[{'city': 'Austin', 'latitude': 30.2711286, 'longitude': -97.7436995},
 {'city': 'Cleveland', 'latitude': 41.4996574, 'longitude': -81.6936772},
 {'city': 'New York', 'latitude': 40.7127281, 'longitude': -74.0060152},
 {'city': 'San Francisco', 'latitude': 37.7879363, 'longitude': -122.4075201},
 {'city': 'Seattle', 'latitude': 47.6038321, 'longitude': -122.330062}]

## Orders

The following code uses the city data to generate random order data.

In [3]:
# Use a dedicated, seeded generator so the synthetic amounts are identical on every
# Restart & Run All, without touching the global `random` state.
ORDER_SEED = 42
order_random = random.Random(ORDER_SEED)

# 60 consecutive daily timestamps ending today at midnight.
rng = pd.date_range(end=pd.Timestamp.today().normalize(), periods=60, freq='D')
orders = []

# One order per city per day.
for city_data in cities_data:
    city_name = city_data['city']
    for date in rng:
        amount = round(order_random.uniform(10.0, 500.0), 2) # random amount between 10.0 and 500.0
        orders.append({
            'city': city_name,
            'date': date.strftime('%Y-%m-%d'), # Format the date as an ISO 8601 string.
            'amount': amount
        })

print(f'Generated {len(orders)} orders.')

# Display several orders spread across the data to check the structure.
len(orders), orders[::50]

Generated 300 orders.


(300,
 [{'city': 'Austin', 'date': '2025-10-13', 'amount': 337.56},
  {'city': 'Austin', 'date': '2025-12-02', 'amount': 394.11},
  {'city': 'Cleveland', 'date': '2025-11-22', 'amount': 377.35},
  {'city': 'New York', 'date': '2025-11-12', 'amount': 334.53},
  {'city': 'San Francisco', 'date': '2025-11-02', 'amount': 276.06},
  {'city': 'Seattle', 'date': '2025-10-23', 'amount': 133.98}])

## SQL Database

The following code uses pandas to load the data into an in-memory SQLite database.

In [4]:
# Build one DataFrame per record list up front.
cities_df = pd.DataFrame(cities_data)
orders_df = pd.DataFrame(orders)

# Open a SQLite database that lives only in memory.
conn = sqlite3.connect(':memory:')

# Write each DataFrame to its own table (cities first, then orders),
# replacing any existing table and leaving the DataFrame index out.
cities_df.to_sql(name='cities', con=conn, if_exists='replace', index=False)
orders_df.to_sql(name='orders', con=conn, if_exists='replace', index=False)

# Ask SQLite which tables now exist.
cursor = conn.cursor()
tables = cursor.execute("SELECT name FROM sqlite_master WHERE type = 'table';").fetchall()
print('Tables in database:', tables)

# Preview the 'cities' table.
print("\nFirst three rows of 'cities' table:")
query_cities = pd.read_sql_query(sql="SELECT * FROM cities LIMIT 3;", con=conn)
display(query_cities)

# Preview the 'orders' table.
print("\nFirst three rows of 'orders' table:")
query_orders = pd.read_sql_query(sql="SELECT * FROM orders LIMIT 3;", con=conn)
display(query_orders)

Tables in database: [('cities',), ('orders',)]

First three rows of 'cities' table:


Unnamed: 0,city,latitude,longitude
0,Austin,30.271129,-97.7437
1,Cleveland,41.499657,-81.693677
2,New York,40.712728,-74.006015



First three rows of 'orders' table:


Unnamed: 0,city,date,amount
0,Austin,2025-10-13,337.56
1,Austin,2025-10-14,353.11
2,Austin,2025-10-15,46.54


## Joining and aggregating

The following code joins the two tables and aggregates the order amount by city.

In [5]:
# Attach each city's coordinates to every one of its orders (inner join on the city name).
sql_query = """
SELECT c.city, c.latitude, c.longitude, o.date, o.amount
FROM cities AS c
INNER JOIN orders AS o
ON c.city = o.city;
"""
joined_df = pd.read_sql_query(sql=sql_query, con=conn)
print(f'{len(joined_df)} rows')

# Show every 50th row instead of the whole frame.
joined_df.iloc[::50]

300 rows


Unnamed: 0,city,latitude,longitude,date,amount
0,Austin,30.271129,-97.7437,2025-10-13,337.56
50,Austin,30.271129,-97.7437,2025-12-02,394.11
100,Cleveland,41.499657,-81.693677,2025-11-22,377.35
150,New York,40.712728,-74.006015,2025-11-12,334.53
200,San Francisco,37.787936,-122.40752,2025-11-02,276.06
250,Seattle,47.603832,-122.330062,2025-10-23,133.98


In [6]:
# Total the order amounts per city, carrying the city's coordinates along in the grouping.
sql_query = """
SELECT c.city, c.latitude, c.longitude, SUM(o.amount) as order_sum
FROM cities AS c
INNER JOIN orders AS o
ON c.city = o.city
GROUP BY c.city, c.latitude, c.longitude;
"""
aggregated_df = pd.read_sql_query(sql=sql_query, con=conn)
print(f'{len(aggregated_df)} rows')

# One row per city, so the full frame is small enough to display.
aggregated_df

5 rows


Unnamed: 0,city,latitude,longitude,order_sum
0,Austin,30.271129,-97.7437,15572.3
1,Cleveland,41.499657,-81.693677,14349.37
2,New York,40.712728,-74.006015,14796.65
3,San Francisco,37.787936,-122.40752,17061.2
4,Seattle,47.603832,-122.330062,14415.82


## API Access

The following code uses the public Open-Meteo API to get the current temperature for each city.

In [7]:
def get_current_temperature(latitude, longitude, timeout=10):
    """Fetch the current temperature at a coordinate from the Open-Meteo forecast API.

    Args:
        latitude: Value sent as the `latitude` query parameter.
        longitude: Value sent as the `longitude` query parameter.
        timeout: Seconds to wait for the server before the request is abandoned.
            Without a timeout, a stalled connection would block the notebook forever.

    Returns:
        The `temperature` value found under `current_weather` in the JSON response,
        or None if the request fails or the response does not contain that field.
        NOTE(review): the unit is whatever the API returns by default
        (presumably degrees Celsius) -- confirm against the API documentation.
    """
    url = "https://api.open-meteo.com/v1/forecast"
    # Let requests build and encode the query string instead of formatting it by hand.
    params = {'latitude': latitude, 'longitude': longitude, 'current_weather': 'true'}
    try:
        response = requests.get(url, params=params, timeout=timeout)
        response.raise_for_status() # Raise an HTTPError for bad responses (4xx or 5xx).
        data = response.json()
    except requests.exceptions.RequestException as e:
        # Best effort: report the failure and let the caller see a missing value.
        print(f"Error fetching weather data: {e}")
        return None

    if 'current_weather' in data and 'temperature' in data['current_weather']:
        return data['current_weather']['temperature']
    return None

def _temperature_for_row(row):
    """Look up the current temperature for one row of `cities_df`."""
    return get_current_temperature(row['latitude'], row['longitude'])

# One API call per city row; the result is aligned with the index of `cities_df`.
temperature_series = cities_df.apply(_temperature_for_row, axis=1)

# Put the city names and their temperatures side by side as two columns.
temperature_df = pd.concat(
    [cities_df['city'], temperature_series.rename('temperature')],
    axis=1,
)
temperature_df

Unnamed: 0,city,temperature
0,Austin,12.8
1,Cleveland,-3.1
2,New York,-1.6
3,San Francisco,8.9
4,Seattle,8.3


## Data Persistence

The following code persists the temperature data as a CSV file.

In [8]:
# Write the temperatures to a CSV file without the DataFrame index, then read the
# file back and print it to confirm what was persisted. The encoding is spelled out
# on both sides because open() otherwise falls back to the platform's locale encoding,
# which may differ from the UTF-8 that to_csv() writes.
temperature_df.to_csv('temperatures.csv', index=False, encoding='utf-8')
with open('temperatures.csv', 'rt', encoding='utf-8') as fin:
    print(fin.read())

city,temperature
Austin,12.8
Cleveland,-3.1
New York,-1.6
San Francisco,8.9
Seattle,8.3

