Imports and MongoDB connection

In [2]:
from pymongo import MongoClient as mc

In [3]:
# Open a client against the local MongoDB server, then bind the database and
# the collection that every aggregation below runs against.
client = mc('mongodb://localhost:27017/')
db = client.project
weather = db.weather

Number of records per country

In [13]:
# Count the stored weather documents per country code, largest group first.
count_per_country = {
    '$group': {
        '_id': "$city.country",
        'recordCount': {'$sum': 1},
    }
}
most_records_first = {'$sort': {'recordCount': -1}}
pipeline = [count_per_country, most_records_first]

# One printed dict per country: {'_id': <country>, 'recordCount': <n>}
for doc in weather.aggregate(pipeline):
    print(doc)

{'_id': 'DE', 'recordCount': 28786}
{'_id': 'US', 'recordCount': 19972}
{'_id': 'FR', 'recordCount': 19965}
{'_id': 'ES', 'recordCount': 15439}
{'_id': 'IT', 'recordCount': 9878}
{'_id': 'RU', 'recordCount': 8768}
{'_id': 'CN', 'recordCount': 8715}
{'_id': 'ID', 'recordCount': 6590}
{'_id': 'AU', 'recordCount': 5593}
{'_id': 'PH', 'recordCount': 4962}
{'_id': 'GB', 'recordCount': 4882}
{'_id': 'CH', 'recordCount': 4145}
{'_id': 'PL', 'recordCount': 4100}
{'_id': 'AT', 'recordCount': 3682}
{'_id': 'IN', 'recordCount': 3634}
{'_id': 'BR', 'recordCount': 3621}
{'_id': 'CA', 'recordCount': 3302}
{'_id': 'PT', 'recordCount': 3231}
{'_id': 'RO', 'recordCount': 2872}
{'_id': 'MX', 'recordCount': 2426}
{'_id': 'AR', 'recordCount': 1743}
{'_id': 'UA', 'recordCount': 1679}
{'_id': 'CZ', 'recordCount': 1496}
{'_id': 'BE', 'recordCount': 1452}
{'_id': 'JP', 'recordCount': 1402}
{'_id': 'BG', 'recordCount': 1370}
{'_id': 'TR', 'recordCount': 1327}
{'_id': 'GR', 'recordCount': 1323}
{'_id': 'IE', 'r

Country centroids and number of cities

In [27]:
# Stage 1: collapse the raw documents to one row per (country, city name),
# keeping a single coordinate pair for each city.
# NOTE(review): assumes a `city.name` field exists and that a name is unique
# within a country; same-named places would be merged into one row — confirm.
one_row_per_city = {
    "$group": {
        "_id": {"country": "$city.country", "city": "$city.name"},
        "lat": {"$first": "$city.coord.lat"},
        "lon": {"$first": "$city.coord.lon"},
    }
}

# Stage 2: per country, average the city coordinates and count the city rows.
centroid_per_country = {
    "$group": {
        "_id": "$_id.country",
        "avg_lat": {"$avg": "$lat"},
        "avg_lon": {"$avg": "$lon"},
        "cities": {"$sum": 1},
    }
}

# Stage 3: countries with the most cities come first.
pipeline = [one_row_per_city, centroid_per_country, {"$sort": {"cities": -1}}]

# country -> {"centroid": (avg_lat, avg_lon), "cities": n}, in pipeline order
countries = {
    row["_id"]: {
        "centroid": (row["avg_lat"], row["avg_lon"]),
        "cities": row["cities"],
    }
    for row in weather.aggregate(pipeline)
}

In [28]:
# Show the country -> {"centroid", "cities"} mapping as the cell output.
countries

{'DE': {'centroid': (50.958576323608334, 10.156532928721276), 'cities': 20497},
 'US': {'centroid': (38.1189975430701, -91.66484866872933), 'cities': 14221},
 'FR': {'centroid': (46.821523459408766, 2.040104607593037), 'cities': 13328},
 'ES': {'centroid': (40.64691215724382, -3.457004932862191), 'cities': 10754},
 'IT': {'centroid': (43.40612607339873, 11.550135004616273), 'cities': 8665},
 'RU': {'centroid': (53.98542550457135, 51.93542155187492), 'cities': 7547},
 'CN': {'centroid': (31.392651578923633, 113.81783374128082), 'cities': 6652},
 'ID': {'centroid': (-7.1309019774011295, 111.88554116347731), 'cities': 5487},
 'AU': {'centroid': (-33.199364167728234, 144.75308842983978), 'cities': 5181},
 'GB': {'centroid': (52.87327746323196, -1.9436701123467996), 'cities': 4406},
 'PH': {'centroid': (11.840234546477365, 122.70205502089472), 'cities': 3733},
 'PL': {'centroid': (51.52374222558538, 19.489989541976012), 'cities': 3502},
 'IN': {'centroid': (21.843337900404155, 79.2360854206

In [29]:
import folium
from folium.plugins import MarkerCluster

In [32]:
# World map with one marker per country, placed at that country's averaged
# city coordinates.
m = folium.Map(location=[35.2866, 3.3774], zoom_start=2)

for country, info in countries.items():
    lat, lon = info["centroid"]

    # HTML body shown when the marker is clicked
    popup_text = f"""
    <b>Country:</b> {country}<br>
    <b>Centroid:</b> {lat:.4f}, {lon:.4f}<br>
    <b>Cities:</b> {info['cities']}
    """
    popup = folium.Popup(popup_text, max_width=250)
    globe_icon = folium.Icon(color='blue', icon='globe', prefix='fa')

    # Attach the marker to the map
    folium.Marker(location=[lat, lon], popup=popup, icon=globe_icon).add_to(m)

In [33]:
# Write the country-marker map to an HTML file.
m.save('citycounts.html')

Temperature clusters

In [39]:
from sklearn.cluster import KMeans
import numpy as np
from branca.colormap import linear

In [43]:
# 1. Fetch weather data with coordinates
# Stage 1: one row per (city name, country) holding a single coordinate pair
# and the mean of each measurement over that city's documents.
city_means = {
    "$group": {
        "_id": {"city": "$city.name", "country": "$city.country"},
        "lat": {"$first": "$city.coord.lat"},
        "lon": {"$first": "$city.coord.lon"},
        "temp": {"$avg": "$main.temp"},
        "humidity": {"$avg": "$main.humidity"},
        "pressure": {"$avg": "$main.pressure"},
    }
}

# Stage 2: lift city/country out of the compound _id so each row is flat.
flatten_ids = {
    "$project": {
        "_id": 0,
        "city": "$_id.city",
        "country": "$_id.country",
        "lat": 1,
        "lon": 1,
        "temp": 1,
        "humidity": 1,
        "pressure": 1,
    }
}

pipeline = [city_means, flatten_ids]

# Materialise the cursor: the rows are iterated more than once further down.
data = list(weather.aggregate(pipeline))

In [44]:
# 2. Prepare data for clustering
# Each city contributes one single-element row, so the resulting array is
# 2-D with the mean temperature as its only column.
temperature_rows = [[record['temp']] for record in data]
features = np.array(temperature_rows)

In [45]:
# 3. Perform K-Means clustering on the temperature feature
n_clusters = 10  # number of temperature groups to form

# n_init is set explicitly: scikit-learn's default moved from 10 restarts to
# 'auto' between releases, so relying on it makes the labels depend on the
# installed version even though random_state is fixed.
kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=42)
clusters = kmeans.fit_predict(features)

# Add cluster labels back to our data; int() stores a plain Python integer
# instead of a NumPy scalar.
for d, label in zip(data, clusters):
    d['cluster'] = int(label)

In [48]:
# 4. Create a colored map
m = folium.Map(zoom_start=3)

# One colour scale drives both the markers and the legend.
# The previous hand-written list used folium.Icon colour names such as
# 'lightred' and 'darkpurple'; CircleMarker needs CSS colours, so those
# entries did not render, and the legend showed a palette unrelated to the
# markers.
colormap = linear.Set3_12.scale(0, n_clusters-1)

# Add each city to the map with its cluster's colour
for city in data:
    # "#rrggbb" string for this cluster id, taken from the legend's colormap
    cluster_color = colormap.rgb_hex_str(city['cluster'])
    folium.CircleMarker(
        location=[city['lat'], city['lon']],
        radius=5,
        popup=f"""
        <b>{city['city']}, {city['country']}</b><br>
        Cluster: {city['cluster']}<br>
        Temp: {city['temp']:.1f}K<br>
        Humidity: {city['humidity']}%<br>
        Pressure: {city['pressure']} hPa
        """,
        color=cluster_color,
        fill=True,
        fill_color=cluster_color
    ).add_to(m)

# Add cluster legend (same scale the marker colours were read from)
colormap.caption = 'Weather Condition Clusters'
m.add_child(colormap)

# Save and display
m.save('temp_clusters.html')
# display(m)  # For Jupyter notebook, otherwise open the HTML file


In [47]:
# Close the MongoDB client connection.
client.close()