In [1]:
# Use the explicit %pip magic so the package is installed into the running
# kernel's environment without relying on IPython automagic.
%pip install meteostat

Note: you may need to restart the kernel to use updated packages.


In [2]:
from datetime import datetime
import matplotlib.pyplot as plt
from meteostat import Point, Daily
import pandas as pd
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut
import time
import folium
from folium import Marker
from folium.plugins import HeatMap

In [3]:
# Top 50 Schools

In [4]:
# List of universities

# One school per line; the order is kept because the geocoding loop below
# iterates this list to build the DataFrame rows.
universities = [
    "Princeton University",
    "Massachusetts Institute of Technology",
    "Harvard University",
    "Stanford University",
    "Yale University",
    "California Institute of Technology",
    "Duke University",
    "Johns Hopkins University",
    "Northwestern University",
    "University of Pennsylvania",
    "Cornell University",
    "University of Chicago",
    "Brown University",
    "Columbia University",
    "Dartmouth College",
    "University of California-Los Angeles",
    "University of California-Berkeley",
    "Rice University",
    "University of Notre Dame",
    "Vanderbilt University",
    "Carnegie Mellon University",
    "University of Michigan-Ann Arbor",
    "Washington University in St. Louis",
    "Emory University",
    "Georgetown University",
    "University of Virginia",
    "University of North Carolina-Chapel Hill",
    "University of Southern California",
    "University of California-San Diego",
    "New York University",
    "University of Florida",
    "University of Texas-Austin",
    "Georgia Institute of Technology",
    "University of California-Davis",
    "University of California-Irvine",
    "University of Illinois-Urbana-Champaign",
    "Boston College",
    "Tufts University",
    "University of California-Santa Barbara",
    "University of Wisconsin-Madison",
    "Boston University",
    "Ohio State University-Columbus",
    "Rutgers University-New Brunswick",
    "University of Maryland-College Park",
    "University of Rochester",
    "Lehigh University",
    "Purdue University",
    "University of Georgia",
    "University of Washington",
    "Wake Forest University",
]

In [5]:
# Building a function to find the coordinates of a given location (latitude & longitude)

In [6]:
def find_coordinates(location_name, max_retries=3, retry_delay=1):
    """
    Finds the latitude and longitude of a given location name.

    Args:
        location_name: Free-form place name handed to the Nominatim geocoder.
        max_retries: Number of extra attempts made after a GeocoderTimedOut
            before giving up. Negative values are treated as 0.
        retry_delay: Seconds to sleep between attempts.

    Returns:
        A (latitude, longitude) tuple read from the geocoder's result.

    Raises:
        ValueError: If the geocoder returns no result for location_name.
        GeocoderTimedOut: If every attempt timed out.
    """
    geolocator = Nominatim(user_agent="geo_locator")
    total_attempts = max(max_retries, 0) + 1

    for attempt in range(1, total_attempts + 1):
        try:
            location = geolocator.geocode(location_name)
        except GeocoderTimedOut:
            # Retry a bounded number of times instead of recursing forever
            # when the service stays unreachable.
            if attempt == total_attempts:
                raise
            time.sleep(retry_delay)
            continue

        # geocode() yields None when nothing matches; fail with a message that
        # names the offending query rather than an AttributeError on None.
        if location is None:
            raise ValueError(f"Could not find coordinates for {location_name!r}")

        return location.latitude, location.longitude

In [7]:
# Initialize an empty list
data = []

# Iterate through the list of universities and fetch their coordinates.
# The public Nominatim server permits at most one request per second, so
# pause between consecutive lookups instead of sending them back-to-back.
for position, university in enumerate(universities):
    if position:
        time.sleep(1)
    lat, lon = find_coordinates(university)
    data.append({"School Name": university, "Latitude": lat, "Longitude": lon})

# Create a DataFrame with one row per university, in list order
universities_df = pd.DataFrame(data)

In [8]:
# Preview the first five rows of the geocoded universities table
universities_df.head()

Unnamed: 0,School Name,Latitude,Longitude
0,Princeton University,40.338675,-74.658365
1,Massachusetts Institute of Technology,42.358253,-71.096627
2,Harvard University,42.365743,-71.122214
3,Stanford University,37.431314,-122.169365
4,Yale University,41.257131,-72.98967


In [9]:
# Define timelines: label -> (start, end) of the date range to average over
timelines = {"Sep.1 - Dec.31": (datetime(2018, 9, 1), datetime(2018, 12, 31)),
    "Jan.1 - April.30": (datetime(2018, 1, 1), datetime(2018, 4, 30)),
    "May.1 - Aug.31": (datetime(2018, 5, 1), datetime(2018, 8, 31))}

# For each timeline, fetch daily weather per university and store the period means
for timeline_name, (start, end) in timelines.items():
    avg_tavg_column = f"{timeline_name} Avg TAVG"
    avg_prcp_column = f"{timeline_name} Avg PRCP"

    # Collect one value per row and assign the whole column at once, so the
    # result is a float column rather than an object column pre-filled with None.
    avg_tavg_values = []
    avg_prcp_values = []

    for latitude, longitude in zip(universities_df["Latitude"], universities_df["Longitude"]):
        location = Point(latitude, longitude)
        weather_data = Daily(location, start, end).fetch()

        # Calculate averages of tavg and prcp; fall back to a missing value if
        # the fetched frame does not carry the expected column.
        avg_tavg = weather_data["tavg"].mean() if "tavg" in weather_data else None
        avg_prcp = weather_data["prcp"].mean() if "prcp" in weather_data else None
        avg_tavg_values.append(avg_tavg)
        avg_prcp_values.append(avg_prcp)

    # to_numeric(errors="coerce") turns None / NA placeholders into NaN
    universities_df[avg_tavg_column] = pd.to_numeric(
        pd.Series(avg_tavg_values, index=universities_df.index), errors="coerce"
    )
    universities_df[avg_prcp_column] = pd.to_numeric(
        pd.Series(avg_prcp_values, index=universities_df.index), errors="coerce"
    )

In [10]:
# Replace NaNs with 0
# NOTE(review): after this step a missing average is indistinguishable from a
# genuine average of 0 in the table and in anything built from it — confirm
# that treating "no data" as 0 is intended.
universities_df = universities_df.fillna(0)

In [11]:
# Display DataFrame (head(50) shows at most 50 rows; the universities list has 50 entries)
universities_df.head(50)

Unnamed: 0,School Name,Latitude,Longitude,Sep.1 - Dec.31 Avg TAVG,Sep.1 - Dec.31 Avg PRCP,Jan.1 - April.30 Avg TAVG,Jan.1 - April.30 Avg PRCP,May.1 - Aug.31 Avg TAVG,May.1 - Aug.31 Avg PRCP
0,Princeton University,40.338675,-74.658365,11.488525,3.429907,4.315,1.672727,22.952033,3.187619
1,Massachusetts Institute of Technology,42.358253,-71.096627,10.411475,4.347541,2.750833,3.891667,21.281301,2.906504
2,Harvard University,42.365743,-71.122214,10.411475,4.347541,2.750833,3.891667,21.281301,2.906504
3,Stanford University,37.431314,-122.169365,14.922951,0.463115,12.549167,1.324167,17.672358,0.002439
4,Yale University,41.257131,-72.98967,11.069672,4.619672,3.278333,3.325,21.445528,2.574797
5,California Institute of Technology,34.137014,-118.125288,17.866393,0.829508,15.536667,1.3025,22.19187,0.022764
6,Duke University,36.000156,-78.94423,14.294262,5.388525,8.89,3.339167,24.885366,3.849593
7,Johns Hopkins University,39.330202,-76.621854,13.431148,5.536066,6.55,2.8025,25.304878,6.033333
8,Northwestern University,42.055716,-87.675295,8.565574,2.827869,0.5675,2.1075,22.076423,4.304878
9,University of Pennsylvania,39.94934,-75.189649,12.077049,5.885246,5.000833,3.716667,23.417886,3.247967


In [12]:
# Base map that the university markers are drawn on
map_na = folium.Map(
    location=[39.8283, -98.5795],
    tiles='cartodbpositron',
    zoom_start=4,
)

# Place one marker per university, using the school name as the popup text
school_points = zip(
    universities_df["School Name"],
    universities_df["Latitude"],
    universities_df["Longitude"],
)
for school_name, school_lat, school_lon in school_points:
    marker = folium.Marker(location=[school_lat, school_lon], popup=f"{school_name}")
    marker.add_to(map_na)

# Display map
map_na

In [13]:
def create_heatmap(dataframe, column, radius=20, gradient=None):
    """
    Build a folium map carrying a weighted HeatMap layer for one column.

    Args:
        dataframe: DataFrame with "Latitude" and "Longitude" columns plus `column`.
        column: Name of the column whose values are passed as per-point weights.
        radius: Radius forwarded to the HeatMap layer (default 20).
        gradient: Optional mapping of gradient stop -> colour. When omitted,
            the purple-to-yellow scale defined below is used.

    Returns:
        The folium.Map with the HeatMap layer added.

    Raises:
        KeyError: If "Latitude", "Longitude" or `column` is missing from dataframe.
    """
    # Initialize the map
    heatmap = folium.Map(location=[39.8283, -98.5795], tiles='cartodbpositron', zoom_start=4)

    if gradient is None:
        # Viridis-like colour scale. Keys are strings: they serialise to the
        # same object keys as floats would.
        # NOTE(review): float gradient keys are reported to break option
        # serialisation in some folium releases — confirm against the
        # installed version.
        gradient = {
            "0.0": '#440154',  # Dark purple
            "0.2": '#3b528b',  # Blue
            "0.4": '#21908d',  # Teal
            "0.6": '#5ec962',  # Green
            "0.8": '#fde725',  # Yellow
            "1.0": '#ffffbf'   # Light yellow
        }

    # Extract (lat, lon, weight) triples, skipping rows with a missing value
    # so that incomplete rows never reach the HeatMap layer.
    heatmap_data = [
        (lat, lon, weight)
        for lat, lon, weight in zip(dataframe["Latitude"], dataframe["Longitude"], dataframe[column])
        if not (pd.isna(lat) or pd.isna(lon) or pd.isna(weight))
    ]

    # Add the HeatMap layer
    HeatMap(data=heatmap_data, radius=radius, gradient=gradient).add_to(heatmap)

    return heatmap

In [14]:
# Build one average-temperature (tavg) heatmap per timeline, in the order
# fall, winter, summer
(
    heatmap_fall_semester_temp_avg,
    heatmap_winter_semester_temp_avg,
    heatmap_summer_semester_temp_avg,
) = [
    create_heatmap(universities_df, tavg_column)
    for tavg_column in (
        "Sep.1 - Dec.31 Avg TAVG",
        "Jan.1 - April.30 Avg TAVG",
        "May.1 - Aug.31 Avg TAVG",
    )
]

In [15]:
# Display the average-temperature (tavg) heatmap
heatmap_fall_semester_temp_avg  # For the timeline Sep.1 - Dec.31 (Fall Semester)

In [16]:
# Display the average-temperature (tavg) heatmap
heatmap_winter_semester_temp_avg  # For the timeline Jan.1 - April.30 (Winter Semester)

In [17]:
# Display the average-temperature (tavg) heatmap
heatmap_summer_semester_temp_avg  # For the timeline May.1 - Aug.31 (Summer Semester)

In [18]:
# Build one average-precipitation (prcp) heatmap per timeline, in the order
# fall, winter, summer
(
    heatmap_fall_semester_precip_avg,
    heatmap_winter_semester_precip_avg,
    heatmap_summer_semester_precip_avg,
) = [
    create_heatmap(universities_df, prcp_column)
    for prcp_column in (
        "Sep.1 - Dec.31 Avg PRCP",
        "Jan.1 - April.30 Avg PRCP",
        "May.1 - Aug.31 Avg PRCP",
    )
]

In [19]:
# Display the average-precipitation (prcp) heatmap
heatmap_fall_semester_precip_avg  # For the timeline Sep.1 - Dec.31 (Fall Semester)

In [20]:
# Display the average-precipitation (prcp) heatmap
heatmap_winter_semester_precip_avg  # For the timeline Jan.1 - April.30 (Winter Semester)

In [21]:
# Display the average-precipitation (prcp) heatmap
heatmap_summer_semester_precip_avg  # For the timeline May.1 - Aug.31 (Summer Semester)