In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import gmaps
import os
import json
import time
import scipy.stats as st
from scipy.stats import linregress

#citipy to determine lat/lngs of cities
from citipy import citipy

# Import API Key
from api_keys import weather_api_key

# CSV filename used for the collected city weather data
city_data = "city_data.csv"

# Coordinate bounds as (min, max) pairs: latitude first, then longitude
lat_range, lng_range = (-90, 90), (-180, 180)

In [2]:
# Build a list of unique city names by sampling random coordinates and
# looking up the nearest city for each coordinate pair with citipy.

# Draw 1500 random latitude/longitude values inside the bounds defined in the
# setup cell (lat_range / lng_range) instead of repeating the literals here.
# NOTE(review): no random seed is set, so every run samples different cities.
lats = np.random.uniform(low=lat_range[0], high=lat_range[1], size=1500)
lngs = np.random.uniform(low=lng_range[0], high=lng_range[1], size=1500)

# Materialize the pairs: a bare zip() is a one-shot iterator that is empty
# after the first pass over it.
lat_lngs = list(zip(lats, lngs))

# `cities` keeps first-seen order; `seen_cities` gives O(1) duplicate checks
cities = []
seen_cities = set()

# Use distinct loop names so the `lat_lngs` collection is not overwritten
# by its own elements while iterating.
for lat, lng in lat_lngs:
    city = citipy.nearest_city(lat, lng).city_name

    # Keep only the first occurrence of each city name
    if city not in seen_cities:
        seen_cities.add(city)
        cities.append(city)

# Number of unique cities found (shown as the cell output)
len(cities)


621

In [3]:
# Per-city weather fields collected by the retrieval loop. Each list is a
# separate object; they are intended to stay aligned by position.
city_name_list, country_list = [], []
lat_list, lng_list = [], []
date_list = []
max_temp_list, humidity_list = [], []
cloudiness_list, wind_speed_list = [], []

# Progress counters for the retrieval log: record number and set number
index_counter, set_counter = 0, 1

In [4]:
# Request the current weather for every city in `cities` and store each
# successful response in the weather lists. Cities whose response lacks any
# expected field are skipped as a whole, so the lists always stay the same length.
print("Beginning Data Retrieval ")
print("-----------------------------")

base_url = "http://api.openweathermap.org/data/2.5/weather?"
units = "imperial"
query_url = f"{base_url}appid={weather_api_key}&units={units}&q="

# Number of records logged per "set"; only affects the progress messages
records_per_set = 50

#use api call to gather current weather data and store data in created lists
for city in cities:
    try:
        # The timeout keeps one stalled connection from hanging the whole run
        response = requests.get(query_url + city, timeout=10).json()
    except (requests.RequestException, ValueError) as err:
        # Network failure or a non-JSON body. Print only the exception type:
        # the exception message can contain the request URL, which embeds the API key.
        print(f"Request failed for {city} ({type(err).__name__}). Skipping...")
        continue

    try:
        # Read every field BEFORE touching the lists. If any key is missing,
        # nothing has been appended yet and the lists cannot become misaligned.
        record = (
            response["name"],
            response["clouds"]["all"],
            response["sys"]["country"],
            response["dt"],
            response["main"]["humidity"],
            response["coord"]["lat"],
            response["coord"]["lon"],
            response["main"]["temp_max"],
            response["wind"]["speed"],
        )
    except (KeyError, IndexError, TypeError):
        # The response did not have the expected structure for this city
        print("City not found. Skipping...")
        continue

    (name, cloudiness, country, date, humidity,
     lat, lng, max_temp, wind_speed) = record

    city_name_list.append(name)
    cloudiness_list.append(cloudiness)
    country_list.append(country)
    date_list.append(date)
    humidity_list.append(humidity)
    lat_list.append(lat)
    lng_list.append(lng)
    max_temp_list.append(max_temp)
    wind_speed_list.append(wind_speed)

    # Roll over to a new set after `records_per_set` records. Restart the
    # record number at 1 so every set is numbered 1..records_per_set
    # (restarting at 0 would log a "Record 0" and make later sets one longer).
    if index_counter >= records_per_set:
        index_counter = 1
        set_counter = set_counter + 1
    else:
        index_counter = index_counter + 1

    print(f"Processing Record {index_counter} of Set {set_counter} : {city}")

print("-----------------------------")
print("Data Retrieval Complete")
print("-----------------------------")

Beginning Data Retrieval 
-----------------------------
Processing Record 1 of Set 1 : zhangjiakou
Processing Record 2 of Set 1 : mahuva
Processing Record 3 of Set 1 : punta arenas
Processing Record 4 of Set 1 : jamestown
Processing Record 5 of Set 1 : kahului
Processing Record 6 of Set 1 : saint-philippe
Processing Record 7 of Set 1 : kalabo
Processing Record 8 of Set 1 : rikitea
Processing Record 9 of Set 1 : vaini
Processing Record 10 of Set 1 : bethel
Processing Record 11 of Set 1 : dingle
City not found. Skipping...
Processing Record 12 of Set 1 : cape town
Processing Record 13 of Set 1 : san carlos
Processing Record 14 of Set 1 : lazaro cardenas
Processing Record 15 of Set 1 : presidencia roque saenz pena
Processing Record 16 of Set 1 : hay river
Processing Record 17 of Set 1 : butaritari
Processing Record 18 of Set 1 : north platte
Processing Record 19 of Set 1 : albany
Processing Record 20 of Set 1 : busselton
City not found. Skipping...
Processing Record 21 of Set 1 : tuktoyak

In [5]:
# Assemble the collected lists into a single table: one column per weather
# field, in the order given by the (column name, values) pairs below.
weather_df = pd.DataFrame(dict([
    ("City", city_name_list),
    ("Cloudiness", cloudiness_list),
    ("Country", country_list),
    ("Date", date_list),
    ("Humidity", humidity_list),
    ("Lat", lat_list),
    ("Lng", lng_list),
    ("Max Temp", max_temp_list),
    ("Wind Speed", wind_speed_list),
]))

In [8]:
# Show the assembled weather table as the cell output (bare last expression)
weather_df

Unnamed: 0,City,Cloudiness,Country,Date,Humidity,Lat,Lng,Max Temp,Wind Speed
0,Zhangjiakou,100,CN,1626366685,85,40.8100,114.8794,73.72,1.88
1,Mahuva,86,IN,1626366686,77,21.0914,71.7622,84.58,12.84
2,Punta Arenas,0,CL,1626366686,93,-53.1500,-70.9167,37.51,0.00
3,Jamestown,40,US,1626366516,67,42.0970,-79.2353,81.09,13.00
4,Kahului,40,US,1626366686,80,20.8947,-156.4700,77.85,12.66
...,...,...,...,...,...,...,...,...,...
570,Bom Jesus,23,BR,1626366887,22,-9.0744,-44.3586,96.49,7.14
571,Lipno,89,PL,1626366887,90,52.8444,19.1785,68.90,6.71
572,Samarai,63,PG,1626366888,86,-10.6167,150.6667,79.72,10.58
573,Penhold,1,CA,1626366889,68,52.1334,-113.8687,70.84,5.75


In [9]:
#save data to csv
# The filename comes from `city_data` (defined in the setup cell) so it is not
# duplicated here. The output folder is created first because to_csv does not
# create missing directories.
output_dir = "Data"
os.makedirs(output_dir, exist_ok=True)
weather_df.to_csv(os.path.join(output_dir, city_data), index=False)