# WeatherPy

---

## Starter Code to Generate Random Geographic Coordinates and a List of Cities

In [2]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress

# Impor the OpenWeatherMap API key
from api_keys import weather_api_key

# Import citipy to determine the cities based on latitude and longitude
from citipy import citipy

### Generate the Cities List by Using the `citipy` Library

In [3]:
# Empty list for holding the latitude and longitude combinations
lat_lngs = []

# Empty list for holding the cities names
cities = []

# Range of latitudes and longitudes
lat_range = (-90, 90)
lng_range = (-180, 180)

# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify nearest city for each lat, lng combination
for lat_lng in lat_lngs:
    city = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name
    
    # If the city is unique, then add it to a our cities list
    if city not in cities:
        cities.append(city)

# Print the city count to confirm sufficient count
print(f"Number of cities in the list: {len(cities)}")

Number of cities in the list: 605


In [4]:
print(cities)

['kirkwall', 'tangalle', 'mata-utu', 'taiohae', 'port-aux-francais', 'blackmans bay', 'puerto san carlos', 'mount gambier', 'longyearbyen', 'albany', 'molde', 'kingston', 'east london', 'chonchi', 'yafran', 'gadzhiyevo', 'peniche', 'saipan', 'bilibino', 'pyapon', 'tolanaro', 'daru', 'vilyuchinsk', 'ribeira grande', 'vernon', 'richards bay', 'anna regina', 'fuvahmulah', 'grytviken', 'atafu village', 'anzio', 'adamstown', 'ushuaia', 'waitangi', 'severo-yeniseyskiy', 'happy valley-goose bay', 'uribia', 'isafjordur', 'pontian kechil', "la'ie", 'tiksi', 'college', 'puerto natales', 'teluknaga', 'benghazi', 'iqaluit', 'lebu', 'edinburgh of the seven seas', 'berezovo', 'igarka', 'hermanus', "kapa'a", 'la passe', 'papatowai', 'broken hill', 'hithadhoo', "'ain el bell", 'stanley', 'atoyac de alvarez', 'bargaal', 'qamdo', 'coahuayana de hidalgo', 'nema', 'pacific grove', 'bredasdorp', 'la ronge', 'yellowknife', 'puerto ayora', 'utrik', 'talakan', 'kokstad', 'new norfolk', 'wailua homesteads', 'q

---

## Requirement 1: Create Plots to Showcase the Relationship Between Weather Variables and Latitude

### Use the OpenWeatherMap API to retrieve weather data from the cities list generated in the started code

In [None]:
# Set the API base URL
url = f"https://api.openweathermap.org/data/2.5/weather?appid={weather_api_key}&units=imperial"

# Define an empty list to fetch the weather data for each city
city_data = []

# Print to logger
print("Beginning Data Retrieval     ")
print("-----------------------------")

# Create counters
record_count = 1
set_count = 1

# Loop through all the cities in our list to fetch weather data
for i, city in enumerate(cities):
        
    # Group cities in sets of 50 for logging purposes
    if (i % 50 == 0 and i >= 50):
        set_count += 1
        record_count = 0

    # Create endpoint URL with each city
    city_url = url + f"&q={city}"
    
    # Log the url, record, and set numbers
    print("Processing Record %s of Set %s | %s" % (record_count, set_count, city))

    # Add 1 to the record count
    record_count += 1

    # Run an API request for each of the cities
    try:
        # Parse the JSON and retrieve data
        city_weather = requests.get(city_url).json()

        # Parse out latitude, longitude, max temp, humidity, cloudiness, wind speed, country, and date
        city_lat = city_weather["coord"]["lat"]
        city_lng = city_weather["coord"]["lon"]
        city_max_temp = city_weather["main"]["temp"]
        city_humidity = city_weather["main"]["humidity"]
        city_clouds = city_weather["clouds"]["all"]
        city_wind = city_weather["wind"]["speed"]
        city_country = city_weather["sys"]["country"]
        city_date = city_weather["dt"]
        
        # Append the City information into city_data list
        city_data.append({"City": city, 
                          "Lat": city_lat, 
                          "Lng": city_lng, 
                          "Max Temp": city_max_temp,
                          "Humidity": city_humidity,
                          "Cloudiness": city_clouds,
                          "Wind Speed": city_wind,
                          "Country": city_country,
                          "Date": city_date})

    # If an error is experienced, skip the city
    except Exception as e:
        print(e)
        print("City not found. Skipping...")
        pass
    # sleep
    time.sleep(1)
    
# Indicate that Data Loading is complete 
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")

Beginning Data Retrieval     
-----------------------------
Processing Record 1 of Set 1 | kirkwall
Processing Record 2 of Set 1 | tangalle
Processing Record 3 of Set 1 | mata-utu
Processing Record 4 of Set 1 | taiohae
'coord'
City not found. Skipping...
Processing Record 5 of Set 1 | port-aux-francais
Processing Record 6 of Set 1 | blackmans bay
Processing Record 7 of Set 1 | puerto san carlos
'coord'
City not found. Skipping...
Processing Record 8 of Set 1 | mount gambier
Processing Record 9 of Set 1 | longyearbyen
Processing Record 10 of Set 1 | albany
Processing Record 11 of Set 1 | molde
Processing Record 12 of Set 1 | kingston
Processing Record 13 of Set 1 | east london
Processing Record 14 of Set 1 | chonchi
Processing Record 15 of Set 1 | yafran
Processing Record 16 of Set 1 | gadzhiyevo
Processing Record 17 of Set 1 | peniche
Processing Record 18 of Set 1 | saipan
Processing Record 19 of Set 1 | bilibino
Processing Record 20 of Set 1 | pyapon
Processing Record 21 of Set 1 | to

Processing Record 39 of Set 4 | nar'yan-mar
Processing Record 40 of Set 4 | santo antonio do ica
Processing Record 41 of Set 4 | ghat
Processing Record 42 of Set 4 | burang
Processing Record 43 of Set 4 | avarua
Processing Record 44 of Set 4 | castillos
Processing Record 45 of Set 4 | alakurtti
Processing Record 46 of Set 4 | locharbriggs
Processing Record 47 of Set 4 | vila franca do campo
Processing Record 48 of Set 4 | san antonio de pale
Processing Record 49 of Set 4 | el fasher
Processing Record 0 of Set 5 | sur
Processing Record 1 of Set 5 | pago pago
Processing Record 2 of Set 5 | sukumo
Processing Record 3 of Set 5 | ponta delgada
Processing Record 4 of Set 5 | ma'rib
Processing Record 5 of Set 5 | lautoka
Processing Record 6 of Set 5 | keokuk
Processing Record 7 of Set 5 | khovd
Processing Record 8 of Set 5 | novo aripuana
Processing Record 9 of Set 5 | atbasar
Processing Record 10 of Set 5 | weno
Processing Record 11 of Set 5 | ocean shores
Processing Record 12 of Set 5 | ola

In [None]:
# Convert the cities weather data into a Pandas DataFrame
city_data_df = pd.DataFrame(city_data)

# Show Record Count
city_data_df.count()

In [None]:
# Display sample data
city_data_df.head()

In [None]:
# Export the City_Data into a csv
city_data_df.to_csv("../output_data/cities.csv", index_label="City_ID")

In [None]:
# Read saved data
city_data_df = pd.read_csv("../output_data/cities.csv", index_col="City_ID")

# Display sample data
city_data_df.head()

### Create the Scatter Plots Requested

#### Latitude Vs. Temperature

In [None]:
#consitent color/ allows for quick changes
select = "skyblue"

In [None]:
# Build scatter plot for latitude vs. temperature
plt.figure(figsize=(10,6))
plt.scatter(city_data_df["Lat"], city_data_df["Max Temp"], color=select, s=150)

# Incorporate the other graph properties
plt.xlabel("Latitude")
plt.ylabel("Temperature (F)")
plt.title("Latitude vs Temperature")
plt.grid(axis="both", color="lightgrey", linestyle="--", alpha=0.2)

# Save the figure
plt.savefig("../output_data/Fig1.png")

# Show plot
plt.show()

#### Latitude Vs. Humidity

In [None]:
# Build the scatter plots for latitude vs. humidity
plt.figure(figsize=(10,6))
plt.scatter(city_data_df["Lat"], city_data_df["Humidity"], color=select, s=150)

# Incorporate the other graph properties
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")
plt.title("Latitude vs Humidity")
plt.grid(axis="both", color="lightgrey", linestyle="--", alpha=0.2)

# Save the figure
plt.savefig("../output_data/Fig2.png")

# Show plot
plt.show()

#### Latitude Vs. Cloudiness

In [None]:
# Build scatter plot for latitude vs. temperature
plt.figure(figsize=(10,6))
plt.scatter(city_data_df["Lat"], city_data_df["Cloudiness"], color=select, s=150)

# Incorporate the other graph properties
plt.xlabel("Latitude")
plt.ylabel("Cloudiness (%)")
plt.title("Latitude vs Cloudiness")
plt.grid(axis="both", color="lightgrey", linestyle="--", alpha=0.2)

# Save the figure
plt.savefig("../output_data/Fig3.png")

# Show plot
plt.show()

#### Latitude vs. Wind Speed Plot

In [None]:
# Build scatter plot for latitude vs. temperature
plt.figure(figsize=(10,6))
plt.scatter(city_data_df["Lat"], city_data_df["Wind Speed"], color=select, s=150)

# Incorporate the other graph properties
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (mile/hour)")
plt.title("Latitude vs Wind Speed")
plt.grid(axis="both", color="lightgrey", linestyle="--", alpha=0.2)

# Save the figure
plt.savefig("../output_data/Fig4.png")

# Show plot
plt.show()

---

## Requirement 2: Compute Linear Regression for Each Relationship


In [None]:
# Define a function to create Linear Regression plots
def plot_linear_regression(x_values, y_values, title, text_coordinates):
    
    # Compute linear regression
    (slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
    regress_values = x_values * slope + intercept
    line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
    
    # Plot
    plt.scatter(x_values,y_values, color=select)
    plt.plot(x_values,regress_values,"r-")
    plt.annotate(line_eq,(x_values.min(),y_values.min()),fontsize=15,color="red")
    plt.xlabel("Latitude")
    plt.ylabel(title)
    print(f"The r-value is: {rvalue**2}")
    plt.show()

In [None]:
# Create a DataFrame with the Northern Hemisphere data (Latitude >= 0)
northern_hemi_df = city_data_df.loc[city_data_df.Lat >= 0].reset_index(drop=True)

# Display sample data
northern_hemi_df.head()

In [None]:
# Create a DataFrame with the Southern Hemisphere data (Latitude < 0)
southern_hemi_df = city_data_df.loc[city_data_df.Lat < 0].reset_index(drop=True)

# Display sample data
southern_hemi_df.head()

###  Temperature vs. Latitude Linear Regression Plot

In [None]:
# Linear regression on Northern Hemisphere
x_values = northern_hemi_df["Lat"]
y_values = northern_hemi_df["Max Temp"]
plot_linear_regression(x_values, y_values, "Temperature",(6,-10))

In [None]:
# Linear regression on Southern Hemisphere
x_values = southern_hemi_df["Lat"]
y_values = southern_hemi_df["Max Temp"]
plot_linear_regression(x_values, y_values, "Temperature",(6,-10))

**Discussion about the linear relationship:** Based on residual values of about .67 for the northern hemisphere and about .71 for the southern hemisphere, the graphs show some correlation between latitude and temperature. For the northern hemisphere, there is a negative correlation where a rise in the latitude leads to a drop in temperature. The southern hemisphere has the opposite relationship where a drop in latitude leads to a drop in temperature. This makes sense given the relationship between the north and south pole and the amount of direct sunlight recieved in these locations.

### Humidity vs. Latitude Linear Regression Plot

In [None]:
# Northern Hemisphere
x_values = northern_hemi_df["Lat"]
y_values = northern_hemi_df["Humidity"]
plot_linear_regression(x_values, y_values, "Humidity",(6,-10))

In [None]:
# Southern Hemisphere
x_values = southern_hemi_df["Lat"]
y_values = southern_hemi_df["Humidity"]
plot_linear_regression(x_values, y_values, "Humidity",(6,-10))

**Discussion about the linear relationship:** The residuals comparing humidity to latitude in both hemisphers are very close to zero. This shows that there is no linear correlation to humidity and latitude. 

### Cloudiness vs. Latitude Linear Regression Plot

In [None]:
# Northern Hemisphere
x_values = northern_hemi_df["Lat"]
y_values = northern_hemi_df["Cloudiness"]
plot_linear_regression(x_values, y_values, "Cloudiness",(6,-10))

In [None]:
# Southern Hemisphere
x_values = southern_hemi_df["Lat"]
y_values = southern_hemi_df["Cloudiness"]
plot_linear_regression(x_values, y_values, "Cloudiness",(6,-10))

**Discussion about the linear relationship:** The residuals comparing cloudiness to latitude in both hemisphers are very close to zero. This shows that there is no linear correlation to cloudiness and latitude. 

### Wind Speed vs. Latitude Linear Regression Plot

In [None]:
# Northern Hemisphere
x_values = northern_hemi_df["Lat"]
y_values = northern_hemi_df["Wind Speed"]
plot_linear_regression(x_values, y_values, "Wind Speed",(6,-10))

In [None]:
# Southern Hemisphere
x_values = southern_hemi_df["Lat"]
y_values = southern_hemi_df["Wind Speed"]
plot_linear_regression(x_values, y_values, "Wind Speed",(6,-10))

**Discussion about the linear relationship:** The residuals comparing wind speed to latitude in both hemisphers are very close to zero. This shows that there is no linear correlation to wind speed and latitude. However, the closer to the poles (-100 SH, 100 NH) the cities are seem to show some possible outliers in both graphs. An alternative model may find more information. 