# WeatherPy
-----------------------------------------------------------------------------------------------------------------------------------------------------------

## Starter Code to Generate Random Geographic Coordinates and a List of Cities

In [2]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
from scipy.stats import linregress

# I want a time delay on my requests so that I don't exceed 60 per minute; per the API documentation, that will cause a lockout.
import time

from scipy.stats import linregress

# Import the OpenWeatherMap API key
from api_keys import weather_key

# Import citipy to determine the cities based on latitude and longitude
from citipy import citipy

### Generate the Cities List by Using the `citipy` Library

In [None]:
# Bounds used for the random coordinate draw
lat_range = (-90, 90)
lng_range = (-180, 180)

# Draw 1500 random latitudes, then 1500 random longitudes, and pair them up
lats = np.random.uniform(*lat_range, size=1500)
lngs = np.random.uniform(*lng_range, size=1500)
lat_lngs = zip(lats, lngs)

# Collect the nearest city for every coordinate pair, keeping only the
# first occurrence of each city name (insertion order is preserved)
cities = []
for lat_lng in lat_lngs:
    city = citipy.nearest_city(*lat_lng).city_name

    # Already recorded: nothing to add
    if city in cities:
        continue
    cities.append(city)

# Print the city count to confirm sufficient count
print(f"Number of cities in the list: {len(cities)}")

# Requirement 1: Create Plots to Showcase the Relationship Between Weather Variables and Latitude

## Use the OpenWeatherMap API to retrieve weather data from the cities list generated in the starter code

In [None]:
# Extract city names
# NOTE(review): this replaces the `cities` list generated by citipy above with the
# names stored in 'cities.csv' -- presumably a saved sample; confirm this is intended.
cities = pd.read_csv('cities.csv')['City']

# Set the API base URL
url = "https://api.openweathermap.org/data/2.5/weather?"

# Define an empty list to fetch the weather data for each city
city_data = []

# Print to logger
print("Beginning Data Retrieval     ")
print("-----------------------------")

# Create counters
record_count = 1
set_count = 1

# Loop through all the cities in our list to fetch weather data
for i, city in enumerate(cities):

    # Sleep for 1 second, to not exceed 60 per minute.
    time.sleep(1)

    # Group cities in sets of 50 for logging purposes
    if (i % 50 == 0 and i >= 50):
        set_count += 1
        record_count = 0

    # Create endpoint URL with each city.
    # The key is imported at the top of the notebook as `weather_key`
    # (the previous name `weather_api_key` was never defined and raised NameError).
    city_url = f"{url}q={city}&appid={weather_key}"

    # Log the url, record, and set numbers
    print("Processing Record %s of Set %s | %s" % (record_count, set_count, city))

    # Add 1 to the record count
    record_count += 1

    # Run an API request for each of the cities
    try:
        # Parse the JSON and retrieve data.
        # The timeout keeps one stalled connection from hanging the whole run.
        response = requests.get(city_url, timeout=10)
        city_weather = response.json()

        # Parse out latitude, longitude, max temp, humidity, cloudiness, wind speed, country, and date
        city_lat = city_weather["coord"]["lat"]
        city_lng = city_weather["coord"]["lon"]
        city_max_temp = city_weather["main"]["temp_max"] - 273.15  # Kelvin -> Celsius; the API returns K here
        city_humidity = city_weather["main"]["humidity"]
        city_clouds = city_weather["clouds"]["all"]
        city_wind = city_weather["wind"]["speed"]
        city_country = city_weather["sys"]["country"]
        city_date = city_weather["dt"]

        # Append the City information into city_data list
        city_data.append({"City": city,
                          "Lat": city_lat,
                          "Lng": city_lng,
                          "Max Temp": city_max_temp,
                          "Humidity": city_humidity,
                          "Cloudiness": city_clouds,
                          "Wind Speed": city_wind,
                          "Country": city_country,
                          "Date": city_date})

    # Skip the city when the request fails, the body is not valid JSON, or an
    # expected field is missing. Anything else (including KeyboardInterrupt)
    # is allowed to propagate instead of being silently swallowed.
    except (KeyError, ValueError, requests.exceptions.RequestException):
        print("City not found. Skipping...")

# Indicate that Data Loading is complete
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")


In [None]:
# Build the DataFrame from the collected records and, in the same step,
# drop every row that contains a missing value
city_data_df = pd.DataFrame(city_data).dropna()

# Non-null entry count per column
city_data_df.count()


In [None]:
# Preview the first rows of the weather DataFrame built in the previous cell
city_data_df.head()

In [None]:
# Export to .csv; the row index is written out as a column labelled "City_ID"
# NOTE(review): assumes the "output_data" directory already exists -- TODO confirm
city_data_df.to_csv("output_data/cities.csv", index_label="City_ID")

In [None]:
# Read saved data back from disk, using the "City_ID" column as the index.
# Every later cell works from this reloaded frame.
city_data_df = pd.read_csv("output_data/cities.csv", index_col="City_ID")

# Display sample data
city_data_df.head()

# Scatter Plots
## Latitude vs. Temperature

In [None]:
# Latitude vs. maximum temperature, drawn through the explicit figure/axes interface
fig, ax = plt.subplots(figsize=(6, 4))
ax.scatter(city_data_df['Lat'], city_data_df['Max Temp'], color='dimgray', alpha=0.5)
ax.set(
    title='City Maximum Temperature vs. Latitude',
    xlabel='Latitude',
    ylabel='Maximum Temperature',
)
ax.grid(True)

# Write the figure to disk before displaying it
fig.savefig("output_data/Fig1.png")

plt.show()

In [None]:
# Latitude vs. humidity, drawn through the explicit figure/axes interface
fig, ax = plt.subplots(figsize=(6, 4))
ax.scatter(city_data_df['Lat'], city_data_df['Humidity'], color='dimgray', alpha=0.5)
ax.set(
    title='City Humidity vs. Latitude',
    xlabel='Latitude',
    ylabel='Humidity',
)
ax.grid(True)

# Write the figure to disk before displaying it
fig.savefig("output_data/Fig2.png")

# Render the figure
plt.show()

In [None]:
# Latitude vs. cloudiness, drawn through the explicit figure/axes interface
fig, ax = plt.subplots(figsize=(6, 4))
ax.scatter(city_data_df['Lat'], city_data_df['Cloudiness'], color='dimgray', alpha=0.5)
ax.set(
    title='City Cloudiness vs. Latitude',
    xlabel='Latitude',
    ylabel='Cloudiness',
)
ax.grid(True)

# Write the figure to disk before displaying it
fig.savefig("output_data/Fig3.png")

# Render the figure
plt.show()

In [None]:
# Latitude vs. wind speed, drawn through the explicit figure/axes interface
fig, ax = plt.subplots(figsize=(6, 4))
ax.scatter(city_data_df['Lat'], city_data_df['Wind Speed'], color='dimgray', alpha=0.5)
ax.set(
    title='City Wind Speed vs. Latitude',
    xlabel='Latitude',
    ylabel='Wind Speed',
)
ax.grid(True)

# Write the figure to disk before displaying it
fig.savefig("output_data/Fig4.png")

# Render the figure
plt.show()

# Requirement 2: Compute Linear Regression for Each Relationship

In [None]:
# Northern Hemisphere subset: rows whose latitude is on or above the equator (Lat >= 0)
northern_hemi_df = city_data_df.loc[city_data_df['Lat'].ge(0)]

# Preview the subset
northern_hemi_df.head()

In [None]:
# Southern Hemisphere subset: rows whose latitude is strictly below the equator (Lat < 0)
southern_hemi_df = city_data_df.loc[city_data_df['Lat'].lt(0)]

# Preview the subset
southern_hemi_df.head()

## Temperature vs. Latitude Linear Regression Plot

In [None]:
# Linear regression on Northern Hemisphere
# (linregress is imported in the dependencies cell at the top of the notebook.)

# NOTE: this helper is declared again in each regression cell of this notebook,
# differing only in where the equation label is drawn. `text_offset` exposes that
# position so that a single definition can serve every plot.
def plot_linear_regression(x, y, xlabel, ylabel, text_offset=(1, 40)):
    """Scatter-plot ``y`` against ``x``, overlay the fitted regression line, and print r.

    Parameters
    ----------
    x, y : array-like
        Paired observations; both are converted with ``np.array`` before fitting.
    xlabel, ylabel : str
        Axis labels for the plot.
    text_offset : tuple of (float, float), optional
        ``(dx, dy)`` in data units. The fitted equation is written at
        ``(min(x) + dx, max(y) - dy)``.

    Returns
    -------
    None
        The figure is shown with ``plt.show()`` and the fit's ``rvalue`` is printed.
    """
    x = np.array(x)
    y = np.array(y)
    fit = linregress(x, y)
    regress_values = x * fit.slope + fit.intercept

    # Render a negative intercept as "- 3.2" rather than "+ -3.2".
    sign = "+" if fit.intercept >= 0 else "-"
    line_eq = f"y = {round(fit.slope, 2)}x {sign} {round(abs(fit.intercept), 2)}"

    dx, dy = text_offset
    plt.scatter(x, y)
    plt.plot(x, regress_values, "r-")
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.text(np.min(x) + dx, np.max(y) - dy, line_eq, fontsize=12, color="red")
    plt.show()
    print("The R value is:", fit.rvalue)

# Parameters
x = northern_hemi_df['Lat']
y = northern_hemi_df['Max Temp']
xlabel = "Latitude"
ylabel = "Maximum Temperature"

plot_linear_regression(x, y, xlabel, ylabel)

In [None]:
# Linear regression on Southern Hemisphere

# NOTE: this helper is declared again in each regression cell of this notebook,
# differing only in where the equation label is drawn. `text_offset` exposes that
# position so that a single definition can serve every plot.
def plot_linear_regression(x, y, xlabel, ylabel, text_offset=(1, 5)):
    """Scatter-plot ``y`` against ``x``, overlay the fitted regression line, and print r.

    Parameters
    ----------
    x, y : array-like
        Paired observations; both are converted with ``np.array`` before fitting.
    xlabel, ylabel : str
        Axis labels for the plot.
    text_offset : tuple of (float, float), optional
        ``(dx, dy)`` in data units. The fitted equation is written at
        ``(min(x) + dx, max(y) - dy)``.

    Returns
    -------
    None
        The figure is shown with ``plt.show()`` and the fit's ``rvalue`` is printed.
    """
    x = np.array(x)
    y = np.array(y)
    fit = linregress(x, y)
    regress_values = x * fit.slope + fit.intercept

    # Render a negative intercept as "- 3.2" rather than "+ -3.2".
    sign = "+" if fit.intercept >= 0 else "-"
    line_eq = f"y = {round(fit.slope, 2)}x {sign} {round(abs(fit.intercept), 2)}"

    dx, dy = text_offset
    plt.scatter(x, y)
    plt.plot(x, regress_values, "r-")
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.text(np.min(x) + dx, np.max(y) - dy, line_eq, fontsize=12, color="red")
    plt.show()
    print("The R value is:", fit.rvalue)

# Parameters
x = southern_hemi_df['Lat']
y = southern_hemi_df['Max Temp']
xlabel = "Latitude"
ylabel = "Maximum Temperature"

plot_linear_regression(x, y, xlabel, ylabel)

### Discussion about linear relationship: 

High temperature is strongly correlated with latitudes near 0.

Higher northern latitudes correlate more strongly with lower temperatures than southern latitudes equally far south.

This is not at all outside of expectations, as the southern hemisphere tends to have milder temperatures in the far south and hotter temperatures nearing the equator. Northern latitudes mirror this.

### Humidity vs. Latitude Linear Regression Plot

In [None]:
# Linear regression on Northern Hemisphere

# NOTE: this helper is declared again in each regression cell of this notebook,
# differing only in where the equation label is drawn. `text_offset` exposes that
# position so that a single definition can serve every plot.
def plot_linear_regression(x, y, xlabel, ylabel, text_offset=(50, 80)):
    """Scatter-plot ``y`` against ``x``, overlay the fitted regression line, and print r.

    Parameters
    ----------
    x, y : array-like
        Paired observations; both are converted with ``np.array`` before fitting.
    xlabel, ylabel : str
        Axis labels for the plot.
    text_offset : tuple of (float, float), optional
        ``(dx, dy)`` in data units. The fitted equation is written at
        ``(min(x) + dx, max(y) - dy)``.

    Returns
    -------
    None
        The figure is shown with ``plt.show()`` and the fit's ``rvalue`` is printed.
    """
    x = np.array(x)
    y = np.array(y)
    fit = linregress(x, y)
    regress_values = x * fit.slope + fit.intercept

    # Render a negative intercept as "- 3.2" rather than "+ -3.2".
    sign = "+" if fit.intercept >= 0 else "-"
    line_eq = f"y = {round(fit.slope, 2)}x {sign} {round(abs(fit.intercept), 2)}"

    dx, dy = text_offset
    plt.scatter(x, y)
    plt.plot(x, regress_values, "r-")
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.text(np.min(x) + dx, np.max(y) - dy, line_eq, fontsize=12, color="red")
    plt.show()
    print("The R value is:", fit.rvalue)

# Parameters
x = northern_hemi_df['Lat']
y = northern_hemi_df['Humidity']
xlabel = "Latitude"
ylabel = "Humidity"

plot_linear_regression(x, y, xlabel, ylabel)

In [None]:
# Linear regression on Southern Hemisphere

# NOTE: this helper is declared again in each regression cell of this notebook,
# differing only in where the equation label is drawn. `text_offset` exposes that
# position so that a single definition can serve every plot.
def plot_linear_regression(x, y, xlabel, ylabel, text_offset=(0, 50)):
    """Scatter-plot ``y`` against ``x``, overlay the fitted regression line, and print r.

    Parameters
    ----------
    x, y : array-like
        Paired observations; both are converted with ``np.array`` before fitting.
    xlabel, ylabel : str
        Axis labels for the plot.
    text_offset : tuple of (float, float), optional
        ``(dx, dy)`` in data units. The fitted equation is written at
        ``(min(x) + dx, max(y) - dy)``.

    Returns
    -------
    None
        The figure is shown with ``plt.show()`` and the fit's ``rvalue`` is printed.
    """
    x = np.array(x)
    y = np.array(y)
    fit = linregress(x, y)
    regress_values = x * fit.slope + fit.intercept

    # Render a negative intercept as "- 3.2" rather than "+ -3.2".
    sign = "+" if fit.intercept >= 0 else "-"
    line_eq = f"y = {round(fit.slope, 2)}x {sign} {round(abs(fit.intercept), 2)}"

    dx, dy = text_offset
    plt.scatter(x, y)
    plt.plot(x, regress_values, "r-")
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.text(np.min(x) + dx, np.max(y) - dy, line_eq, fontsize=12, color="red")
    plt.show()
    print("The R value is:", fit.rvalue)

# Parameters
x = southern_hemi_df['Lat']
y = southern_hemi_df['Humidity']
xlabel = "Latitude"
ylabel = "Humidity"

plot_linear_regression(x, y, xlabel, ylabel)

### Discussion about Linear Relationship:

Humidity is correlated with higher northern latitudes.

Humidity is also correlated with higher (closer to 0) southern latitudes. So, as you go north, humidity generally increases.

### Cloudiness vs. Latitude Linear Regression Plot

In [None]:
# Linear regression on Northern Hemisphere

# NOTE: this helper is declared again in each regression cell of this notebook,
# differing only in where the equation label is drawn. `text_offset` exposes that
# position so that a single definition can serve every plot.
def plot_linear_regression(x, y, xlabel, ylabel, text_offset=(90, 80)):
    """Scatter-plot ``y`` against ``x``, overlay the fitted regression line, and print r.

    Parameters
    ----------
    x, y : array-like
        Paired observations; both are converted with ``np.array`` before fitting.
    xlabel, ylabel : str
        Axis labels for the plot.
    text_offset : tuple of (float, float), optional
        ``(dx, dy)`` in data units. The fitted equation is written at
        ``(min(x) + dx, max(y) - dy)``.
        NOTE(review): the default dx of 90 may place the label to the right of
        the plotted latitudes -- confirm the intended position.

    Returns
    -------
    None
        The figure is shown with ``plt.show()`` and the fit's ``rvalue`` is printed.
    """
    x = np.array(x)
    y = np.array(y)
    fit = linregress(x, y)
    regress_values = x * fit.slope + fit.intercept

    # Render a negative intercept as "- 3.2" rather than "+ -3.2".
    sign = "+" if fit.intercept >= 0 else "-"
    line_eq = f"y = {round(fit.slope, 2)}x {sign} {round(abs(fit.intercept), 2)}"

    dx, dy = text_offset
    plt.scatter(x, y)
    plt.plot(x, regress_values, "r-")
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.text(np.min(x) + dx, np.max(y) - dy, line_eq, fontsize=12, color="red")
    plt.show()
    print("The R value is:", fit.rvalue)

# Parameters
x = northern_hemi_df['Lat']
y = northern_hemi_df['Cloudiness']
xlabel = "Latitude"
ylabel = "Cloudiness"

plot_linear_regression(x, y, xlabel, ylabel)

In [None]:
# Linear regression on Southern Hemisphere

# NOTE: this helper is declared again in each regression cell of this notebook,
# differing only in where the equation label is drawn. `text_offset` exposes that
# position so that a single definition can serve every plot.
def plot_linear_regression(x, y, xlabel, ylabel, text_offset=(0, 50)):
    """Scatter-plot ``y`` against ``x``, overlay the fitted regression line, and print r.

    Parameters
    ----------
    x, y : array-like
        Paired observations; both are converted with ``np.array`` before fitting.
    xlabel, ylabel : str
        Axis labels for the plot.
    text_offset : tuple of (float, float), optional
        ``(dx, dy)`` in data units. The fitted equation is written at
        ``(min(x) + dx, max(y) - dy)``.

    Returns
    -------
    None
        The figure is shown with ``plt.show()`` and the fit's ``rvalue`` is printed.
    """
    x = np.array(x)
    y = np.array(y)
    fit = linregress(x, y)
    regress_values = x * fit.slope + fit.intercept

    # Render a negative intercept as "- 3.2" rather than "+ -3.2".
    sign = "+" if fit.intercept >= 0 else "-"
    line_eq = f"y = {round(fit.slope, 2)}x {sign} {round(abs(fit.intercept), 2)}"

    dx, dy = text_offset
    plt.scatter(x, y)
    plt.plot(x, regress_values, "r-")
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.text(np.min(x) + dx, np.max(y) - dy, line_eq, fontsize=12, color="red")
    plt.show()
    print("The R value is:", fit.rvalue)

# Parameters
x = southern_hemi_df['Lat']
y = southern_hemi_df['Cloudiness']
xlabel = "Latitude"
ylabel = "Cloudiness"

plot_linear_regression(x, y, xlabel, ylabel)

### Discussion about Linear Relationship:

Cloudiness north of the equator is almost uncorrelated with latitude.

Cloudiness south of the equator increases approaching the equator.

### Wind Speed vs. Latitude Linear Regression Plot

In [None]:
# Linear regression on Northern Hemisphere

# NOTE: this helper is declared again in each regression cell of this notebook,
# differing only in where the equation label is drawn. `text_offset` exposes that
# position so that a single definition can serve every plot.
def plot_linear_regression(x, y, xlabel, ylabel, text_offset=(10, 2)):
    """Scatter-plot ``y`` against ``x``, overlay the fitted regression line, and print r.

    Parameters
    ----------
    x, y : array-like
        Paired observations; both are converted with ``np.array`` before fitting.
    xlabel, ylabel : str
        Axis labels for the plot.
    text_offset : tuple of (float, float), optional
        ``(dx, dy)`` in data units. The fitted equation is written at
        ``(min(x) + dx, max(y) - dy)``.

    Returns
    -------
    None
        The figure is shown with ``plt.show()`` and the fit's ``rvalue`` is printed.
    """
    x = np.array(x)
    y = np.array(y)
    fit = linregress(x, y)
    regress_values = x * fit.slope + fit.intercept

    # Render a negative intercept as "- 3.2" rather than "+ -3.2".
    sign = "+" if fit.intercept >= 0 else "-"
    line_eq = f"y = {round(fit.slope, 2)}x {sign} {round(abs(fit.intercept), 2)}"

    dx, dy = text_offset
    plt.scatter(x, y)
    plt.plot(x, regress_values, "r-")
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.text(np.min(x) + dx, np.max(y) - dy, line_eq, fontsize=12, color="red")
    plt.show()
    print("The R value is:", fit.rvalue)

# Parameters
x = northern_hemi_df['Lat']
y = northern_hemi_df['Wind Speed']
xlabel = "Latitude"
ylabel = "Wind Speed"

plot_linear_regression(x, y, xlabel, ylabel)

In [None]:
# Linear regression on Southern Hemisphere

# NOTE: this helper is declared again in each regression cell of this notebook,
# differing only in where the equation label is drawn. `text_offset` exposes that
# position so that a single definition can serve every plot.
def plot_linear_regression(x, y, xlabel, ylabel, text_offset=(0, 2)):
    """Scatter-plot ``y`` against ``x``, overlay the fitted regression line, and print r.

    Parameters
    ----------
    x, y : array-like
        Paired observations; both are converted with ``np.array`` before fitting.
    xlabel, ylabel : str
        Axis labels for the plot.
    text_offset : tuple of (float, float), optional
        ``(dx, dy)`` in data units. The fitted equation is written at
        ``(min(x) + dx, max(y) - dy)``.

    Returns
    -------
    None
        The figure is shown with ``plt.show()`` and the fit's ``rvalue`` is printed.
    """
    x = np.array(x)
    y = np.array(y)
    fit = linregress(x, y)
    regress_values = x * fit.slope + fit.intercept

    # Render a negative intercept as "- 3.2" rather than "+ -3.2".
    sign = "+" if fit.intercept >= 0 else "-"
    line_eq = f"y = {round(fit.slope, 2)}x {sign} {round(abs(fit.intercept), 2)}"

    dx, dy = text_offset
    plt.scatter(x, y)
    plt.plot(x, regress_values, "r-")
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.text(np.min(x) + dx, np.max(y) - dy, line_eq, fontsize=12, color="red")
    plt.show()
    print("The R value is:", fit.rvalue)

# Parameters
x = southern_hemi_df['Lat']
y = southern_hemi_df['Wind Speed']
xlabel = "Latitude"
ylabel = "Wind Speed"

plot_linear_regression(x, y, xlabel, ylabel)

### Discussion about Linear Relationship:

Wind speed in the northern latitudes is only weakly correlated with latitude, with slightly higher speeds at higher latitudes than at lower ones.

The same is true for the southern latitudes. Wind speed increases moving away from the equator.