# WeatherPy

---

## Starter Code to Generate Random Geographic Coordinates and a List of Cities

In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress

# Import the OpenWeatherMap API key
from api_keys import weather_api_key

# Import citipy to determine the cities based on latitude and longitude
from citipy import citipy

### Generate the Cities List by Using the `citipy` Library

In [None]:
# Latitude / longitude bounds used for the random draws
lat_range = (-90, 90)
lng_range = (-180, 180)

# Draw 1500 random latitudes, then 1500 random longitudes, and pair them up
lats = np.random.uniform(*lat_range, size=1500)
lngs = np.random.uniform(*lng_range, size=1500)
lat_lngs = zip(lats, lngs)

# Look up the nearest city for every coordinate pair; dict.fromkeys drops
# repeated names while keeping the order in which they were first seen
cities = list(dict.fromkeys(
    citipy.nearest_city(lat, lng).city_name for lat, lng in lat_lngs
))

# Print the city count to confirm sufficient count
print(f"Number of cities in the list: {len(cities)}")

---

## Requirement 1: Create Plots to Showcase the Relationship Between Weather Variables and Latitude

### Use the OpenWeatherMap API to retrieve weather data from the cities list generated in the starter code

In [None]:
# Set the API base URL
url = "https://api.openweathermap.org/data/2.5/weather"  # OpenWeatherMap API URL

# Define an empty list to fetch the weather data for each city
city_data = []

# Print to logger
print("Beginning Data Retrieval     ")
print("-----------------------------")

# Create counters
record_count = 1
set_count = 1

# Loop through all the cities in our list to fetch weather data
for i, city in enumerate(cities):

    # Group cities in sets of 50 for logging purposes
    if (i % 50 == 0 and i >= 50):
        set_count += 1
        # Restart at 1 so every set is logged as records 1-50, like the first set
        record_count = 1

    # Query parameters for this city (including the API key). Passing them via
    # `params` lets requests URL-encode city names with spaces or punctuation.
    query_params = {"q": city, "appid": weather_api_key, "units": "metric"}

    # Log the record number, set number, and city name
    print(f"Processing Record {record_count} of Set {set_count} | {city}")

    # Add 1 to the record count
    record_count += 1

    # Run an API request for each of the cities
    try:
        # Send a GET request to the API; the timeout keeps a stalled
        # connection from hanging the whole loop
        response = requests.get(url, params=query_params, timeout=10)
        city_weather = response.json()

        # Parse out latitude, longitude, max temp, humidity, cloudiness, wind speed, country, and date
        city_lat = city_weather['coord']['lat']
        city_lng = city_weather['coord']['lon']
        city_max_temp = city_weather['main']['temp_max']
        city_humidity = city_weather['main']['humidity']
        city_clouds = city_weather['clouds']['all']
        city_wind = city_weather['wind']['speed']
        city_country = city_weather['sys']['country']
        city_date = city_weather['dt']

        # Append the City information into city_data list
        city_data.append({"City": city,
                          "Lat": city_lat,
                          "Lng": city_lng,
                          "Max Temp": city_max_temp,
                          "Humidity": city_humidity,
                          "Cloudiness": city_clouds,
                          "Wind Speed": city_wind,
                          "Country": city_country,
                          "Date": city_date})

    # Skip the city on a network failure, an unparsable body, or a response
    # that lacks the expected fields. Catching only these keeps
    # KeyboardInterrupt working, so the loop can still be stopped by hand.
    except (requests.RequestException, KeyError, ValueError):
        print("City not found. Skipping...")

    # Pause to avoid rate limiting (1 second pause between requests)
    time.sleep(1)

# Indicate that Data Loading is complete
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")


In [None]:
# Turn the list of per-city records into a table, one row per city
city_data_df = pd.DataFrame(data=city_data)

# Non-null entries per column, as a quick completeness check
city_data_df.count()

In [None]:
# Preview the first five rows of the weather table
city_data_df.head(5)

In [None]:
# Write the weather table to disk, labelling the index column "City_ID"
city_data_df.to_csv(path_or_buf="output_data/cities.csv", index_label="City_ID")

In [None]:
# Read saved data.
# Only empty fields are treated as missing: with pandas' default NA markers,
# the country code "NA" (Namibia) or a city literally named "nan" would be
# silently converted to NaN on reload.
city_data_df = pd.read_csv(
    "output_data/cities.csv",
    index_col="City_ID",
    keep_default_na=False,
    na_values=[""],
)

# Display sample data
city_data_df.head()

### Create the Scatter Plots Requested

#### Latitude Vs. Temperature

In [None]:
# Latitude vs. max temperature, drawn on an explicit figure/axes pair
fig, ax = plt.subplots()
ax.scatter(city_data_df["Lat"], city_data_df["Max Temp"], color="blue", edgecolors="black")

# Title, axis labels and grid
ax.set_title("City Latitude vs. Max Temperature", fontsize=14)
ax.set_xlabel("Latitude", fontsize=12)
ax.set_ylabel("Max Temperature (°C)", fontsize=12)
ax.grid(True)

# Write the figure to disk before displaying it
fig.savefig("output_data/Fig1.png")

# Show plot
plt.show()


#### Latitude Vs. Humidity

In [None]:
# Latitude vs. humidity, drawn on an explicit figure/axes pair
fig, ax = plt.subplots()
ax.scatter(city_data_df["Lat"], city_data_df["Humidity"], color="blue", edgecolors="black")

# Title, axis labels and grid
ax.set_title("City Latitude vs. Humidity", fontsize=14)
ax.set_xlabel("Latitude", fontsize=12)
ax.set_ylabel("Humidity (%)", fontsize=12)
ax.grid(True)

# Write the figure to disk before displaying it
fig.savefig("output_data/Fig2.png")

# Show plot
plt.show()


#### Latitude Vs. Cloudiness

In [None]:
# Latitude vs. cloudiness, drawn on an explicit figure/axes pair
fig, ax = plt.subplots()
ax.scatter(city_data_df["Lat"], city_data_df["Cloudiness"], color="blue", edgecolors="black")

# Title, axis labels and grid
ax.set_title("City Latitude vs. Cloudiness", fontsize=14)
ax.set_xlabel("Latitude", fontsize=12)
ax.set_ylabel("Cloudiness (%)", fontsize=12)
ax.grid(True)

# Write the figure to disk before displaying it
fig.savefig("output_data/Fig3.png")

# Show plot
plt.show()


#### Latitude vs. Wind Speed Plot

In [None]:
# Latitude vs. wind speed, drawn on an explicit figure/axes pair
fig, ax = plt.subplots()
ax.scatter(city_data_df["Lat"], city_data_df["Wind Speed"], color="blue", edgecolors="black")

# Title, axis labels and grid
ax.set_title("City Latitude vs. Wind Speed", fontsize=14)
ax.set_xlabel("Latitude", fontsize=12)
ax.set_ylabel("Wind Speed (m/s)", fontsize=12)
ax.grid(True)

# Write the figure to disk before displaying it
fig.savefig("output_data/Fig4.png")

# Show plot
plt.show()


---

## Requirement 2: Compute Linear Regression for Each Relationship


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import linregress

def create_linear_regression_plot(x, y, xlabel, ylabel, title, output_file):
    """Scatter-plot ``y`` against ``x`` with a fitted regression line, then save and show it.

    Parameters
    ----------
    x, y : array-like
        Independent and dependent values, passed to ``scipy.stats.linregress``.
    xlabel, ylabel : str
        Axis labels.
    title : str
        Figure title.
    output_file : str
        Path handed to ``plt.savefig``.

    Returns
    -------
    None
        The figure is written to ``output_file`` and displayed with ``plt.show()``.
    """
    # Perform linear regression (p-value and standard error are not used here)
    slope, intercept, r_value, _, _ = linregress(x, y)

    # Evaluate the fitted line at every x value
    regression_line = slope * np.asarray(x) + intercept

    # Create the scatter plot
    plt.scatter(x, y, color="blue", edgecolors="black")

    # Plot the regression line
    plt.plot(x, regression_line, color="red", linewidth=2)

    # Add titles and labels
    plt.title(title, fontsize=14)
    plt.xlabel(xlabel, fontsize=12)
    plt.ylabel(ylabel, fontsize=12)

    # Render the intercept with an explicit sign so a negative value reads
    # "y = 2.00x - 1.00" instead of "y = 2.00x + -1.00"
    sign = "+" if intercept >= 0 else "-"
    equation = f"y = {slope:.2f}x {sign} {abs(intercept):.2f}\n$R^2 = {r_value**2:.2f}$"

    # Place the equation and R-squared value in the bottom-right corner.
    # The text is anchored by its right edge (in axes coordinates) so it
    # stays inside the plot area instead of spilling past the right border.
    plt.text(0.95, 0.05, equation,
             transform=plt.gca().transAxes, fontsize=12,
             verticalalignment='bottom', horizontalalignment='right', color="black")

    # Display grid
    plt.grid(True)

    # Save the figure
    plt.savefig(output_file)

    # Show plot
    plt.show()


In [None]:
# Keep only the cities on or north of the equator (latitude of 0 or more)
northern_hemi_df = city_data_df.loc[city_data_df["Lat"].ge(0)]

# Preview the first five rows
northern_hemi_df.head(5)


In [None]:
# Keep only the cities south of the equator (negative latitude)
southern_hemi_df = city_data_df.loc[city_data_df["Lat"].lt(0)]

# Preview the first five rows
southern_hemi_df.head(5)


###  Temperature vs. Latitude Linear Regression Plot

In [None]:
# Northern Hemisphere: max temperature as a function of latitude
x, y = northern_hemi_df["Lat"], northern_hemi_df["Max Temp"]

# Figure title and the file the plot is saved to
title, output_file = (
    "Latitude vs. Max Temperature (Northern Hemisphere)",
    "output_data/LinearReg_Lat_MaxTemp_NorthHem.png",
)

# Fit, draw, save and display the regression plot
create_linear_regression_plot(
    x=x,
    y=y,
    xlabel="Latitude",
    ylabel="Max Temperature (°C)",
    title=title,
    output_file=output_file,
)


In [None]:
# Southern Hemisphere: max temperature as a function of latitude
x_south, y_south = southern_hemi_df["Lat"], southern_hemi_df["Max Temp"]

# Figure title and the file the plot is saved to
title_south, output_file_south = (
    "Latitude vs. Max Temperature (Southern Hemisphere)",
    "output_data/LinearReg_Lat_MaxTemp_SouthHem.png",
)

# Fit, draw, save and display the regression plot
create_linear_regression_plot(
    x=x_south,
    y=y_south,
    xlabel="Latitude",
    ylabel="Max Temperature (°C)",
    title=title_south,
    output_file=output_file_south,
)


**Discussion about the linear relationship:** Max temperature increases as distance from the equator (absolute latitude) decreases. In the southern hemisphere, latitude accounts for about 41% of the variation in max temperature ($R^2 \approx 0.41$). In the northern hemisphere the relationship is stronger: latitude accounts for about 73% of the variation ($R^2 \approx 0.73$).

### Humidity vs. Latitude Linear Regression Plot

In [None]:
# Linear regression on Northern Hemisphere: humidity as a function of latitude
x_north = northern_hemi_df["Lat"]
y_north = northern_hemi_df["Humidity"]

# Add the plot title and output file name for Northern Hemisphere.
# Both must name humidity: reusing the max-temperature title and file name
# mislabels this figure and overwrites the saved temperature plot.
title_north = "Linear Regression: Latitude vs. Humidity (Northern Hemisphere)"
output_file_north = "output_data/LinearReg_Lat_Humidity_NorthHem.png"

# Create the plot for Northern Hemisphere (y label carries the unit, as in the humidity scatter plot)
create_linear_regression_plot(x_north, y_north, "Latitude", "Humidity (%)", title_north, output_file_north)


In [None]:
# Linear regression on Southern Hemisphere: humidity as a function of latitude
x_south = southern_hemi_df["Lat"]
y_south = southern_hemi_df["Humidity"]

# Add the plot title and output file name for Southern Hemisphere
title_south = "Linear Regression: Latitude vs. Humidity (Southern Hemisphere)"
output_file_south = "output_data/LinearReg_Lat_Humidity_SouthHem.png"

# Create the plot for Southern Hemisphere (y label carries the unit, as in the humidity scatter plot)
create_linear_regression_plot(x_south, y_south, "Latitude", "Humidity (%)", title_south, output_file_south)

**Discussion about the linear relationship:** Latitude and humidity have a slight positive correlation, but the $R^2$ value is so low that latitude is likely a poor predictor of humidity.

### Cloudiness vs. Latitude Linear Regression Plot

In [None]:
# Northern Hemisphere: cloudiness (dependent) as a function of latitude (independent)
x_north_cloudiness, y_north_cloudiness = northern_hemi_df["Lat"], northern_hemi_df["Cloudiness"]

# Figure title and the file the plot is saved to
title_north_cloudiness, output_file_north_cloudiness = (
    "Linear Regression: Latitude vs. Cloudiness (Northern Hemisphere)",
    "output_data/LinearReg_Lat_Cloudiness_NorthHem.png",
)

# Fit, draw, save and display the regression plot
create_linear_regression_plot(
    x=x_north_cloudiness,
    y=y_north_cloudiness,
    xlabel="Latitude",
    ylabel="Cloudiness (%)",
    title=title_north_cloudiness,
    output_file=output_file_north_cloudiness,
)

In [None]:
# Southern Hemisphere: cloudiness as a function of latitude
x_south_cloudiness, y_south_cloudiness = southern_hemi_df["Lat"], southern_hemi_df["Cloudiness"]

# Figure title and the file the plot is saved to
title_south_cloudiness, output_file_south_cloudiness = (
    "Linear Regression: Latitude vs. Cloudiness (Southern Hemisphere)",
    "output_data/LinearReg_Lat_Cloudiness_SouthHem.png",
)

# Fit, draw, save and display the regression plot
create_linear_regression_plot(
    x=x_south_cloudiness,
    y=y_south_cloudiness,
    xlabel="Latitude",
    ylabel="Cloudiness (%)",
    title=title_south_cloudiness,
    output_file=output_file_south_cloudiness,
)


**Discussion about the linear relationship:** There is minimal correlation between latitude and cloudiness (the percentage of cloud cover).

### Wind Speed vs. Latitude Linear Regression Plot

In [None]:
# Northern Hemisphere: wind speed as a function of latitude
x_north_wind, y_north_wind = northern_hemi_df["Lat"], northern_hemi_df["Wind Speed"]

# Figure title and the file the plot is saved to
title_north_wind, output_file_north_wind = (
    "Linear Regression: Latitude vs. Wind Speed (Northern Hemisphere)",
    "output_data/LinearReg_Lat_WindSpeed_NorthHem.png",
)

# Fit, draw, save and display the regression plot
create_linear_regression_plot(
    x=x_north_wind,
    y=y_north_wind,
    xlabel="Latitude",
    ylabel="Wind Speed (m/s)",
    title=title_north_wind,
    output_file=output_file_north_wind,
)


In [None]:
# Southern Hemisphere: wind speed (dependent) as a function of latitude (independent)
x_south_wind, y_south_wind = southern_hemi_df["Lat"], southern_hemi_df["Wind Speed"]

# Figure title and the file the plot is saved to
title_south_wind, output_file_south_wind = (
    "Linear Regression: Latitude vs. Wind Speed (Southern Hemisphere)",
    "output_data/LinearReg_Lat_WindSpeed_SouthHem.png",
)

# Fit, draw, save and display the regression plot
create_linear_regression_plot(
    x=x_south_wind,
    y=y_south_wind,
    xlabel="Latitude",
    ylabel="Wind Speed (m/s)",
    title=title_south_wind,
    output_file=output_file_south_wind,
)


**Discussion about the linear relationship:** There is very little correlation between wind speed and latitude. 