# WeatherPy

---

## Starter Code to Generate Random Geographic Coordinates and a List of Cities

In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress
#import json

# Import the OpenWeatherMap API key
from api_keys import weather_api_key

# Import citipy to determine the cities based on latitude and longitude
from citipy import citipy

### Generate the Cities List by Using the `citipy` Library

In [None]:
# Bounds (in degrees) for the random coordinate draw
lat_range = (-90, 90)
lng_range = (-180, 180)

# Draw 1500 random latitudes, then 1500 random longitudes, and pair them up
lats = np.random.uniform(low=lat_range[0], high=lat_range[1], size=1500)
lngs = np.random.uniform(low=lng_range[0], high=lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Look up the nearest city for every coordinate pair. dict.fromkeys() drops
# repeated names while keeping the order in which each city was first seen.
cities = list(
    dict.fromkeys(
        citipy.nearest_city(lat, lng).city_name for lat, lng in lat_lngs
    )
)

# Print the city count to confirm sufficient count
print(f"Number of cities in the list: {len(cities)}")
print(cities)



---

## Requirement 1: Create Plots to Showcase the Relationship Between Weather Variables and Latitude

### Use the OpenWeatherMap API to retrieve weather data from the cities list generated in the starter code

In [None]:
# Set the API base URL
url = "https://api.openweathermap.org/data/2.5/weather?"

# List that collects one record (dict) per city the API returned data for
city_data = []

# Print to logger
# print("Beginning Data Retrieval     ")
# print("-----------------------------")

# Counters used only for the progress log
record_count = 1
set_count = 1

# Loop through all the cities in our list to fetch weather data
for i, city in enumerate(cities):

    # Group cities in sets of 50 for logging purposes
    if i % 50 == 0 and i >= 50:
        set_count += 1
        # Restart at 1 so every set is numbered the same way as the first one
        record_count = 1

    # Log the record number, set number, and city
    print(f"Processing Record {record_count} of Set {set_count} | {city}")

    # Add 1 to the record count
    record_count += 1

    # Run an API request for each of the cities
    try:
        # Passing the query through `params` lets requests URL-encode the
        # city name; the timeout keeps one stalled connection from hanging
        # the whole run.
        response = requests.get(
            url,
            params={"appid": weather_api_key, "q": city},
            timeout=10,
        )
        city_weather = response.json()

        # Parse out latitude, longitude, max temp, humidity, cloudiness,
        # wind speed, country, and date, and append them to city_data
        city_data.append({
            "City": city,
            "Lat": city_weather["coord"]["lat"],
            "Lng": city_weather["coord"]["lon"],
            "Max Temp": city_weather["main"]["temp_max"],
            "Humidity": city_weather["main"]["humidity"],
            "Cloudiness": city_weather["clouds"]["all"],
            "Wind Speed": city_weather["wind"]["speed"],
            "Country": city_weather["sys"]["country"],
            "Date": city_weather["dt"],
        })

    # The response did not contain the expected fields: skip the city
    except (KeyError, TypeError):
        print("City not found. Skipping...")

    # Network failure, timeout, or a body that is not valid JSON: skip the
    # city. Only the exception type is printed because the exception text can
    # contain the request URL, which carries the API key.
    except (ValueError, requests.exceptions.RequestException) as exc:
        print(f"Request failed ({type(exc).__name__}). Skipping...")

# Indicate that Data Loading is complete
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")

In [None]:
# There were a few cities that the OpenWeatherMap API wasn't able to find;
# I didn't try to add them back.

In [None]:
# Build a DataFrame with one row per record collected in city_data
city_data_df = pd.DataFrame(data=city_data)

# Number of non-null entries in every column
city_data_df.count()

In [None]:
# Preview the first five rows of the weather data
city_data_df.head(n=5)

In [None]:
# Export the City_Data into a csv
import os

# to_csv does not create missing parent folders, so make sure the output
# directory exists before writing.
os.makedirs("output_data", exist_ok=True)
city_data_df.to_csv("output_data/cities.csv", index_label="City_ID")

In [None]:
# Read saved data.
# By default pandas parses the literal text "NA" as a missing value, which
# would turn the country code "NA" (Namibia) into NaN. Disable the default
# markers and treat only empty fields as missing.
city_data_df = pd.read_csv(
    "output_data/cities.csv",
    index_col="City_ID",
    keep_default_na=False,
    na_values=[""],
)

# Display sample data
city_data_df.head()

### Create the Scatter Plots Requested

#### Latitude Vs. Temperature

In [None]:
# Pull each column out of the DataFrame once; the scatter-plot cells reuse them
lat, lng = city_data_df["Lat"], city_data_df["Lng"]
temp = city_data_df["Max Temp"]
hum = city_data_df["Humidity"]
clo = city_data_df["Cloudiness"]
win = city_data_df["Wind Speed"]
country = city_data_df["Country"]
thedate = city_data_df["Date"]

# Scatter plot of maximum temperature against latitude
fig, ax = plt.subplots()
ax.scatter(lat, temp, label="Max Temperature For City")
ax.set(xlabel="Latitude", ylabel="Temperature")

# Write the figure to disk, then display it
fig.savefig("output_data/Fig1.png")
plt.show()

#### Latitude Vs. Humidity

In [None]:
# Scatter plot of humidity against latitude
fig, ax1 = plt.subplots()
ax1.scatter(lat, hum)
ax1.set(xlabel="Latitude", ylabel="Humidity")

# Write the figure to disk, then display it
fig.savefig("output_data/Fig2.png")
plt.show()

#### Latitude Vs. Cloudiness

In [None]:
# Scatter plot of cloudiness against latitude
fig, ax2 = plt.subplots()
ax2.scatter(lat, clo)
ax2.set(xlabel="Latitude", ylabel="Cloudiness")

# Write the figure to disk, then display it
fig.savefig("output_data/Fig3.png")
plt.show()

#### Latitude vs. Wind Speed Plot

In [None]:
# Scatter plot of wind speed against latitude
fig, ax3 = plt.subplots()
ax3.scatter(lat, win)
ax3.set(xlabel="Latitude", ylabel="Wind Speed")

# Write the figure to disk, then display it
fig.savefig("output_data/Fig4.png")
plt.show()

---

## Requirement 2: Compute Linear Regression for Each Relationship


In [None]:
# Define a function to create Linear Regression plots
# Note: np.polyfit returns the slope first, then the intercept:
# m, b = np.polyfit(x, y, 1)

In [None]:
# Keep only the rows whose latitude is zero or positive (Northern Hemisphere)
northern_hemi_df = city_data_df[city_data_df["Lat"].ge(0)]

# Preview the first rows
northern_hemi_df.head()

In [None]:
# Keep only the rows whose latitude is negative (Southern Hemisphere)
southern_hemi_df = city_data_df[city_data_df["Lat"].lt(0)]

# Preview the first rows
southern_hemi_df.head()

###  Temperature vs. Latitude Linear Regression Plot

In [None]:
# Northern Hemisphere columns; the later northern regression cells reuse them
nlat, nlng = northern_hemi_df["Lat"], northern_hemi_df["Lng"]
ntemp = northern_hemi_df["Max Temp"]
nhum = northern_hemi_df["Humidity"]
nclo = northern_hemi_df["Cloudiness"]
nwin = northern_hemi_df["Wind Speed"]
ncountry = northern_hemi_df["Country"]
nthedate = northern_hemi_df["Date"]

# Scatter of max temperature vs. latitude for the Northern Hemisphere
fig, ax4 = plt.subplots()
ax4.scatter(nlat, ntemp)
ax4.set(xlabel="Latitude", ylabel="Temperature")

# Degree-1 least-squares fit: m is the slope, b the intercept
m, b = np.polyfit(nlat, ntemp, deg=1)
ax4.plot(nlat, m * nlat + b, color="red")
ax4.ticklabel_format(style="plain")

fig.savefig("output_data/Fig5.png")
plt.show()


In [None]:
# Southern Hemisphere columns; the later southern regression cells reuse them
slat, slng = southern_hemi_df["Lat"], southern_hemi_df["Lng"]
stemp = southern_hemi_df["Max Temp"]
shum = southern_hemi_df["Humidity"]
sclo = southern_hemi_df["Cloudiness"]
swin = southern_hemi_df["Wind Speed"]
scountry = southern_hemi_df["Country"]
sthedate = southern_hemi_df["Date"]

# Scatter of max temperature vs. latitude for the Southern Hemisphere
fig, ax5 = plt.subplots()
ax5.scatter(slat, stemp)
ax5.set(xlabel="Latitude", ylabel="Temperature")

# Degree-1 least-squares fit: m is the slope, b the intercept
m, b = np.polyfit(slat, stemp, deg=1)
ax5.plot(slat, m * slat + b, color="red")
ax5.ticklabel_format(style="plain")

fig.savefig("output_data/Fig6.png")
plt.show()

**Discussion about the linear relationship:** When a city is closer to the equator, there is a noticeable increase in the maximum temperature that the city experiences, in both hemispheres. Another thing I noticed is that in the northern hemisphere the maximum temperature measurement plateaus from the equator to about 20° latitude. This could be related to the tilt of the Earth over the course of its yearly orbit. Another theory is that the Sahara is skewing the measurements, since it lies north of the equator and is extremely hot.

### Humidity vs. Latitude Linear Regression Plot

In [None]:
# Northern Hemisphere: humidity vs. latitude with a fitted line
fig, ax6 = plt.subplots()
ax6.scatter(nlat, nhum)
ax6.set(xlabel="Latitude", ylabel="Humidity")

# Degree-1 least-squares fit: m is the slope, b the intercept
m, b = np.polyfit(nlat, nhum, deg=1)
ax6.plot(nlat, m * nlat + b, color="red")
ax6.ticklabel_format(style="plain")

fig.savefig("output_data/Fig7.png")
plt.show()

In [None]:
# Southern Hemisphere: humidity vs. latitude with a fitted line
fig, ax7 = plt.subplots()
ax7.scatter(slat, shum)
ax7.set(xlabel="Latitude", ylabel="Humidity")

# Degree-1 least-squares fit: m is the slope, b the intercept
m, b = np.polyfit(slat, shum, deg=1)
ax7.plot(slat, m * slat + b, color="red")
ax7.ticklabel_format(style="plain")

fig.savefig("output_data/Fig8.png")
plt.show()

**Discussion about the linear relationship:** There isn't any strong correlation between latitude and humidity in the northern hemisphere. In the southern hemisphere, however, the closer a city is to the equator, the higher its humidity. Maybe the different seasons in the southern hemisphere lead to this result?

### Cloudiness vs. Latitude Linear Regression Plot

In [None]:
# Northern Hemisphere: cloudiness vs. latitude with a fitted line
fig, ax8 = plt.subplots()
ax8.scatter(nlat, nclo)
ax8.set(xlabel="Latitude", ylabel="Cloudiness")

# Degree-1 least-squares fit: m is the slope, b the intercept
m, b = np.polyfit(nlat, nclo, deg=1)
ax8.plot(nlat, m * nlat + b, color="red")
ax8.ticklabel_format(style="plain")

fig.savefig("output_data/Fig9.png")
plt.show()

In [None]:
# Southern Hemisphere: cloudiness vs. latitude with a fitted line.
# The axes handle is named ax9 so it follows ax8 in sequence and no longer
# shares a name with the axes of the southern wind-speed plot.
fig, ax9 = plt.subplots()
ax9.scatter(slat, sclo)
ax9.set_xlabel('Latitude')
ax9.set_ylabel('Cloudiness')

# Degree-1 least-squares fit: m is the slope, b the intercept
m, b = np.polyfit(slat, sclo, 1)
ax9.plot(slat, m * slat + b, color='red')
ax9.ticklabel_format(style='plain')

plt.savefig("output_data/Fig10.png")
plt.show()

**Discussion about the linear relationship:** Cloudiness by latitude yielded a similar relationship between the northern and southern hemispheres as humidity did. There is no strong correlation in the north, while the southern hemisphere shows that cities get cloudier as you get closer to the equator.

### Wind Speed vs. Latitude Linear Regression Plot

In [None]:
# Northern Hemisphere: wind speed vs. latitude with a fitted line
fig, ax10 = plt.subplots()
ax10.scatter(nlat, nwin)
ax10.set(xlabel="Latitude", ylabel="Wind Speed")

# Degree-1 least-squares fit: m is the slope, b the intercept
m, b = np.polyfit(nlat, nwin, deg=1)
ax10.plot(nlat, m * nlat + b, color="red")
ax10.ticklabel_format(style="plain")

fig.savefig("output_data/Fig11.png")
plt.show()

In [None]:
# Southern Hemisphere: wind speed vs. latitude with a fitted line
fig, ax11 = plt.subplots()
ax11.scatter(slat, swin)
ax11.set(xlabel="Latitude", ylabel="Wind Speed")

# Degree-1 least-squares fit: m is the slope, b the intercept
m, b = np.polyfit(slat, swin, deg=1)
ax11.plot(slat, m * slat + b, color="red")
ax11.ticklabel_format(style="plain")

fig.savefig("output_data/Fig12.png")
plt.show()

**Discussion about the linear relationship:** In both of the linear regressions there is no strong relationship between latitude and the wind speed the cities are experiencing. Cities in the northern hemisphere get slightly windier as you go north, and the reverse is true for the south. The Arctic and Antarctic may be affecting the wind speeds to produce those results.