# WeatherPy

---

## Starter Code to Generate Random Geographic Coordinates and a List of Cities

In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress

# Impor the OpenWeatherMap API key
from api_keys import weather_api_key

# Import citipy to determine the cities based on latitude and longitude
from citipy import citipy

### Generate the Cities List by Using the `citipy` Library

In [2]:
# Empty list for holding the latitude and longitude combinations
lat_lngs = []

# Empty list for holding the cities names
cities = []

# Range of latitudes and longitudes
lat_range = (-90, 90)
lng_range = (-180, 180)

# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify nearest city for each lat, lng combination
for lat_lng in lat_lngs:
    city = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name

    # If the city is unique, then add it to a our cities list
    if city not in cities:
        cities.append(city)

# Print the city count to confirm sufficient count
print(f"Number of cities in the list: {len(cities)}")

Number of cities in the list: 591


---

## Requirement 1: Create Plots to Showcase the Relationship Between Weather Variables and Latitude

### Use the OpenWeatherMap API to retrieve weather data from the cities list generated in the started code

In [3]:
# Retrieving city Lat and Long from openweathermap.org and then storing data
city_latlon_df = []

for i, city in enumerate(cities):
    try:
        city_url = f"http://api.openweathermap.org/geo/1.0/direct?q={city}&appid={key}"
        city_info = req.get(city_url).json()
        city_latlon_df.append([city, city_info[0]['lat'], city_info[0]['lon']])
    except Exception as err:
        print(f"{city} city not found. Skipping...")
        pass

bredasdorp city not found. Skipping...
adamstown city not found. Skipping...
margaret river city not found. Skipping...
bimbo city not found. Skipping...
petauke city not found. Skipping...
defuniak springs city not found. Skipping...
jamestown city not found. Skipping...
weno city not found. Skipping...
port elizabeth city not found. Skipping...
port-aux-francais city not found. Skipping...
anloga city not found. Skipping...
waitangi city not found. Skipping...
igrim city not found. Skipping...
punta arenas city not found. Skipping...
hirtshals city not found. Skipping...
crucecita city not found. Skipping...
puerto natales city not found. Skipping...
grytviken city not found. Skipping...
martapura city not found. Skipping...
arraial do cabo city not found. Skipping...
san patricio city not found. Skipping...
klaksvik city not found. Skipping...
taiohae city not found. Skipping...
anar darah city not found. Skipping...
aasiaat city not found. Skipping...
montepuez city not found. Skip

In [4]:
# Creating a city, lat/long and, df to use in openweathermap url

city_lat_long_df = pd.DataFrame(city_latlon_df, columns = ['city' , 'lat', 'long'])


In [5]:
# Set the API base URL
url = 'https://api.openweathermap.org/data/2.5/weather'

# Define an empty list to fetch the weather data for each city
city_data = []

# Print to logger
print("Beginning Data Retrieval     ")
print("-----------------------------")

# Create counters
record_count = 1
set_count = 1

# Loop through all the cities in our list to fetch weather data
for i, city in enumerate(cities):

    # Group cities in sets of 50 for logging purposes
    if (i % 50 == 0 and i >= 50):
        set_count += 1
        record_count = 0

    # Create endpoint URL with each city
    try:
        lat = city_lat_lon_df.loc[city_lat_long_df['city'] == city, 'lat'].values[0]
        long = city_lat_lon_df.loc[city_lat_long_df['city'] == city, 'lon'].values[0]
        city_url = f"{url}?lat={lat}&lon={long}&appid={key}"
    except Exception as err:
        print(f"{city} city location failed. Skipping...")
        pass

    # Log the url, record, and set numbers
    print("Processing Record %s of Set %s | %s" % (record_count, set_count, city))

    # Add 1 to the record count
    record_count += 1

    # Run an API request for each of the cities
    try:
         # Parse the JSON and retrieve data
        city_weather = req.get(city_url).json()
        time.sleep(1)
        
        # Parse out latitude, longitude, max temp, humidity, cloudiness, wind speed, country, and date
        city_lat = city_weather['coord']['lat']
        city_lng = city_weather['coord']['lon']
        city_max_temp = city_weather['main']['temp_max']  - 273.15 # <- unit conversions for Celsius
        city_humidity = city_weather['main']['humidity']
        city_clouds = city_weather['clouds']['all']
        city_wind = city_weather['wind']['speed']
        city_country = city_weather['sys']['country']
        city_date = datetime.fromtimestamp(city_weather['dt']) # <- UNIX timestamp conversion to date

        # Append the City information into city_data list
        city_data.append({"City": city,
                          "Lat": city_lat,
                          "Lng": city_lng,
                          "Max Temp": city_max_temp,
                          "Humidity": city_humidity,
                          "Cloudiness": city_clouds,
                          "Wind Speed": city_wind,
                          "Country": city_country,
                          "Date": city_date})

    # If an error is experienced, skip the city
    except Exception as err:
        print(f"{city} city not found. Skipping...")
        pass

    # pause to avoid rate limiting
    time.sleep(1)

# Indicate that Data Loading is complete
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")

Beginning Data Retrieval     
-----------------------------
bredasdorp city location failed. Skipping...
Processing Record 1 of Set 1 | bredasdorp
bredasdorp city not found. Skipping...
adamstown city location failed. Skipping...
Processing Record 2 of Set 1 | adamstown
adamstown city not found. Skipping...
margaret river city location failed. Skipping...
Processing Record 3 of Set 1 | margaret river
margaret river city not found. Skipping...
bimbo city location failed. Skipping...
Processing Record 4 of Set 1 | bimbo
bimbo city not found. Skipping...
petauke city location failed. Skipping...
Processing Record 5 of Set 1 | petauke
petauke city not found. Skipping...
defuniak springs city location failed. Skipping...
Processing Record 6 of Set 1 | defuniak springs
defuniak springs city not found. Skipping...
jamestown city location failed. Skipping...
Processing Record 7 of Set 1 | jamestown
jamestown city not found. Skipping...
weno city location failed. Skipping...
Processing Record 8

KeyboardInterrupt: 

In [None]:
# Convert the cities weather data into a Pandas DataFrame
city_data_df = pd.DataFrame(city_data)

# Show Record Count
city_data_df.count()

In [None]:
# Display sample data
city_data_df.head()

In [None]:
# Export the City_Data into a csv
city_data_df.to_csv("output_data/cities.csv", index_label="City_ID")

In [None]:
# Read saved data
city_data_df = pd.read_csv("output_data/cities.csv", index_col="City_ID")

# Display sample data
city_data_df.head()

### Create the Scatter Plots Requested

#### Latitude Vs. Temperature

In [None]:
# Build scatter plot for latitude vs. temperature
date = city_date.strftime('%Y-%m-%d')
plt.scatter(city_data_df['Lat'], city_data_df['Max Temp'])

# Incorporate the other graph properties
plt.title(f'City Latitude vs. Max Temperature ({dt})')
plt.xlabel('Latitude')
plt.ylabel('Max Temperature in (C)')
plt.grid()

# Save the figure
plt.savefig("output_data/Fig1.png")

# Show plot
plt.show()

#### Latitude Vs. Humidity

In [None]:
# Build the scatter plots for latitude vs. humidity
dt = city_date.strftime('%Y-%m-%d')
plt.scatter(city_data_df['Lat'], city_data_df['Humidity'])E

# Incorporate the other graph properties
plt.title(f'City Latitude vs. Humidity ({dt})')
plt.xlabel('Latitude')
plt.ylabel('Humididty (%)')
plt.grid()

# Save the figure
plt.savefig("output_data/Fig2.png")

# Show plot
plt.show()

#### Latitude Vs. Cloudiness

In [None]:
# Build the scatter plots for latitude vs. cloudiness
dt = city_date.strftime('%Y-%m-%d')
plt.scatter(city_data_df['Lat'], city_data_df['Cloudiness'])

# Incorporate the other graph properties
plt.title(f'City Latitude vs. Cloudiness ({dt})')
plt.xlabel('Latitude')
plt.ylabel('Cloudiness (%)')
plt.grid()

# Save the figure
plt.savefig("output_data/Fig3.png")

# Show plot
plt.show()

#### Latitude vs. Wind Speed Plot

In [None]:
# Build the scatter plots for latitude vs. wind speed
dt = city_date.strftime('%Y-%m-%d')
plt.scatter(city_data_df['Lat'], city_data_df['Wind Speed'])

# Incorporate the other graph properties
plt.title(f'City Latitude vs. Wind Speed ({dt})')
plt.xlabel('Latitude')
plt.ylabel('Wind Speed (m/s)')
plt.grid()

# Save the figure
plt.savefig("output_data/Fig4.png")

# Show plot
plt.show()

---

## Requirement 2: Compute Linear Regression for Each Relationship


In [None]:
# Define a function to create Linear Regression plots

def weather_plot(df: pd.DataFrame, xAxis: str, yAxis: str, xLabel: str, yLabel: str, annotateXY: tuple):
    
    # ARGUMENTS FOR LINEAR REGRESSION -----------------------------------------
    # df = DataFrame containing x and y axis plot values                      |
    # xAxis = DataFrame x axis column name                                    |
    # yAxis = DataFrame y axis column name                                    |
    # xLabel = Plot x axis column label                                       |
    # xLabel = Plot y axis column label                                       |
    # annotateXY = Plot xy location of the annotation                         |
    #--------------------------------------------------------------------------
    
    rval = st.pearsonr(df[yAxis], df[xAxis])[0]
    print(f'The r-value is: {rval}')
    
    # Add the linear regression equation and line to plot
    (slope, intercept, rvalue, pvalue, stderr) = linregress(df[xAxis], df[yAxis])
    regress_values = df[xAxis] * slope + intercept
    line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
    
    plt.scatter(df[xAxis], df[yAxis])
    plt.plot(df[xAxis],regress_values,"r-")
    plt.annotate(line_eq,annotateXY,fontsize=15,color="red")
    plt.xlabel(xLabel)
    plt.ylabel(yLabel)
    plt.show()
    

In [None]:
# Create a DataFrame with the Northern Hemisphere data (Latitude >= 0)
north_hem_df = city_data_df.copy()
north_hem_df = north_hemi_df.loc[north_hem_df['Lat'] >= 0]

# Display sample data
north_hem_df.head()

In [None]:
# Create a DataFrame with the Southern Hemisphere data (Latitude < 0)
south_hem_df = city_data_df.copy()
south_hem_df = south_hem_df.loc[south_hem_df['Lat'] < 0]

# Display sample data
south_hem_df.head()

###  Temperature vs. Latitude Linear Regression Plot

In [None]:
# Linear regression on Northern Hemisphere
weather_plot(north_hem_df, 'Lat', 'Max Temp', 'Latitude', 'Max Temp', (10,-16))

In [None]:
# Linear regression on Southern Hemisphere
weather_plot(south_hem_df, 'Lat', 'Max Temp', 'Latitude', 'Max Temp', (-25,5))

**Discussion about the linear relationship:** 
The Linear regression shows a strong relationship with higher temperatures for cities that are near the latitude of zero and a relationship with lower temperatures in latitudes that are further away from latitude 0

### Humidity vs. Latitude Linear Regression Plot

In [None]:
# Northern Hemisphere
weather_plot(nort_hem_df, 'Lat', 'Humidity', 'Latitude', 'Humidity', (48,15))

In [None]:
# Southern Hemisphere
weather_plot(south_hem_df, 'Lat', 'Humidity', 'Latitude', 'Humidity', (-23,27))

**Discussion about the linear relationship:** 
The linear relationship shows a relationship for both hemispheres. A small and consistant relationship between the humidity and latitude. As the latitude further approaches north the humidity rises as well. 

### Cloudiness vs. Latitude Linear Regression Plot

In [None]:
# Northern Hemisphere
weather_plot(north_hem_df, 'Lat', 'Cloudiness', 'Latitude', 'Cloudiness', (20,35))

In [None]:
# Southern Hemisphere
weather_plot(south_hem_df, 'Lat', 'Cloudiness', 'Latitude', 'Cloudiness', (-40,18))

**Discussion about the linear relationship:** 

According to the regression model there is no correlation for the southern and northern hemispheres as it relates to cloudiness.

### Wind Speed vs. Latitude Linear Regression Plot

In [None]:
# Northern Hemisphere
weather_plot(north_hem_df, 'Lat', 'Wind Speed', 'Latitude', 'Wind Speed', (0,11))

In [None]:
# Southern Hemisphere
weather_plot(south_hem_df, 'Lat', 'Wind Speed', 'Latitude', 'Wind Speed', (-25,12))

**Discussion about the linear relationship:** 
The linear relationship shows near to no correlation for the northern hemipshere than the southern hemisphere
For both hemispheres though, the wind speed decreases as the latitude value approaches closer to the equator.