# WeatherPy

---

## Starter Code to Generate Random Geographic Coordinates and a List of Cities

In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress
import json

# Import the OpenWeatherMap API key
from api_keys import weather_api_key

# Import citipy to determine the cities based on latitude and longitude
from citipy import citipy

### Generate the Cities List by Using the `citipy` Library

In [None]:
# Range of latitudes and longitudes to sample from
lat_range = (-90, 90)
lng_range = (-180, 180)

# Create a set of 1500 random lat and lng combinations within those ranges
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Unique city names, kept in first-seen order
cities = []

# Companion set so each uniqueness check is a constant-time lookup instead of
# a scan over the whole list
seen_cities = set()

# Identify the nearest city for each lat, lng combination
for lat, lng in lat_lngs:
    city = citipy.nearest_city(lat, lng).city_name

    # Only the first occurrence of a city name is added to the cities list
    if city not in seen_cities:
        seen_cities.add(city)
        cities.append(city)

# Print the city count to confirm sufficient count
print(f"Number of cities in the list: {len(cities)}")

---

## Requirement 1: Create Plots to Showcase the Relationship Between Weather Variables and Latitude

### Use the OpenWeatherMap API to retrieve weather data from the cities list generated in the starter code

In [None]:

# Set the API base URL
url = "http://api.openweathermap.org/data/2.5/weather?"

# Seconds to wait for a response before giving up on a city, so a single
# stalled connection cannot hang the whole retrieval loop
request_timeout = 10

# One dict of parsed weather fields is appended here per successfully
# retrieved city
city_data = []

# Print to logger
print("Beginning Data Retrieval     ")
print("-----------------------------")

# Create counters
record_count = 1
set_count = 1

# Loop through all the cities in our list to fetch weather data
for i, city in enumerate(cities):

    # Group cities in sets of 50 for logging purposes
    if i % 50 == 0 and i >= 50:
        set_count += 1
        # Restart at 1 so every set is numbered the same way as the first one
        record_count = 1

    # Log the record and set numbers
    print(f"Processing Record {record_count} of Set {set_count} | {city}")

    # Add 1 to the record count
    record_count += 1

    # Run an API request for each of the cities
    try:
        # Passing the query via `params` lets requests URL-encode the city
        # name (spaces, '&', '#', non-ASCII characters) correctly
        city_weather = requests.get(
            url,
            params={"appid": weather_api_key, "q": city},
            timeout=request_timeout,
        )
        data = city_weather.json()

        # Parse out latitude, longitude, max temp, humidity, cloudiness,
        # wind speed, country, and date. A response without these keys raises
        # KeyError, which is handled below by skipping the city.
        city_data.append({"City": city,
                          "Lat": data["coord"]["lat"],
                          "Lon": data["coord"]["lon"],
                          # Shift by 273.15 to store the value in °C
                          "Max Temp in °C": data["main"]["temp_max"] - 273.15,
                          "Humidity": data["main"]["humidity"],
                          "Cloudiness": data["clouds"]["all"],
                          "Wind Speed": data["wind"]["speed"],
                          "Country": data["sys"]["country"],
                          "Date": data["dt"]})

    # Transport-level failure (connection error, timeout, ...): skip the city.
    # Only the exception type is printed because the full message can contain
    # the request URL, which includes the API key.
    except requests.RequestException as err:
        print(f"Request failed ({type(err).__name__}). Skipping...")

    # Response did not contain the expected weather fields: skip the city.
    # KeyboardInterrupt/SystemExit are deliberately NOT caught, so the loop
    # can still be interrupted.
    except (KeyError, TypeError, ValueError):
        print("City not found. Skipping...")

    # pause to avoid rate limiting
    time.sleep(1)

# Indicate that Data Loading is complete
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")

In [None]:
# Build a DataFrame from the list of per-city weather records
city_data_df = pd.DataFrame(city_data)

# Number of non-missing entries in each column
city_data_df.count()

In [None]:
# Preview the first five rows of the weather data
city_data_df.head(n=5)

In [None]:
# Export the City_Data into a csv, writing the index under the "City_ID" label
import os

# to_csv does not create missing directories, so make sure the target exists
os.makedirs("output_data", exist_ok=True)
city_data_df.to_csv("output_data/cities.csv", index_label="City_ID")

In [None]:
# Read saved data, using the "City_ID" column as the index.
# Only empty fields are treated as missing: with pandas' default NA markers,
# literal strings such as "NA" (e.g. a country code) or "nan" (a possible
# city name) would be silently converted to NaN on reload.
city_data_df = pd.read_csv(
    "output_data/cities.csv",
    index_col="City_ID",
    keep_default_na=False,
    na_values=[""],
)

# Display sample data
city_data_df.head()

### Create the Scatter Plots Requested

#### Latitude Vs. Temperature

In [None]:
# Scatter plot of maximum temperature against latitude
fig, ax = plt.subplots()
ax.scatter(
    city_data_df["Lat"],
    city_data_df["Max Temp in °C"],
    marker="o",
    edgecolors="blue",
    facecolors="red",
    alpha=0.85,
)

# Axis labels and title
ax.set(
    xlabel="Latitude (°)",
    ylabel="Temperature (C)",
    title="Latitude Vs. Temperature",
)

# Write the figure to disk before displaying it
fig.savefig("output_data/Fig1.png")

# Show plot
plt.show()

#### Latitude Vs. Humidity

In [None]:
# Scatter plot of humidity against latitude
fig, ax = plt.subplots()
ax.scatter(
    city_data_df["Lat"],
    city_data_df["Humidity"],
    marker="*",
    edgecolors="blue",
    facecolors="black",
    alpha=0.75,
)

# Axis labels and title
ax.set(
    xlabel="Latitude (°)",
    ylabel="Humidity (%)",
    title="Latitude vs. Humidity ",
)

# Write the figure to disk before displaying it
fig.savefig("output_data/Fig2.png")

# Show plot
plt.show()

#### Latitude Vs. Cloudiness

In [None]:
# Scatter plot of cloudiness against latitude
fig, ax = plt.subplots()
ax.scatter(
    city_data_df["Lat"],
    city_data_df["Cloudiness"],
    marker="h",
    edgecolors="darkblue",
    facecolors="lightblue",
    alpha=0.95,
)

# Axis labels and title
ax.set(
    xlabel="Latitude(°)",
    ylabel="Cloudiness(%)",
    title="Latitude vs. Cloudiness",
)

# Write the figure to disk before displaying it
fig.savefig("output_data/Fig3.png")

# Show plot
plt.show()

#### Latitude vs. Wind Speed Plot

In [None]:
# Scatter plot of wind speed against latitude
fig, ax = plt.subplots()
ax.scatter(
    city_data_df["Lat"],
    city_data_df["Wind Speed"],
    marker=">",
    edgecolors="green",
    facecolors="violet",
    alpha=0.90,
)

# Axis labels and title
ax.set(
    xlabel="Latitude(°)",
    ylabel="Wind Speed (m/s)",
    title="Latitude vs. Wind Speed",
)

# Write the figure to disk before displaying it
fig.savefig("output_data/Fig4.png")

# Show plot
plt.show()

---

## Requirement 2: Compute Linear Regression for Each Relationship


In [None]:
# Define a function to create Linear Regression plots
def plot_linear_regression(x_values, y_values, title, y_label,
                           x_label="Latitude (°)",
                           annotation_xy=(0.05, 0.05),
                           point_color="lightblue"):
    """Fit a least-squares line to the data and plot it over a scatter of the points.

    Args:
        x_values: Independent variable; must support scalar arithmetic
            (e.g. a pandas Series or NumPy array).
        y_values: Dependent variable, same length as ``x_values``.
        title: Title shown above the plot.
        y_label: Label for the y-axis.
        x_label: Label for the x-axis.
        annotation_xy: Position of the "Pearson's r" text, in axes-fraction
            coordinates (0-1 along each axis).
        point_color: Colour of the scatter markers.

    Returns:
        The result object returned by ``scipy.stats.linregress`` so callers
        can inspect the slope, intercept, r-value, p-value and standard error.
    """
    # Calculate the linear regression
    fit = linregress(x_values, y_values)
    regress_values = fit.slope * x_values + fit.intercept

    # Scatter the observations and overlay the fitted line
    fig, ax = plt.subplots()
    ax.scatter(x_values, y_values, color=point_color, marker="o", label="Data")
    ax.plot(x_values, regress_values, color="red", label="Regression Line")

    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    ax.set_title(title)

    # Report the correlation coefficient on the plot itself
    ax.annotate(f"Pearson's r = {fit.rvalue:.2f}", xy=annotation_xy,
                xycoords="axes fraction", fontsize=10, color="black")

    ax.legend()
    plt.show()

    return fit


In [None]:
# Northern Hemisphere subset: rows whose latitude is zero or positive
northern_hemi_df = city_data_df.loc[city_data_df["Lat"] >= 0]

# Preview the first rows of the subset
northern_hemi_df.head()

In [None]:
# Southern Hemisphere subset: rows whose latitude is strictly negative
southern_hemi_df = city_data_df.loc[city_data_df["Lat"] < 0]

# Preview the first rows of the subset
southern_hemi_df.head()

###  Temperature vs. Latitude Linear Regression Plot

In [None]:
# Northern Hemisphere: maximum temperature regressed on latitude
x_values = northern_hemi_df["Lat"]
y_values = northern_hemi_df["Max Temp in °C"]

# Least-squares fit, then the fitted value at every observed latitude
slope, intercept, r_value, p_value, std_err = linregress(x_values, y_values)
regress_values = slope * x_values + intercept

# Observations as points, fitted line on top
fig, ax = plt.subplots()
ax.scatter(x_values, y_values, label="Data", marker="o", color="lightblue")
ax.plot(x_values, regress_values, label="Regression Line", color="red")
ax.set(
    xlabel="Latitude (°)",
    ylabel="Max Temperature (°C)",
    title="Linear Regression on Northern Hemisphere: Latitude vs. Temperature",
)

# Show the r-value in the lower-left corner of the axes
ax.annotate(
    f"Pearson's r = {r_value:.2f}",
    xy=(0.05, 0.05),
    xycoords="axes fraction",
    fontsize=10,
    color="black",
)

ax.legend()
plt.show()

In [None]:
# Southern Hemisphere: maximum temperature regressed on latitude
x_values = southern_hemi_df["Lat"]
y_values = southern_hemi_df["Max Temp in °C"]

# Least-squares fit, then the fitted value at every observed latitude
slope, intercept, r_value, p_value, std_err = linregress(x_values, y_values)
regress_values = slope * x_values + intercept

# Observations as points, fitted line on top
fig, ax = plt.subplots()
ax.scatter(x_values, y_values, label="Data", marker="o", color="orange")
ax.plot(x_values, regress_values, label="Regression Line", color="red")
ax.set(
    xlabel="Latitude (°)",
    ylabel="Max Temperature (°C)",
    title="Linear Regression on Southern Hemisphere: Latitude vs. Temperature",
)

# Show the r-value in the lower-right area of the axes
ax.annotate(
    f"Pearson's r = {r_value:.2f}",
    xy=(0.65, 0.05),
    xycoords="axes fraction",
    fontsize=10,
    color="black",
)

ax.legend()
plt.show()

**Discussion about the linear relationship:** 

The two graphs illustrate the relationship between maximum temperature and latitude in the Northern and Southern Hemispheres.

In the first graph, representing the Northern Hemisphere, a regression line with a negative slope is displayed. The Pearson correlation coefficient for these variables was calculated to be -0.75, indicating a strong negative relationship. This suggests that as we move away from the equator, maximum temperatures tend to decrease. This trend is consistent with our understanding of climate patterns, where regions closer to the equator generally experience warmer temperatures due to more direct sunlight throughout the year.

In contrast, the second graph for the Southern Hemisphere shows a regression line with a positive slope. The calculated Pearson correlation coefficient for these variables is 0.76, which signifies a strong positive relationship. This indicates that as we approach the equator, maximum temperatures increase. This finding aligns with the climatic characteristics of the Southern Hemisphere, where areas near the equator, such as parts of South America and Africa, tend to have higher temperatures compared to those at higher latitudes.

Overall, these results highlight the significant impact of latitude on maximum temperatures in both hemispheres. The strong correlations suggest that latitude is a key factor influencing temperature variations.

### Humidity vs. Latitude Linear Regression Plot

In [None]:
# Northern Hemisphere: humidity regressed on latitude
x_values = northern_hemi_df["Lat"]
y_values = northern_hemi_df["Humidity"]

# Least-squares fit, then the fitted value at every observed latitude
slope, intercept, r_value, p_value, std_err = linregress(x_values, y_values)
regress_values = slope * x_values + intercept

# Observations as points, fitted line on top
fig, ax = plt.subplots()
ax.scatter(x_values, y_values, label="Data", marker="o", color="lightblue")
ax.plot(x_values, regress_values, label="Regression Line", color="red")
ax.set(
    xlabel="Latitude (°)",
    ylabel="Humidity (%)",
    title="Linear Regression on Northern Hemisphere: Latitude vs. Humidity",
)

# Show the r-value in the lower-left corner of the axes
ax.annotate(
    f"Pearson's r = {r_value:.2f}",
    xy=(0.05, 0.05),
    xycoords="axes fraction",
    fontsize=10,
    color="black",
)

ax.legend()
plt.show()

In [None]:
# Southern Hemisphere: humidity regressed on latitude
x_values = southern_hemi_df["Lat"]
y_values = southern_hemi_df["Humidity"]

# Least-squares fit, then the fitted value at every observed latitude
slope, intercept, r_value, p_value, std_err = linregress(x_values, y_values)
regress_values = slope * x_values + intercept

# Observations as points, fitted line on top
fig, ax = plt.subplots()
ax.scatter(x_values, y_values, label="Data", marker="o", color="orange")
ax.plot(x_values, regress_values, label="Regression Line", color="red")
ax.set(
    xlabel="Latitude (°)",
    ylabel="Humidity (%)",
    title="Linear Regression on Southern Hemisphere: Latitude vs. Humidity",
)

# Show the r-value in the lower-right area of the axes
ax.annotate(
    f"Pearson's r = {r_value:.2f}",
    xy=(0.65, 0.05),
    xycoords="axes fraction",
    fontsize=10,
    color="black",
)

ax.legend()
plt.show()


**Discussion about the linear relationship:** The above graphs illustrate the relationship between latitude and humidity in the Northern and Southern Hemispheres. The calculated Pearson correlation coefficient for the Northern Hemisphere is 0.01, while for the Southern Hemisphere it is -0.03. Both values are close to zero, indicating that there is no significant correlation between these variables in either hemisphere. This may suggest that other factors, such as local climatic conditions, could have a greater influence on humidity than latitude.

### Cloudiness vs. Latitude Linear Regression Plot

In [None]:
# Northern Hemisphere: cloudiness regressed on latitude
x_values = northern_hemi_df["Lat"]
y_values = northern_hemi_df["Cloudiness"]

# Least-squares fit, then the fitted value at every observed latitude
slope, intercept, r_value, p_value, std_err = linregress(x_values, y_values)
regress_values = slope * x_values + intercept

# Observations as points, fitted line on top
fig, ax = plt.subplots()
ax.scatter(x_values, y_values, label="Data", marker="o", color="lightblue")
ax.plot(x_values, regress_values, label="Regression Line", color="red")
ax.set(
    xlabel="Latitude (°)",
    ylabel="Cloudiness (%)",
    title="Linear Regression on Northern Hemisphere: Latitude vs. Cloudiness",
)

# Show the r-value in the lower-right area of the axes
ax.annotate(
    f"Pearson's r = {r_value:.2f}",
    xy=(0.65, 0.12),
    xycoords="axes fraction",
    fontsize=10,
    color="black",
)

ax.legend()
plt.show()

In [None]:
# Southern Hemisphere: cloudiness regressed on latitude
x_values = southern_hemi_df["Lat"]
y_values = southern_hemi_df["Cloudiness"]

# Least-squares fit, then the fitted value at every observed latitude
slope, intercept, r_value, p_value, std_err = linregress(x_values, y_values)
regress_values = slope * x_values + intercept

# Observations as points, fitted line on top
fig, ax = plt.subplots()
ax.scatter(x_values, y_values, label="Data", marker="o", color="orange")
ax.plot(x_values, regress_values, label="Regression Line", color="red")
ax.set(
    xlabel="Latitude (°)",
    ylabel="Cloudiness (%)",
    title="Linear Regression on Southern Hemisphere: Latitude vs. Cloudiness",
)

# Show the r-value in the lower-left area of the axes
ax.annotate(
    f"Pearson's r = {r_value:.2f}",
    xy=(0.05, 0.10),
    xycoords="axes fraction",
    fontsize=10,
    color="black",
)

ax.legend()
plt.show()

**Discussion about the linear relationship:** The above graphs illustrate the relationship between cloudiness and latitude for the Northern Hemisphere in the first graph and the Southern Hemisphere in the second.

In the Northern Hemisphere, the calculated Pearson correlation coefficient is 0.00, indicating no linear correlation between latitude and cloudiness. This suggests that there is no significant linear relationship between these two variables in this region; cloudiness does not systematically increase or decrease with latitude. The lack of correlation implies that other factors, such as local weather conditions and seasonality, may have a more substantial impact on cloudiness than latitude alone.

In contrast, the Southern Hemisphere exhibits a slight dependence between latitude and cloudiness, as indicated by a Pearson correlation coefficient of 0.06. This suggests a very weak positive correlation, indicating a minor tendency for cloudiness to increase as latitude increases, which in the Southern Hemisphere (where latitudes are negative) means moving toward the equator. However, with a coefficient this close to zero, the correlation is unlikely to be statistically significant and may result from random fluctuations in the data.

### Wind Speed vs. Latitude Linear Regression Plot

In [None]:
# Northern Hemisphere: wind speed regressed on latitude
x_values = northern_hemi_df["Lat"]
y_values = northern_hemi_df["Wind Speed"]

# Least-squares fit, then the fitted value at every observed latitude
slope, intercept, r_value, p_value, std_err = linregress(x_values, y_values)
regress_values = slope * x_values + intercept

# Observations as points, fitted line on top
fig, ax = plt.subplots()
ax.scatter(x_values, y_values, label="Data", marker="o", color="lightblue")
ax.plot(x_values, regress_values, label="Regression Line", color="red")
ax.set(
    xlabel="Latitude (°)",
    ylabel="Wind Speed (m/s)",
    title="Linear Regression on Northern Hemisphere: Latitude vs. Wind Speed",
)

# Show the r-value in the upper-left corner of the axes
ax.annotate(
    f"Pearson's r = {r_value:.2f}",
    xy=(0.05, 0.95),
    xycoords="axes fraction",
    fontsize=10,
    color="black",
)

ax.legend()
plt.show()

In [None]:
# Southern Hemisphere: wind speed regressed on latitude
x_values = southern_hemi_df["Lat"]
y_values = southern_hemi_df["Wind Speed"]

# Least-squares fit, then the fitted value at every observed latitude
slope, intercept, r_value, p_value, std_err = linregress(x_values, y_values)
regress_values = slope * x_values + intercept

# Observations as points, fitted line on top
fig, ax = plt.subplots()
ax.scatter(x_values, y_values, label="Data", marker="o", color="orange")
ax.plot(x_values, regress_values, label="Regression Line", color="red")
ax.set(
    xlabel="Latitude (°)",
    ylabel="Wind Speed (m/s)",
    title="Linear Regression on Southern Hemisphere: Latitude vs. Wind Speed",
)

# Show the r-value in the upper-left area of the axes
ax.annotate(
    f"Pearson's r = {r_value:.2f}",
    xy=(0.05, 0.90),
    xycoords="axes fraction",
    fontsize=10,
    color="black",
)

ax.legend()
plt.show()

**Discussion about the linear relationship:** The graphs above illustrate the relationship between wind speed and latitude, with the first graph representing the Northern Hemisphere and the second graph depicting the Southern Hemisphere.

In the Northern Hemisphere, the Pearson correlation coefficient of 0.08 indicates a very weak positive correlation between latitude and wind speed. This suggests a slight tendency for wind speed to increase as latitude rises. However, the correlation is so weak that it is likely not statistically significant, implying that any observed relationship may be attributed to random variation in the data rather than a genuine underlying trend.

Conversely, in the Southern Hemisphere, a Pearson correlation coefficient of -0.20 reveals a weak negative correlation between latitude and wind speed. This suggests that as latitude increases (that is, moving from the pole toward the equator, since Southern Hemisphere latitudes are negative), wind speed may tend to decrease. Although this correlation is stronger than that observed in the Northern Hemisphere, it remains relatively weak, indicating that other factors may also significantly influence wind speed in this region.

Overall, in the Northern Hemisphere, the very weak positive correlation between latitude and wind speed implies minimal relationship, while in the Southern Hemisphere, the weak negative correlation suggests a potential decrease in wind speed with increasing latitude, though this relationship is also not robust.