# WeatherPy

---

## Starter Code to Generate Random Geographic Coordinates and a List of Cities

In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress

# Impor the OpenWeatherMap API key
from config import api_key

# Import citipy to determine the cities based on latitude and longitude
from citipy import citipy



In [2]:
# api_key

### Generate the Cities List by Using the `citipy` Library

In [3]:
# Empty list for holding the latitude and longitude combinations
lat_lngs = []

# Empty list for holding the cities names
cities = []

# Range of latitudes and longitudes
lat_range = (-90, 90)
lng_range = (-180, 180)

# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify nearest city for each lat, lng combination
for lat_lng in lat_lngs:
    city = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name
    
    # If the city is unique, then add it to a our cities list
    if city not in cities:
        cities.append(city)

# Print the city count to confirm sufficient count
print(f"Number of cities in the list: {len(cities)}")

Number of cities in the list: 588


In [4]:
print(cities)

['tuktoyaktuk', 'avarua', 'atasu', 'burnie', 'kapaa', 'taolanaro', 'ushuaia', 'manga', 'nikolskoye', 'mattru', 'chokurdakh', 'hilo', 'busselton', 'brainerd', 'vanimo', 'nanortalik', 'east london', 'punta arenas', 'mys shmidta', 'ribeira grande', 'cape town', 'hithadhoo', 'saint george', 'port alfred', 'upernavik', 'carauari', 'hobart', 'qaanaaq', 'coahuayana', 'geraldton', 'bethel', 'inuvik', 'sitka', 'healdsburg', 'albany', 'bluff', 'cuiluan', 'banmo', 'nakonde', 'narsaq', 'monroe', 'longyearbyen', 'puerto ayora', 'amos', 'illoqqortoormiut', 'kishtwar', 'dikson', 'taoudenni', 'saquarema', 'garissa', 'yellowknife', 'marawi', 'hay river', 'lompoc', 'kodiak', 'kruisfontein', 'vavuniya', 'mataura', 'torbay', 'katsuura', 'calabozo', 'brigantine', 'jamestown', 'la tuque', 'lalomanu', 'nioro', 'butaritari', 'anchorage', 'faanui', 'saleaula', 'hermanus', 'banchory', 'codrington', 'killybegs', 'beringovskiy', 'rikitea', 'skala', 'arraial do cabo', 'clyde river', 'andra', 'afmadu', 'hamilton', 

---

## Requirement 1: Create Plots to Showcase the Relationship Between Weather Variables and Latitude

### Use the OpenWeatherMap API to retrieve weather data from the cities list generated in the started code

In [5]:
# Practice json request see how the data is formatted
# api_key = 'd24a3d031ce70c78bdcc391965d1f94c'
url = 'https://api.openweathermap.org/data/2.5/weather?&q=London&appid=' + api_key

response = requests.get(url)
weather_data = response.json()

weather_response = requests.get(url)
weather_json = weather_response.json()

# Get the temperature from the response
print(f"The weather API responded with: {weather_json}.")


The weather API responded with: {'coord': {'lon': -0.1257, 'lat': 51.5085}, 'weather': [{'id': 802, 'main': 'Clouds', 'description': 'scattered clouds', 'icon': '03d'}], 'base': 'stations', 'main': {'temp': 284.36, 'feels_like': 283.29, 'temp_min': 282.16, 'temp_max': 286.01, 'pressure': 1029, 'humidity': 67}, 'visibility': 10000, 'wind': {'speed': 2.06, 'deg': 160}, 'clouds': {'all': 37}, 'dt': 1676393138, 'sys': {'type': 2, 'id': 2019646, 'country': 'GB', 'sunrise': 1676359037, 'sunset': 1676394740}, 'timezone': 0, 'id': 2643743, 'name': 'London', 'cod': 200}.


In [6]:
import requests

# Set the API base URL
base_url = "http://api.openweathermap.org/data/2.5/weather?"
# weather_api_key = api_key

# Define an empty list to fetch the weather data for each city
city_data = []

# Print to logger
print("Beginning Data Retrieval     ")
print("-----------------------------")

# Create counters
record_count = 1
set_count = 1

# Loop through all the cities in our list to fetch weather data
for i, city in enumerate(cities):
        
    # Group cities in sets of 50 for logging purposes
    if (i % 50 == 0 and i >= 50):
        set_count += 1
        record_count = 0

    # Create endpoint URL with each city
    endpoint = f"{base_url}q={city}&appid={api_key}"
    
    # Log the endpoint, record, and set numbers
    print(f"Processing Record {record_count} of Set {set_count} | {city}")

    # Add 1 to the record count
    record_count += 1

    # Run an API request for each of the cities
    try:
        # Send an API request for each city
        response = requests.get(endpoint)

        # Parse the JSON and retrieve data
        city_weather = response.json()

        # Parse out latitude, longitude, max temp, humidity, cloudiness, wind speed, country, and date
        city_lat = city_weather["coord"]["lat"]
        city_lng = city_weather["coord"]["lon"]
        city_max_temp = city_weather["main"]["temp_max"]
        city_humidity = city_weather["main"]["humidity"]
        city_clouds = city_weather["clouds"]["all"]
        city_wind = city_weather["wind"]["speed"]
        city_country = city_weather["sys"]["country"]
        city_date = city_weather["dt"]
        
#         # Append the City information into city_data list
        city_data.append({"City": city, 
                          "Lat": city_lat, 
                          "Lng": city_lng, 
                          "Max Temp": city_max_temp,
                          "Humidity": city_humidity,
                          "Cloudiness": city_clouds,
                          "Wind Speed": city_wind,
                          "Country": city_country,
                          "Date": city_date
                         })
                          

    # If an error is experienced, skip the city
    except:
        print("City not found. Skipping...")
        pass
              
# Indicate that Data Loading is complete 
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")


Beginning Data Retrieval     
-----------------------------
Processing Record 1 of Set 1 | tuktoyaktuk
Processing Record 2 of Set 1 | avarua
Processing Record 3 of Set 1 | atasu
Processing Record 4 of Set 1 | burnie
Processing Record 5 of Set 1 | kapaa
Processing Record 6 of Set 1 | taolanaro
City not found. Skipping...
Processing Record 7 of Set 1 | ushuaia
Processing Record 8 of Set 1 | manga
Processing Record 9 of Set 1 | nikolskoye
Processing Record 10 of Set 1 | mattru
Processing Record 11 of Set 1 | chokurdakh
Processing Record 12 of Set 1 | hilo
Processing Record 13 of Set 1 | busselton
Processing Record 14 of Set 1 | brainerd
Processing Record 15 of Set 1 | vanimo
Processing Record 16 of Set 1 | nanortalik
Processing Record 17 of Set 1 | east london
Processing Record 18 of Set 1 | punta arenas
Processing Record 19 of Set 1 | mys shmidta
City not found. Skipping...
Processing Record 20 of Set 1 | ribeira grande
Processing Record 21 of Set 1 | cape town
Processing Record 22 of Se

Processing Record 40 of Set 4 | louisbourg
City not found. Skipping...
Processing Record 41 of Set 4 | victoria
Processing Record 42 of Set 4 | severomuysk
Processing Record 43 of Set 4 | los llanos de aridane
Processing Record 44 of Set 4 | parla
Processing Record 45 of Set 4 | tilichiki
Processing Record 46 of Set 4 | port macquarie
Processing Record 47 of Set 4 | tsihombe
City not found. Skipping...
Processing Record 48 of Set 4 | ayan
Processing Record 49 of Set 4 | ife
Processing Record 0 of Set 5 | okandja
City not found. Skipping...
Processing Record 1 of Set 5 | halalo
City not found. Skipping...
Processing Record 2 of Set 5 | abrau-dyurso
Processing Record 3 of Set 5 | bedele
Processing Record 4 of Set 5 | sompeta
Processing Record 5 of Set 5 | nalut
Processing Record 6 of Set 5 | peace river
Processing Record 7 of Set 5 | la ronge
Processing Record 8 of Set 5 | khorramshahr
Processing Record 9 of Set 5 | havre-saint-pierre
Processing Record 10 of Set 5 | kavieng
Processing Re

Processing Record 27 of Set 8 | agapovka
Processing Record 28 of Set 8 | bengkulu
Processing Record 29 of Set 8 | wilmington
Processing Record 30 of Set 8 | bud
Processing Record 31 of Set 8 | samusu
City not found. Skipping...
Processing Record 32 of Set 8 | mehamn
Processing Record 33 of Set 8 | dolores
Processing Record 34 of Set 8 | champerico
Processing Record 35 of Set 8 | tura
Processing Record 36 of Set 8 | coihaique
Processing Record 37 of Set 8 | rafaela
Processing Record 38 of Set 8 | eldikan
City not found. Skipping...
Processing Record 39 of Set 8 | chernigovka
Processing Record 40 of Set 8 | prince rupert
Processing Record 41 of Set 8 | suileng
Processing Record 42 of Set 8 | wanganui
Processing Record 43 of Set 8 | cedar city
Processing Record 44 of Set 8 | sorland
Processing Record 45 of Set 8 | devonport
Processing Record 46 of Set 8 | yemtsa
Processing Record 47 of Set 8 | tawkar
City not found. Skipping...
Processing Record 48 of Set 8 | carutapera
Processing Record 

Processing Record 13 of Set 12 | rosarito
Processing Record 14 of Set 12 | obo
Processing Record 15 of Set 12 | yanan
City not found. Skipping...
Processing Record 16 of Set 12 | chicama
Processing Record 17 of Set 12 | awjilah
Processing Record 18 of Set 12 | aswan
Processing Record 19 of Set 12 | yenagoa
Processing Record 20 of Set 12 | tyup
Processing Record 21 of Set 12 | cockburn harbour
City not found. Skipping...
Processing Record 22 of Set 12 | mangrol
Processing Record 23 of Set 12 | yabelo
Processing Record 24 of Set 12 | kerema
Processing Record 25 of Set 12 | persianovskiy
Processing Record 26 of Set 12 | anadyr
Processing Record 27 of Set 12 | gorin
Processing Record 28 of Set 12 | nome
Processing Record 29 of Set 12 | kindu
Processing Record 30 of Set 12 | ler
Processing Record 31 of Set 12 | deer lake
Processing Record 32 of Set 12 | santa maria
Processing Record 33 of Set 12 | ambulu
Processing Record 34 of Set 12 | shelburne
Processing Record 35 of Set 12 | eirunepe
Pr

In [None]:
city_data


In [None]:
# Convert the cities weather data into a Pandas DataFrame
city_data_df = pd.DataFrame(city_data)

city_data_df



In [None]:
# Show Record Count
city_data_df.count()

In [None]:
# Display sample data
city_data_df.head()

In [None]:

# Export the City_Data into a csv
city_data_df.to_csv("output_data/cities.csv", index_label="City_ID")

In [None]:
# Read saved data
city_data_df = pd.read_csv("output_data/cities.csv", index_col="City_ID")

# Display sample data
city_data_df.head()

### Create the Scatter Plots Requested

#### Latitude Vs. Temperature

In [None]:
# Build scatter plot for latitude vs. temperature

# Set the size and style of the markers
markersize=15
markerstyle='o'

# Set the opacity of the markers
alpha=0.6

# Plot the scatter plot with the specified markers and alpha value
city_data_df.plot.scatter(x='Lat', y='Max Temp', s=markersize, marker=markerstyle, alpha=alpha)

# Add the x-axis label
plt.xlabel('Latitude')

# Add the y-axis label
plt.ylabel('Max Temperature (C)')

# Set the plot title
plt.title('City Latitude vs Temperature')

# Show the plot
plt.show()

# Save the figure
plt.savefig("output_data/Fig1.png")

# Show plot
plt.show()


#### Latitude Vs. Humidity

In [None]:
# Build the scatter plots for latitude vs. humidity
city_data_df.plot.scatter(x='Lat', y='Humidity', s=markersize, marker=markerstyle, alpha=alpha)

# Incorporate the other graph properties
# Set the size and style of the markers
markersize=15
markerstyle='o'

# Set the opacity of the markers
alpha=0.6

# Add the x-axis label
plt.xlabel('Latitude')

# Add the y-axis label
plt.ylabel('Humididty %')

# Set the plot title
plt.title('City Latitude vs Humidity')


# Save the figure
plt.savefig("output_data/Fig2.png")

# Show plot
plt.show()

#### Latitude Vs. Cloudiness

In [None]:
# Build the scatter plots for latitude vs. cloudiness
city_data_df.plot.scatter(x='Lat', y="Cloudiness", s=markersize, marker=markerstyle, alpha=alpha)

# Incorporate the other graph properties

# Set the size and style of the markers
markersize=15
markerstyle='o'

# Set the opacity of the markers
alpha=0.6

# Add the x-axis label
plt.xlabel('Latitude')

# Add the y-axis label
plt.ylabel('Cloudiness')

# Set the plot title
plt.title('City Latitude vs Cloudiness')


# Save the figure
plt.savefig("output_data/Fig3.png")

# Show plot
plt.show()

#### Latitude vs. Wind Speed Plot

In [None]:
# Build the scatter plots for latitude vs. wind speed
city_data_df.plot.scatter(x='Lat', y='Wind Speed', s=markersize, marker=markerstyle, alpha=alpha)

# Incorporate the other graph properties

# Set the size and style of the markers
markersize=15
markerstyle='o'

# Set the opacity of the markers
alpha=0.6

# Add the x-axis label
plt.xlabel('Latitude')

# Add the y-axis label
plt.ylabel('Wind Speed')

# Set the plot title
plt.title('City Latitude vs Wind Speed')

# Save the figure
plt.savefig("output_data/Fig4.png")

# Show plot
plt.show()

---

## Requirement 2: Compute Linear Regression for Each Relationship


In [None]:
# Define a function to create Linear Regression plots


def plot_linear_regression(x, y, y_pred, title, xlabel, ylabel):
    plt.scatter(x, y, color='blue')
    plt.plot(x, y_pred, color='red')
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.show()



In [None]:
# Create a DataFrame with the Northern Hemisphere data (Latitude >= 0)
northern_hemi_df = city_data_df[city_data_df["Lat"] >= 0]

# Display sample data
northern_hemi_df.head()


In [None]:
# Create a DataFrame with the Southern Hemisphere data (Latitude < 0)


southern_hemi_df = city_data_df[city_data_df["Lat"] <= 0]

# Display sample data
southern_hemi_df.head()

###  Temperature vs. Latitude Linear Regression Plot

In [None]:
x = northern_hemi_df["Lat"]
y = northern_hemi_df["Max Temp"]

#create basic scatterplot
plt.plot(x, y, 'o')

#obtain m (slope) and b(intercept) of linear regression line
m, b = np.polyfit(x, y, 1)

#add linear regression line to scatterplot 
plt.plot(x, m*x+b)

In [None]:
#Correlation Coefecient
correlation = northern_hemi_df["Lat"].corr(northern_hemi_df["Max Temp"])

print("The correlation coefficient between lattitude and temperature  is:", correlation)

In [None]:
# Linear regression on Southern Hemisphere

x = southern_hemi_df["Lat"]
y = southern_hemi_df["Max Temp"]

#create basic scatterplot
plt.plot(x, y, 'o')

#obtain m (slope) and b(intercept) of linear regression line
m, b = np.polyfit(x, y, 1)

#add linear regression line to scatterplot 
plt.plot(x, m*x+b)


In [None]:
#Correlation Coefecient
correlation = southern_hemi_df["Lat"].corr(southern_hemi_df["Max Temp"])

print("The correlation coefficient between lattitude and temperature  is:", correlation)

**Discussion about the linear relationship:** 

#### The correlation coefficient measures the strength and direction of a linear relationship between two variables. A coefficient of 1 means a perfect positive relationship, -1 a perfect negative relationship and 0 means no relationship at all.

#### In the Northern Hemisphere, the correlation coefficient between latitude and temperature is -0.87, indicating a strong negative relationship. This suggests that as you move towards the poles (increasing latitude), the temperature decreases.

#### In the Southern Hemisphere, the correlation coefficient between latitude and temperature is 0.42, indicating a moderate positive relationship. This suggests that as you move towards the equator (decreasing latitude), the temperature increases.

### Humidity vs. Latitude Linear Regression Plot

In [None]:
# Northern Hemisphere
x = northern_hemi_df["Lat"]
y = northern_hemi_df["Humidity"]

#create basic scatterplot
plt.plot(x, y, 'o')

#obtain m (slope) and b(intercept) of linear regression line
m, b = np.polyfit(x, y, 1)

#add linear regression line to scatterplot 
plt.plot(x, m*x+b)


In [None]:
#Correlation Coefecient
correlation = northern_hemi_df["Lat"].corr(northern_hemi_df["Humidity"])

print("The correlation coefficient between lattitude and humidity  is:", correlation)

In [None]:

# Southern Hemisphere
x = southern_hemi_df["Lat"]
y = southern_hemi_df["Humidity"]

#create basic scatterplot
plt.plot(x, y, 'o')

#obtain m (slope) and b(intercept) of linear regression line
m, b = np.polyfit(x, y, 1)

#add linear regression line to scatterplot 
plt.plot(x, m*x+b)


In [None]:
#Correlation Coefecient
correlation = southern_hemi_df["Lat"].corr(southern_hemi_df["Humidity"])

print("The correlation coefficient between lattitude and humididty  is:", correlation)

**Discussion about the linear relationship:** 


#### The correlation coefficient measures the linear relationship between two variables, with a value of 1 indicating a perfect positive correlation, -1 indicating a perfect negative correlation, and 0 indicating no correlation. 

#### In the case of the Northern hemisphere, the correlation coefficient between latitude and humidity is 0.50, which suggests a moderate positive correlation. 

#### In the case of the Southern hemisphere, the correlation coefficient between latitude and humidity is 0.34, which suggests a weaker positive correlation. These values indicate that latitude and humidity have a positive relationship in both hemispheres, but the relationship is stronger in the Northern hemisphere.

### Cloudiness vs. Latitude Linear Regression Plot

In [None]:
import seaborn as sns

sns.regplot(x=northern_hemi_df['Lat'], y=northern_hemi_df['Cloudiness'], data=northern_hemi_df)

plt.show

In [None]:
#Correlation Coefecient
correlation = northern_hemi_df["Lat"].corr(northern_hemi_df["Cloudiness"])

print("The correlation coefficient between cloudiness and lattitude linear regression  is:", correlation)

In [None]:
#Southern Hemisphere
sns.regplot(x=southern_hemi_df['Lat'], y=southern_hemi_df['Cloudiness'], data=southern_hemi_df)

plt.show

In [None]:



#Correlation Coefecient
correlation = northern_hemi_df["Lat"].corr(northern_hemi_df["Cloudiness"])

print("The correlation coefficient between lattitude and cloudiness linear regression is:", correlation)

**Discussion about the linear relationship:** 

#### The correlation coefficient between two variables is a measure of the strength and direction of the relationship between them. A correlation coefficient of 0.2645902036297792 indicates a weak positive correlation between cloudiness and lattitude. This means that as lattitude increases, cloudiness tends to increase as well, but the relationship is not strong.
 
#### The fact that the correlation coefficient is the same when calculating the relationship between lattitude and cloudiness as compared to cloudiness and lattitude, just shows that the correlation is symmetrical and the relationship between the two variables is the same in either direction.



### Wind Speed vs. Latitude Linear Regression Plot

In [None]:
# Northern Hemisphere
sns.regplot(x=northern_hemi_df['Lat'], y=northern_hemi_df['Wind Speed'], data=northern_hemi_df)

plt.show

In [None]:
#Correlation Coefecient
correlation = northern_hemi_df["Lat"].corr(northern_hemi_df["Wind Speed"])

print("The correlation coefficient between windspeed and lattitude linear regression  is:", correlation)

In [None]:
# Southern Hemisphere

sns.regplot(x=southern_hemi_df['Lat'], y=southern_hemi_df['Wind Speed'], data=southern_hemi_df)

plt.show

In [None]:
#Correlation Coefecient
correlation = southern_hemi_df["Lat"].corr(southern_hemi_df["Wind Speed"])

print("The correlation coefficient between latttitude and windspeed linear regression  is:", correlation)

**Discussion about the linear relationship:** 


#### The correlation coefficients between wind speed and latitude in both hemispheres suggest a weak relationship. In the southern hemisphere, there is a negative correlation (-0.42) meaning as latitude increases, wind speed decreases. In the northern hemisphere, the correlation coefficient is close to 0 (0.0028), indicating no significant relationship between wind speed and latitude.

#### Linear regression can be used to model the relationship between wind speed and latitude and make predictions. However, a weak correlation between the two variables may result in a poor model fit and unreliable predictions. Further analysis and consideration of other factors affecting wind speed is necessary to build a robust model.