# WeatherPy
----

#### Observations:
    * 

In [1]:
! pip install citipy



In [2]:
# Dependencies
import requests
import pandas as pd
import numpy as np
import time
import matplotlib.pyplot as plt
from citipy import citipy
from urllib.parse import quote
%matplotlib notebook 

# Import Keys
from config import weather_api_key



## Generate Cities List

In [3]:
# Unique city names, kept in the order in which they are first encountered
cities = []

# Sample 1500 latitudes and 1500 longitudes from uniform distributions
lats = np.random.uniform(low=-90.000, high=90.000, size=1500)
lngs = np.random.uniform(low=-180.000, high=180.000, size=1500)

# Pair each latitude with the longitude at the same position
gcoords = zip(lats, lngs)

# Look up the nearest city for every coordinate pair, skipping repeats
for sampled_lat, sampled_lng in gcoords:
    city = citipy.nearest_city(sampled_lat, sampled_lng).city_name
    if city in cities:
        continue
    cities.append(city)
len(cities)

610

In [4]:
# Build the partial query URL; the city name is appended after "q=" per request.
# HTTPS is used so the API key in the query string is not sent in clear text.
url = "https://api.openweathermap.org/data/2.5/weather?"
units = "imperial"
api_key = weather_api_key

query_url = f"{url}appid={api_key}&units={units}&q="

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [5]:
# One list per output column. All lists must stay the same length so they can
# be combined column-wise into a DataFrame afterwards.
max_temp = []
humidity = []
cloudiness = []
wind_speed = []
country = []
date = []
city_name = []
lat = []
lng = []

# Number of the next successfully processed record (1-based)
counter = 1

# Upper bound, in seconds, on how long a single request may block the loop
REQUEST_TIMEOUT_SECONDS = 10

print(f"Beginning Data Retrieval")
print(f"-----------------------------")

for city in cities:
    # URL-encode the city name so spaces and reserved characters (&, #, ...)
    # cannot corrupt the query string.
    try:
        response = requests.get(
            query_url + quote(city), timeout=REQUEST_TIMEOUT_SECONDS
        ).json()
    except (requests.RequestException, ValueError) as err:
        # Network failure, timeout, or a body that is not valid JSON
        print(f" Request failed for {city} ({err}). Skipping...")
        continue

    try:
        # Read every field BEFORE appending anything: if any key is missing
        # the whole city is skipped and the lists cannot get out of step.
        city_max_temp = response['main']['temp_max']
        city_humidity = response['main']['humidity']
        city_cloudiness = response['clouds']['all']
        city_wind_speed = response['wind']['speed']
        city_country = response['sys']['country']
        city_date = response['dt']
        city_label = response['name']
        city_lat = response['coord']['lat']
        city_lng = response['coord']['lon']
    except (KeyError, TypeError):
        # The response does not have the expected structure
        print(f" City not found. Skipping...")
        continue

    max_temp.append(city_max_temp)
    humidity.append(city_humidity)
    cloudiness.append(city_cloudiness)
    wind_speed.append(city_wind_speed)
    country.append(city_country)
    date.append(city_date)
    city_name.append(city_label)
    lat.append(city_lat)
    lng.append(city_lng)
    print(f"Processing Record {counter} | {city}")

    counter = counter + 1

print(f"-----------------------------")
print(f"Data Retrieval Complete")
print(f"-----------------------------")


Beginning Data Retrieval
-----------------------------
 City not found. Skipping...
 City not found. Skipping...
 City not found. Skipping...
 City not found. Skipping...
 City not found. Skipping...
 City not found. Skipping...
 City not found. Skipping...
 City not found. Skipping...
 City not found. Skipping...
 City not found. Skipping...
 City not found. Skipping...
 City not found. Skipping...
 City not found. Skipping...
 City not found. Skipping...
 City not found. Skipping...
 City not found. Skipping...
 City not found. Skipping...
 City not found. Skipping...
 City not found. Skipping...
 City not found. Skipping...
 City not found. Skipping...
 City not found. Skipping...
 City not found. Skipping...
 City not found. Skipping...
 City not found. Skipping...
 City not found. Skipping...


KeyboardInterrupt: 

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [2]:
# Pair each output column name with the list collected for it, in display order
column_names = [
    "City",
    "Lat",
    "Lng",
    "Max Temp (F)",
    "Humidity (%)",
    "Cloudiness (%)",
    "Wind Speed (mph)",
    "Country",
    "Date",
]
column_values = [
    city_name,
    lat,
    lng,
    max_temp,
    humidity,
    cloudiness,
    wind_speed,
    country,
    date,
]
weather_dict = dict(zip(column_names, column_values))

# Build the data frame and show the number of non-null entries per column
df_weatherpy = pd.DataFrame(weather_dict)
df_weatherpy.count()

NameError: name 'city_name' is not defined

In [26]:
# Export the city data; the row index is written as an unnamed first column
df_weatherpy.to_csv("Weather_Data.csv", index=True)

In [4]:
# Reload the exported data and discard the unnamed column holding the old index
weather_csv = pd.read_csv('Weather_Data.csv')
df_weatherpy = weather_csv.drop(columns=['Unnamed: 0'])

df_weatherpy.head()

Unnamed: 0,City,Lat,Lng,Max Temp (F),Humidity (%),Cloudiness (%),Wind Speed (mph),Country,Date
0,Huntsville,34.73,-86.59,79.0,50,1,5.82,US,1602705313
1,Murray Bridge,-35.12,139.27,75.0,37,100,5.99,AU,1602705321
2,Biskamzha,53.45,89.53,32.04,97,100,4.56,RU,1602705336
3,Rikitea,-23.12,-134.97,73.63,80,27,19.46,PF,1602705297
4,Lompoc,34.64,-120.46,96.8,18,1,14.99,US,1602705337


## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [5]:
# Sort the humidity readings in ascending order so the largest values are
# listed last; any reading above 100% would appear at the end.
humidity_pct = df_weatherpy["Humidity (%)"]
humidity_pct.sort_values(ascending=True)
# There are no cities in this dataframe that have humidity over 100%

410      8
28       9
376     10
472     11
90      11
      ... 
390    100
158    100
575    100
151    100
238    100
Name: Humidity (%), Length: 579, dtype: int64

In [6]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns
df_weatherpy.describe()

Unnamed: 0,Lat,Lng,Max Temp (F),Humidity (%),Cloudiness (%),Wind Speed (mph),Date
count,579.0,579.0,579.0,579.0,579.0,579.0,579.0
mean,19.259396,15.840449,63.580984,68.998273,48.069085,8.261554,1602705000.0
std,32.763466,90.06441,19.51622,22.727854,40.164352,5.40802,86.69996
min,-54.8,-179.17,10.67,8.0,0.0,0.54,1602705000.0
25%,-7.97,-63.03,48.315,56.0,1.0,4.17,1602705000.0
50%,22.57,21.55,68.0,76.0,41.0,6.93,1602705000.0
75%,46.425,92.69,78.8,86.0,90.0,11.375,1602705000.0
max,78.22,178.42,99.0,100.0,100.0,31.18,1602706000.0


## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [39]:
# Scatter of maximum temperature against latitude, saved as Fig1.png
fig, ax = plt.subplots(figsize=(8, 5))
ax.scatter(
    df_weatherpy['Lat'],
    df_weatherpy['Max Temp (F)'],
    marker='o',
    facecolors='#9ffeb0',
    edgecolor='black',
)
ax.set_xlabel("Latitude")
ax.set_ylabel("Max Temperature (F)")
ax.set_title("Maximum Temperature per City by Latitude (10/15/2020)")
ax.grid()
plt.savefig("Fig1.png");

<IPython.core.display.Javascript object>

##### Latitude vs. Temperature Plot (Fig 1 )
Cities located near the equator (0° latitude) are experiencing the hottest maximum temperatures.  The farther a city is from the equator, the cooler its temperature.

In [9]:
def scatplot(x_values, y_values, x_label, y_label):
    """Draw a scatter plot on a new 8x5 figure with labelled axes and a grid.

    Parameters
    ----------
    x_values, y_values : array-like
        Coordinates of the points to plot.
    x_label, y_label : str
        Text for the x and y axis labels.

    Returns
    -------
    matplotlib.axes.Axes
        The axes that were drawn on, so the caller can add a title or
        otherwise customise the figure.
    """
    fig, ax = plt.subplots(figsize=(8, 5))
    ax.scatter(x_values, y_values,
               marker='o', facecolors='#9ffeb0', edgecolor='black')
    ax.set(xlabel=x_label, ylabel=y_label)
    ax.grid()
    # Hand the axes back: callers assign the result (e.g. "ax2 = scatplot(...)"),
    # which would otherwise always be None.
    return ax

## Latitude vs. Humidity Plot

In [10]:
# Humidity against latitude for every city, saved as Fig2.png
ax2 = scatplot(
    x_values=df_weatherpy['Lat'],
    y_values=df_weatherpy['Humidity (%)'],
    x_label="City Latitude",
    y_label="Humidity (%)",
)
plt.title("Humidity per City by Latitude (10/15/2020)")
plt.savefig("Fig2.png");

<IPython.core.display.Javascript object>

##### Latitude vs. Humidity Plot (Fig 2)
Despite many of the cities experiencing around 80% to 100% humidity levels, there does not appear to be a relationship between latitude and humidity.  Other factors would have to be investigated, such as a city's proximity to bodies of water or its altitude.

## Latitude vs. Cloudiness Plot

In [11]:
# Cloudiness against latitude for every city, saved as Fig3.png
ax3 = scatplot(
    x_values=df_weatherpy['Lat'],
    y_values=df_weatherpy['Cloudiness (%)'],
    x_label="Latitude",
    y_label="Cloudiness (%)",
)
plt.title("Cloudiness per City By Latitude (10/15/2020)")
plt.savefig("Fig3.png");

<IPython.core.display.Javascript object>

##### Latitude vs. Cloudiness Plot (Fig 3)
Comparing a city's latitude to its cloudiness reveals no detectable relationship.  

## Latitude vs. Wind Speed Plot

In [12]:
# Wind speed against latitude for every city, saved as Fig4.png
ax4 = scatplot(
    x_values=df_weatherpy['Lat'],
    y_values=df_weatherpy['Wind Speed (mph)'],
    x_label="Latitude",
    y_label="Wind Speed (mph)",
)
# NOTE(review): this title is dated 10/14/2020 while the other figure titles
# in this notebook are dated 10/15/2020 - confirm which date is intended.
plt.title("Wind Speed per City by Latitude (10/14/2020)")
plt.savefig("Fig4.png");

<IPython.core.display.Javascript object>

##### Latitude vs. Wind Speed Plot (Fig 4)
Outside of some cities experiencing wind speeds of 15 mph or more, the majority of the cities have wind speeds of less than 15 mph.  This pattern holds across both the Northern and Southern Hemispheres.

## Linear Regression

In [13]:
# Create df for the two hemispheres

# Northern Hemisphere: cities on or above the equator (latitude >= 0)
is_northern = df_weatherpy['Lat'] >= 0.0
north = df_weatherpy.loc[is_northern]
north.head()

Unnamed: 0,City,Lat,Lng,Max Temp (F),Humidity (%),Cloudiness (%),Wind Speed (mph),Country,Date
0,Huntsville,34.73,-86.59,79.0,50,1,5.82,US,1602705313
2,Biskamzha,53.45,89.53,32.04,97,100,4.56,RU,1602705336
4,Lompoc,34.64,-120.46,96.8,18,1,14.99,US,1602705337
6,Butaritari,3.07,172.79,81.9,76,100,11.54,KI,1602705337
7,Pangody,65.85,74.49,31.57,93,0,14.79,RU,1602705337


In [14]:
# Southern Hemisphere: cities strictly below the equator (latitude < 0)
is_southern = df_weatherpy['Lat'] < 0.0
south = df_weatherpy.loc[is_southern]
south.head()

Unnamed: 0,City,Lat,Lng,Max Temp (F),Humidity (%),Cloudiness (%),Wind Speed (mph),Country,Date
1,Murray Bridge,-35.12,139.27,75.0,37,100,5.99,AU,1602705321
3,Rikitea,-23.12,-134.97,73.63,80,27,19.46,PF,1602705297
5,Ushuaia,-54.8,-68.3,48.2,57,40,12.75,AR,1602705337
9,Mataura,-46.19,168.86,39.0,93,0,1.99,NZ,1602705295
10,Omboué,-1.57,9.26,75.78,80,98,8.97,GA,1602705338


In [15]:
from scipy import stats
def linregressplot(x_values, y_values, x_label, y_label, label_loc):
    """Scatter-plot the data with its least-squares regression line.

    A new 8x5 figure is created, the points are drawn in blue, the fitted
    line in red, and the line equation is annotated at ``label_loc``.
    The correlation coefficient (r-value) is printed.

    Parameters
    ----------
    x_values, y_values : array-like
        Coordinates of the observations to plot and fit.
    x_label, y_label : str
        Text for the x and y axis labels.
    label_loc : tuple
        (x, y) position, in data coordinates, of the equation annotation.

    Returns
    -------
    matplotlib.axes.Axes
        The axes that were drawn on, so the caller can add a title or
        otherwise customise the figure.
    """
    fig, ax = plt.subplots(figsize=(8, 5))
    ax.scatter(x_values, y_values, marker="o", color='b')
    ax.set(xlabel=x_label, ylabel=y_label)

    # Only slope, intercept and r-value are used; p-value and stderr are ignored
    slope, intercept, rvalue, _pvalue, _stderr = stats.linregress(x_values, y_values)

    # Evaluate the fitted line at every x in one vectorised step
    y_predicted = slope * np.asarray(x_values, dtype=float) + intercept

    # Show the intercept's sign as the operator so a negative intercept reads
    # "y = 0.5x - 3.2" instead of "y = 0.5x + -3.2".
    sign = "+" if intercept >= 0 else "-"
    line_eq = f"y = {round(slope, 2)}x {sign} {abs(round(intercept, 2))}"

    ax.plot(x_values, y_predicted, color='r')
    # Annotate through the axes object rather than pyplot's implicit state
    ax.annotate(line_eq, label_loc, fontsize=10, color="red")

    print(f"The rvalue is {rvalue}.")
    # Hand the axes back: callers assign the result (e.g. "ax5 = linregressplot(...)"),
    # which would otherwise always be None.
    return ax

##  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [16]:
# Regression of max temperature on latitude, Northern Hemisphere (Fig5.png)
ax5 = linregressplot(
    x_values=north['Lat'],
    y_values=north['Max Temp (F)'],
    x_label="Latitude",
    y_label="Max Temp (F)",
    label_loc=(0, 10),
)
plt.title("Maximum Temperatures for Latitudes in the Northern Hemisphere (10/15/2020)")
plt.savefig("Fig5.png");

<IPython.core.display.Javascript object>

The rvalue is -0.840498012428487.


##### Northern Hemisphere - Maximum Temperatures (Fig 5)
The farther a city is from the equator, the cooler its temperatures are.

## Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [19]:
# Regression of max temperature on latitude, Southern Hemisphere (Fig6.png)
ax6 = linregressplot(
    x_values=south['Lat'],
    y_values=south['Max Temp (F)'],
    x_label="Latitude",
    y_label="Max Temp (F)",
    label_loc=(-20, 40),
)
plt.title("Maximum Temperatures for Latitudes in the Southern Hemisphere (10/15/2020)")
plt.savefig("Fig6.png");

<IPython.core.display.Javascript object>

The rvalue is 0.6483715933593129.


##### Southern Hemisphere - Maximum Temperatures (Fig 6)
Alternately, the closer a city is to the equator, the hotter the temperature.

##  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [37]:
# Regression of humidity on latitude, Northern Hemisphere (Fig7.png)
ax7 = linregressplot(
    x_values=north['Lat'],
    y_values=north['Humidity (%)'],
    x_label="Latitude",
    y_label="Humidity (%)",
    label_loc=(50, 10),
)
plt.title("Humidity for Latitudes in the Northern Hemisphere (10/15/2020)")
plt.savefig("Fig7.png");

<IPython.core.display.Javascript object>

The rvalue is 0.19703461739163575.


##### Northern Hemisphere - Humidity (Fig 7)
Humidity in the Northern Hemisphere appears to have only a weak correlation with latitude, as confirmed by the r-value.  

##  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [38]:
# Regression of humidity on latitude, Southern Hemisphere (Fig8.png)
ax8 = linregressplot(
    x_values=south['Lat'],
    y_values=south['Humidity (%)'],
    x_label="Latitude",
    y_label="Humidity (%)",
    label_loc=(-20, 15),
)
plt.title("Humidity for Latitudes in the Southern Hemisphere (10/15/2020)")
plt.savefig("Fig8.png");

<IPython.core.display.Javascript object>

The rvalue is 0.17091072954915124.


##### Southern Hemisphere - Humidity (Fig 8)
Similar to looking at the humidity percentages in the Northern Hemisphere against a city's latitudinal location, in the Southern Hemisphere, there is also a weak correlation.

##  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [26]:
# Regression of cloudiness on latitude, Northern Hemisphere (Fig9.png)
ax9 = linregressplot(
    x_values=north['Lat'],
    y_values=north['Cloudiness (%)'],
    x_label="Latitude",
    y_label="Cloudiness (%) ",
    label_loc=(50, 10),
)
plt.title("Cloudiness for Latitudes in the Northern Hemisphere (10/15/2020)")
plt.savefig("Fig9.png");

<IPython.core.display.Javascript object>

The rvalue is 0.1630629954834948.


##### Northern Hemisphere - Cloudiness (Fig 9)
The relationship between cloud coverage in a city and its latitude does not have a correlation.

##  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [31]:
# Regression of cloudiness on latitude, Southern Hemisphere (Fig10.png)
ax10 = linregressplot(
    x_values=south['Lat'],
    y_values=south['Cloudiness (%)'],
    x_label="Latitude",
    y_label="Cloudiness (%) ",
    label_loc=(-55, 30),
)
plt.title("Cloudiness for Latitudes in the Southern Hemisphere (10/15/2020)")
plt.savefig("Fig10.png");

<IPython.core.display.Javascript object>

The rvalue is 0.3649135014204588.


##### Southern Hemisphere - Cloudiness (Fig 10)
Unlike in the Northern Hemisphere, as the location of a city nears the equator, the cloud coverage increases.  This could also be a result of the randomized list of cities.

##  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [33]:
# Regression of wind speed on latitude, Northern Hemisphere (Fig11.png)
ax11 = linregressplot(
    x_values=north['Lat'],
    y_values=north['Wind Speed (mph)'],
    x_label="Latitude",
    y_label="Wind Speed (mph)",
    label_loc=(0, 30),
)
plt.title("Wind Speeds for Latitudes in the Northern Hemisphere (10/15/2020)")
plt.savefig("Fig11.png");

<IPython.core.display.Javascript object>

The rvalue is 0.24678792540868344.


##### Northern Hemisphere - Wind Speed (Fig 11)
The wind speeds vary across most of the latitudinal coordinates, but there is a modest increase in wind speeds as we near the North Pole.

##  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [36]:
# Regression of wind speed on latitude, Southern Hemisphere (Fig12.png)
ax12 = linregressplot(
    x_values=south['Lat'],
    y_values=south['Wind Speed (mph)'],
    x_label="Latitude",
    y_label="Wind Speed (mph)",
    label_loc=(-55, 7),
)
plt.title("Wind Speeds for Latitudes in the Southern Hemisphere (10/15/2020)")
plt.savefig("Fig12.png");

<IPython.core.display.Javascript object>

The rvalue is -0.1427391711300915.


##### Southern Hemisphere - Wind Speed (Fig 12)
A slightly negative regression slope indicates decreasing wind speeds as a city gets closer to the equator, but more analysis is required to assess whether there is in fact a relationship.