# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress
import json


# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Output File (CSV)
# NOTE(review): the export cell visible in this notebook writes to a hard-coded
# file name instead of reading this variable — confirm which path is intended.
output_data_file = "output_data/cities.csv"

# (min, max) bounds used when sampling random latitudes and longitudes
lat_range = (-90, 90)
lng_range = (-180, 180)

## Generate Cities List

In [None]:
# Build the list of unique cities nearest to a set of random coordinates.

# Create a set of random lat and lng combinations.
# NOTE: no seed is set, so each run samples a different set of cities.
# Uncomment the next line for a reproducible sample:
# np.random.seed(635)
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify the nearest city for each lat, lng combination.
# dict.fromkeys drops duplicates in a single pass while keeping first-seen order,
# which avoids re-scanning a growing list for every coordinate pair.
cities = list(dict.fromkeys(
    citipy.nearest_city(lat, lng).city_name for lat, lng in lat_lngs
))

# Show the city count to confirm there are enough cities to query
len(cities)

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [None]:
# Probe the forecast endpoint with one known city to inspect the JSON layout.
# - The query is passed through `params` so requests URL-encodes every value.
# - The hard-coded `id=524901` is dropped: it selects a fixed city by id and
#   conflicts with the `q` city-name lookup used here.
# - HTTPS keeps the API key out of plain-text traffic; the timeout stops a
#   stalled connection from hanging the notebook.
url = "https://api.openweathermap.org/data/2.5/forecast"
test_response = requests.get(
    url,
    params={"q": "london", "units": "Imperial", "APPID": weather_api_key},
    timeout=10,
)
test_json = test_response.json()
# Pretty-print the response so the available keys are easy to read
print(json.dumps(test_json, indent=4, sort_keys=True))

In [None]:
# Create empty lists to place data in 
city_name = []
Lat = []
Lon = []
Max_temp = []
Humidity = []
Cloudiness = []
Wind_Speed = []
Country = []
Date = []
# begining initial print of cities
print(f"Beginning data retrieval")
print(f"---------------------------------")
# create for loop to loop through the cities in the json file 
# create url path 
# retrieve data on cites weather conditions from the url path 
# formal into jason
# create try excpet to skip cities with a KeyError and IndexError
# append json, with keys, to each empty list
# print cities and except
for i, city in enumerate(cities):

    url = f"http://api.openweathermap.org/data/2.5/forecast?id=524901&APPID={weather_api_key}&q={city}&units=Imperial"

    response = requests.get(url)
    response_json=response.json()
    try:
        
        city_name.append(response_json["city"]["name"])
        Lat.append(response_json["city"]["coord"]["lat"])
        Lon.append(response_json["city"]["coord"]["lon"])
        Max_temp.append(response_json["list"][0]["main"]["temp_max"])
        Humidity.append(response_json["list"][0]["main"]["humidity"])
        Cloudiness.append(response_json["list"][0]["clouds"]["all"])
        Wind_Speed.append(response_json["list"][0]["wind"]["speed"])
        Country.append(response_json["city"]["country"])
        Date.append(response_json["list"][0]["dt"])
        print(f"Processing Record {i} | {city}")
    except (KeyError, IndexError):
        print(f"error processing, next {city}")
# print end of data retrieval        
print(f"--------------------------------")
print(f"Data retrieval complete")
print(f"--------------------------------")

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [None]:
# Combine the per-city lists into a single DataFrame (one column per list),
# write it out as CSV, then display it.
Cities_Weather_df = pd.DataFrame(
    data={
        "City": city_name,
        "Lat": Lat,
        "Lng": Lon,
        "Max Temp": Max_temp,
        "Humidity": Humidity,
        "Cloudiness": Cloudiness,
        "Wind Speed": Wind_Speed,
        "Country": Country,
        "Date": Date,
    }
)
Cities_Weather_df.to_csv("Weather_Output.csv")
Cities_Weather_df

In [None]:
# Show descriptive statistics for the numeric columns of the DataFrame
Cities_Weather_df.describe()

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
# Show descriptive statistics again; the Humidity "max" row reveals whether
# any city reports a value above 100
Cities_Weather_df.describe()

In [None]:
# Row labels of every city whose reported humidity exceeds 100
Cities_Weather_gt100_index = Cities_Weather_df.loc[
    Cities_Weather_df["Humidity"].gt(100)
].index
Cities_Weather_gt100_index

In [None]:
# Build a cleaned copy of the weather data with the humidity outliers removed.
# drop() returns a new DataFrame by default, so Cities_Weather_df is left untouched.
Clean_city_df = Cities_Weather_df.drop(index=Cities_Weather_gt100_index)
Clean_city_df.head()

## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [None]:
# Scatter plot of latitude against maximum temperature.
# The date shown in the title is taken from the first row's value in column
# position 8, read as epoch seconds and rendered as (MM/DD/YYYY).
first_timestamp = pd.to_datetime(Clean_city_df.iloc[0, 8], unit="s")
my_date = first_timestamp.strftime("(%m/%d/%Y)")
print(my_date)
Clean_city_df.plot.scatter(
    x="Lat",
    y="Max Temp",
    grid=True,
    figsize=(8, 8),
    title=f"City Latitude Vs. Max Temperature {my_date}",
)
plt.xlabel("Latitude")
plt.ylabel("Max Temperature(F)")
plt.show()

In [4]:
# Observation for the Latitude vs. Max Temperature plot
print("The temperature increases as I move from the southern hemisphere to the equator. Then decreases in temperature as I move away from the equator.")

The temperature increases as I move from the southern hemisphere to the equator. Then decreases in tempt as I move away from the equator.


In [None]:
# Scatter plot of latitude against humidity.
# The date shown in the title is taken from the first row's value in column
# position 8, read as epoch seconds and rendered as (MM/DD/YYYY).
Date_Secs = Clean_city_df.iloc[0,8]
my_date = pd.to_datetime(Date_Secs, unit="s").strftime("(%m/%d/%Y)")
print(my_date)
Clean_city_df.plot(kind="scatter", x="Lat", y="Humidity", grid=True, figsize=(8,8),
              title=f"City Latitude Vs. Humidity {my_date}")
plt.xlabel("Latitude")
plt.ylabel("Humidity(%)")
# Render the figure explicitly so the cell does not echo the label's Text repr
plt.show()

In [3]:
# Observation for the Latitude vs. Humidity plot
print("There appears to be a higher percentage of humidity the further away from the equator.")

There appears to be a higher percentage of humidity the further away from the equator.


## Latitude vs. Cloudiness Plot

In [None]:
# Scatter plot of latitude against cloudiness.
# The date shown in the title is taken from the first row's value in column
# position 8, read as epoch seconds and rendered as (MM/DD/YYYY).
Date_Secs = Clean_city_df.iloc[0,8]
my_date = pd.to_datetime(Date_Secs, unit="s").strftime("(%m/%d/%Y)")
print(my_date)
# Include the analysis date in the title and label both axes, matching the
# temperature and humidity plots.
Clean_city_df.plot(kind="scatter", x="Lat", y="Cloudiness", grid=True, figsize=(8,8),
              title=f"City Latitude Vs. Cloudiness {my_date}")
plt.xlabel("Latitude")
plt.ylabel("Cloudiness(%)")
plt.show()

In [5]:
# Observation for the Latitude vs. Cloudiness plot
print("Cloudiness appears to be equally distributed.")

Cloudiness appears to be eaully distributed


## Latitude vs. Wind Speed Plot

In [None]:
# Scatter plot of latitude against wind speed.
# The date shown in the title is taken from the first row's value in column
# position 8, read as epoch seconds and rendered as (MM/DD/YYYY).
Date_Secs = Clean_city_df.iloc[0,8]
my_date = pd.to_datetime(Date_Secs, unit="s").strftime("(%m/%d/%Y)")
print(my_date)
# Include the analysis date in the title, label both axes, and use the same
# figure size as the other latitude scatter plots.
Clean_city_df.plot(kind="scatter", x="Lat", y="Wind Speed", grid=True, figsize=(8,8),
              title=f"City Latitude Vs. Wind Speed {my_date}")
plt.xlabel("Latitude")
plt.ylabel("Wind Speed(mph)")
plt.show()

In [None]:
# Observation for the Latitude vs. Wind Speed plot
print("Wind speed appears to be grouped between 1 and 15 mph.")

## Linear Regression

In [None]:
# Create dataframes for Northern and Southern hemisphere 
# pull the latitiude from the Clean dataframe
Northern_Hemisphere_df = Clean_city_df.loc[Clean_city_df["Lat"] >= 0, :]
southern_Hemisphere_df = Clean_city_df.loc[Clean_city_df["Lat"] <= 0, :]

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# plot and perform regressiona analysis for the nothern hemisphere, Max Temp vs. Latitude
x_axis = Northern_Hemisphere_df["Lat"]
y_axis = Northern_Hemisphere_df["Max Temp"]
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_axis, y_axis)
regress_values = x_axis * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_axis,y_axis)
plt.plot(x_axis,regress_values,"r-")
plt.annotate(line_eq,(6,10),fontsize=15,color="red")
plt.title("Northern Hemisphere - Max Temp Vs. Latitude")
plt.xlabel("Latitude")
plt.ylabel("Max Temp")
print(f"The r-value is: {rvalue:.2f}") 
plt.show()

In [2]:
# Observation for the Northern Hemisphere Max Temp vs. Latitude regression
print("The Model shows there is a negative regression between Max Temp and latitude. As the distance moves away from the equator, the max temp drops.")

The Model shows there is a negative regression between Max Temp and latitude. As the distance moves away from the equator, the max temp drops.


####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# plot and perform regressiona analysis for the southern hemisphere, Max Temp vs. Latitude
x_axis = southern_Hemisphere_df["Lat"]
y_axis = southern_Hemisphere_df["Max Temp"]
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_axis, y_axis)
regress_values = x_axis * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_axis,y_axis)
plt.plot(x_axis,regress_values,"r-")
plt.annotate(line_eq,(6,10),fontsize=15,color="red")
plt.title("Southern Hemisphere - Max Temp Vs. Latitude")
plt.xlabel("Latitude")
plt.ylabel("Max Temp")
print(f"The r-value is: {rvalue:.2f}") 
plt.show()

In [3]:
# Observation for the Southern Hemisphere Max Temp vs. Latitude regression
print("There is a positive relationship between max temp and the latitude. As distance moves toward the equator, the max temp increases.")

There is a positive relationship between max temp and the latitude. As distnce moves toward the equator, the max temp increases.


####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# plot and perform regressiona analysis for the nothern hemisphere, Humidity(%) vs. Latitude
x_axis = Northern_Hemisphere_df["Lat"]
y_axis = Northern_Hemisphere_df["Humidity"]
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_axis, y_axis)
regress_values = x_axis * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_axis,y_axis)
plt.plot(x_axis,regress_values,"r-")
plt.annotate(line_eq,(6,10),fontsize=15,color="red")
plt.title("Northern Hemisphere - Humidity (%) Vs. Latitude")
plt.xlabel("Latitude")
plt.ylabel("Humidity (%")
print(f"The r-value is: {rvalue:.2f}") 
plt.show()

In [4]:
# Observation for the Northern Hemisphere Humidity vs. Latitude regression
print("There is nearly zero regression relationship between humidity and latitude in the northern hemisphere, showing the humidity percentage is equally distributed.")

There is nearly zero gression relationship between humidity and latitude.


####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# plot and perform regressiona analysis for the southern hemisphere, Humidity(%) vs. Latitude
x_axis = southern_Hemisphere_df["Lat"]
y_axis = southern_Hemisphere_df["Humidity"]
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_axis, y_axis)
regress_values = x_axis * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_axis,y_axis)
plt.plot(x_axis,regress_values,"r-")
plt.annotate(line_eq,(6,10),fontsize=15,color="red")
plt.title("Southern Hemisphere - Humidity(%) Vs. Latitude")
plt.xlabel("Latitude")
plt.ylabel("Humidity(%")
print(f"The r-value is: {rvalue:.2f}") 
plt.show()

In [6]:
# Observation for the Southern Hemisphere Humidity vs. Latitude regression
print("There is a slight negative linear relationship in the southern hemisphere. As the distance from the equator decreases, humidity decreases.")

There is a slight negative linear relationship in the southern Hhemisphere. As the ddistance from the equator decreases. Humidity decreases.


####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# plot and perform regressiona analysis for the nothern hemisphere, Cloudiness(%) vs. Latitude
x_axis = Northern_Hemisphere_df["Lat"]
y_axis = Northern_Hemisphere_df["Cloudiness"]
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_axis, y_axis)
regress_values = x_axis * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_axis,y_axis)
plt.plot(x_axis,regress_values,"r-")
plt.annotate(line_eq,(6,10),fontsize=15,color="red")
plt.title("Northern Hemisphere - Cloudiness(%) Vs. Latitude")
plt.xlabel("Latitude")
plt.ylabel("Cloudiness(%")
print(f"The r-value is: {rvalue:.2f}") 
plt.show()

In [7]:
# Observation for the Northern Hemisphere Cloudiness vs. Latitude regression
print("There is a very slight positive linear relationship. There is only a slightly higher percentage of clouds further away from the equator.")

There is a very slight positve libear realtionship. Only slighty is there a higher percentage of clouds fruther away from the equator.


####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# plot and perform regressiona analysis for the southern hemisphere, Cloudiness(%) vs. Latitude
x_axis = southern_Hemisphere_df["Lat"]
y_axis = southern_Hemisphere_df["Cloudiness"]
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_axis, y_axis)
regress_values = x_axis * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_axis,y_axis)
plt.plot(x_axis,regress_values,"r-")
plt.annotate(line_eq,(6,10),fontsize=15,color="red")
plt.title("Southern Hemisphere - Cloudiness(%) Vs. Latitude")
plt.xlabel("Latitude")
plt.ylabel("Cloudiness(%")
print(f"The r-value is: {rvalue:.2f}") 
plt.show()

In [8]:
# Observation for the Southern Hemisphere Cloudiness vs. Latitude regression
print("There is a slight negative linear relationship. There is a lower percentage of clouds closer to the equator.")

There is a slight neagtive linear relationshiprThere is a lower percentage of clouds closer to the equator.


####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# plot and perform regressiona analysis for the nothern hemisphere, Wind Speed (mph) vs. Latitude
x_axis = Northern_Hemisphere_df["Lat"]
y_axis = Northern_Hemisphere_df["Wind Speed"]
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_axis, y_axis)
regress_values = x_axis * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_axis,y_axis)
plt.plot(x_axis,regress_values,"r-")
plt.annotate(line_eq,(6,10),fontsize=15,color="red")
plt.title("Northern Hemisphere - Wind Speed(mph) Vs. Latitude")
plt.xlabel("Latitude")
plt.ylabel("Wind Speed(mph)")
print(f"The r-value is: {rvalue:.2f}") 
plt.show()

In [9]:
# Observation for the Northern Hemisphere Wind Speed vs. Latitude regression
print("There is a positive linear relationship. As the distance from the equator increases the wind speed increases.")

There is a positive linear relationship. As the disctance from the equator increaes the wind speed increases.


####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# plot and perform regressiona analysis for the southern hemisphere, Wind Speed (mph) vs. Latitude
x_axis = southern_Hemisphere_df["Lat"]
y_axis = southern_Hemisphere_df["Wind Speed"]
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_axis, y_axis)
regress_values = x_axis * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_axis,y_axis)
plt.plot(x_axis,regress_values,"r-")
plt.annotate(line_eq,(6,10),fontsize=15,color="red")
plt.title("Southern Hemisphere - Wind Speed(mph) Vs. Latitude")
plt.xlabel("Latitude")
plt.ylabel("Wind Speed(mph)")
print(f"The r-value is: {rvalue:.2f}") 
plt.show()

In [10]:
# Observation for the Southern Hemisphere Wind Speed vs. Latitude regression
print("There is a negative linear relationship. As the distance from the equator decreases the wind speed also decreases.")

There is a negative linear relationship. As the distance from the equator decreases the wind speed also decreases.
