# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
import json
from scipy.stats import linregress

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Path for the exported city table (CSV)
output_data_file = "output_data/cities.csv"

# Latitude and longitude bounds, each given as a (low, high) pair
lat_range, lng_range = (-90, 90), (-180, 180)

## Generate Cities List

In [None]:
# Build the list of unique city names nearest to randomly sampled coordinates.
cities = []
# Set used only for the membership test: lookups are O(1), while `cities`
# keeps the names in first-seen order.
seen_cities = set()

# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify nearest city for each lat, lng combination
for lat, lng in lat_lngs:
    city = citipy.nearest_city(lat, lng).city_name

    # If the city is unique, then add it to our cities list
    if city not in seen_cities:
        seen_cities.add(city)
        cities.append(city)

# Date stamp (MM/DD/YYYY) reused in the plot titles and captions below
today = time.strftime("%m/%d/%Y")

# Print the city count to confirm sufficient count
print(len(cities))

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [None]:
# Query the OpenWeatherMap current-weather endpoint once per candidate city and
# collect the fields used in the analysis, one list per output column.
list_dict = {
    'City Name': [],
    'City ID': [],
    'Latitude': [],
    'Longitude': [],
    'Temperature (F)': [],
    'Humidity (%)': [],
    'Cloudiness (%)': [],
    'Wind Speed (mph)': [],
}
skip_count = 0
base_url = 'https://api.openweathermap.org/data/2.5/weather'

for city in cities:
    # Let requests build the query string so that city names containing
    # spaces or reserved characters are percent-encoded correctly.
    query = {'q': city, 'units': 'imperial', 'appid': weather_api_key}
    response = requests.get(base_url, params=query).json()

    # Read every field before storing anything: a response that lacks any of
    # them is skipped as a whole, so all column lists stay the same length.
    try:
        city_name = response['name'].title()
        city_id = response['id']
        lat = response['coord']['lat']
        lon = response['coord']['lon']
        temp = response['main']['temp']
        humidity = response['main']['humidity']
        clouds = response['clouds']['all']
        wind_speed = response['wind']['speed']
    except KeyError:
        print("City not found... skipping.")
        skip_count += 1
        print(f'Skipped Cities = {skip_count}')
        print('----------------------------')
        continue

    # Log the record for this city
    print(f'City Name = {city_name}')
    print(f'Searched City Name = {city}')
    print(f'City ID = {city_id}')
    print(f'Latitude = {lat}')
    print(f'Longitude = {lon}')
    print(f'Temperature = {temp}F')
    print(f'Humidity = {humidity}%')
    print(f'Cloudiness = {clouds}% cloud coverage')
    print(f'Wind Speed = {wind_speed}mph')
    print('----------------------------')

    # Store the record, one value per column
    list_dict['City Name'].append(city_name)
    list_dict['City ID'].append(city_id)
    list_dict['Latitude'].append(lat)
    list_dict['Longitude'].append(lon)
    list_dict['Temperature (F)'].append(temp)
    list_dict['Humidity (%)'].append(humidity)
    list_dict['Cloudiness (%)'].append(clouds)
    list_dict['Wind Speed (mph)'].append(wind_speed)

# Summary of the run
print(f'Skipped Cities = {skip_count}')
total_cities = len(list_dict['City Name'])
print(f'Total Cities = {total_cities}')

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [None]:
# Assemble the collected columns into a single table, one row per API result.
city_weather_df = pd.DataFrame(list_dict)

# If several searched names returned the same City ID, keep the first row for
# that ID so the city still appears exactly once in the data set.
city_weather_df = city_weather_df.drop_duplicates(subset='City ID', keep='first')

# Export the de-duplicated table (the DataFrame index is written as the first column)
city_weather_df.to_csv('../output_data/cities.csv')

In [None]:
# Display the first rows of the city weather table as a quick visual check.
city_weather_df.head()

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
# Pick out any rows whose humidity reading is above 100 %.
exceeds_full_humidity = city_weather_df['Humidity (%)'] > 100
humidity_over = city_weather_df[exceeds_full_humidity]

# Show the first matching rows (an empty table means there is nothing to remove)
humidity_over.head()

In [None]:
#  Get the indices of cities that have humidity over 100%.


In [None]:
# Make a new DataFrame equal to the city data to drop all humidity outliers by index.
# Passing "inplace=False" will make a copy of the city_data DataFrame, which we call "clean_city_data".


## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [None]:
# Figure 1: scatter plot of current temperature against latitude for every city.
x_axis = city_weather_df['Latitude'].tolist()
max_lat = max(x_axis)
min_lat = min(x_axis)
y_axis = city_weather_df['Temperature (F)'].tolist()
max_temp = max(y_axis)
min_temp = min(y_axis)

# Symmetric x-limits centred on the equator, sized by whichever hemisphere
# reaches farther from it so that no point is clipped.
lat_bound = round(max(abs(min_lat), abs(max_lat)) + 5)

# Graphing functions
plt.scatter(x_axis, y_axis)
plt.xlabel('Latitude')
plt.ylabel('Temperature (F)')
plt.title(f'Figure 1. Temperature (F) vs. City Latitude ({today})')
plt.xlim(-lat_bound, lat_bound)
plt.ylim(round(min_temp-5), round(max_temp+5))

plt.tight_layout()

# Save before show(): the figure is saved while it is still the current one
plt.savefig("../output_data/Figure1.png")
plt.show()
print(f"Figure 1 shows a city's latitude and its current temperature in degrees Fahrenheit as of {today}.")

## Latitude vs. Humidity Plot

In [None]:
# Figure 2: scatter plot of current humidity against latitude for every city.
# Latitudes are re-read from the DataFrame so this cell gives the same result
# regardless of which other cells have reassigned max_lat / min_lat.
x_axis = city_weather_df['Latitude'].tolist()
y_axis = city_weather_df['Humidity (%)'].tolist()

# Observed humidity extremes (the y-axis below uses a fixed percentage scale)
max_hum = max(y_axis)
min_hum = min(y_axis)

# Symmetric x-limits centred on the equator, sized by whichever hemisphere
# reaches farther from it so that no point is clipped.
lat_bound = round(max(abs(min(x_axis)), abs(max(x_axis))) + 5)

# Graphing functions
plt.scatter(x_axis, y_axis)
plt.xlabel('Latitude')
plt.ylabel('Humidity (%)')
plt.title(f'Figure 2. Humidity (%) vs. City Latitude ({today})')
plt.xlim(-lat_bound, lat_bound)
# Fixed 0-100 % scale with a small margin so edge points stay visible
plt.ylim(-2, 102)

plt.tight_layout()

plt.savefig("../output_data/Figure2.png")
plt.show()
print(f"Figure 2 shows a city's latitude and its current percent humidity as of {today}.")

## Latitude vs. Cloudiness Plot

In [None]:
# Figure 3: scatter plot of current cloud cover against latitude for every city.
# Latitudes are re-read from the DataFrame so this cell gives the same result
# regardless of which other cells have reassigned max_lat / min_lat.
x_axis = city_weather_df['Latitude'].tolist()
y_axis = city_weather_df['Cloudiness (%)'].tolist()

# Observed cloudiness extremes (the y-axis below uses a fixed percentage scale)
max_cloud = max(y_axis)
min_cloud = min(y_axis)

# Symmetric x-limits centred on the equator, sized by whichever hemisphere
# reaches farther from it so that no point is clipped.
lat_bound = round(max(abs(min(x_axis)), abs(max(x_axis))) + 5)

# Graphing functions
plt.scatter(x_axis, y_axis)
plt.xlabel('Latitude')
plt.ylabel('Cloudiness (%)')
plt.title(f'Figure 3. Cloudiness (%) vs. City Latitude ({today})')
plt.xlim(-lat_bound, lat_bound)
# Fixed 0-100 % scale with a small margin so edge points stay visible
plt.ylim(-2, 102)

plt.tight_layout()

plt.savefig("../output_data/Figure3.png")
plt.show()
print(f"Figure 3 shows a city's latitude and its current percent cloud cover as of {today}.")

## Latitude vs. Wind Speed Plot

In [None]:
# Figure 4: scatter plot of current wind speed against latitude for every city.
# Latitudes are re-read from the DataFrame so this cell gives the same result
# regardless of which other cells have reassigned max_lat / min_lat.
x_axis = city_weather_df['Latitude'].tolist()
y_axis = city_weather_df['Wind Speed (mph)'].tolist()

# Highest observed wind speed, used to size the y-axis
max_wind = max(y_axis)

# Symmetric x-limits centred on the equator, sized by whichever hemisphere
# reaches farther from it so that no point is clipped.
lat_bound = round(max(abs(min(x_axis)), abs(max(x_axis))) + 5)

# Graphing functions
plt.scatter(x_axis, y_axis)
plt.xlabel('Latitude')
plt.ylabel('Wind Speed (mph)')
plt.title(f'Figure 4. Wind Speed (mph) vs. City Latitude ({today})')
plt.xlim(-lat_bound, lat_bound)
# Start slightly below zero so calm (0 mph) readings are not drawn on the axis line
plt.ylim(-1, round(max_wind+3))

plt.tight_layout()

plt.savefig("../output_data/Figure4.png")
plt.show()
print(f"Figure 4 shows a city's latitude and its current wind speed in miles per hour as of {today}.")

## Linear Regression

In [None]:
# Partition the cities by hemisphere; latitude 0 is grouped with the north.
in_north = city_weather_df['Latitude'] >= 0
in_south = city_weather_df['Latitude'] < 0
northern_lat_df = city_weather_df[in_north]
southern_lat_df = city_weather_df[in_south]

# Display the northern subset as the cell output
northern_lat_df

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Figure 5: linear regression of temperature on latitude, Northern Hemisphere.
x_values = northern_lat_df['Latitude']
y_values = northern_lat_df['Temperature (F)']

# Data extremes, used only to pad the axis limits
min_lat, max_lat = min(x_values), max(x_values)
min_temp, max_temp = min(y_values), max(y_values)

# Least-squares fit; pvalue and stderr are returned by linregress but not used
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
print(f"r-squared = {rvalue**2}")

plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"red")
# The annotation position is a hand-picked point in data coordinates
plt.annotate(line_eq,(6,10),color="red")
plt.xlabel('Latitude')
plt.ylabel('Temperature (F)')
plt.title(f'Figure 5. Temperature (F) vs. City Latitude in the Northern Hemisphere ({today})')
plt.xlim(round(min_lat-5),round(max_lat+5))
plt.ylim(round(min_temp-5), round(max_temp+5))
plt.tight_layout()

plt.savefig("../output_data/Figure5.png")
plt.show()

# NOTE(review): the conclusion below is fixed text, not derived from the fit;
# re-check it against the printed r-squared whenever the data is refreshed.
print(f"Figure 5 shows a strong, negative, linear relationship between a city's latitude in the Northern Hemisphere and its current temperature in degrees Fahrenheit as of {today}.")
print("The greater a city's latitude is, the more likely it will have a lower temperature.")

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Figure 6: linear regression of temperature on latitude, Southern Hemisphere.
x_values = southern_lat_df['Latitude']
y_values = southern_lat_df['Temperature (F)']

# Data extremes, used only to pad the axis limits
min_lat, max_lat = min(x_values), max(x_values)
min_temp, max_temp = min(y_values), max(y_values)

# Least-squares fit; pvalue and stderr are returned by linregress but not used
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
print(f"r-squared = {rvalue**2}")

plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"red")
# The annotation position is a hand-picked point in data coordinates
plt.annotate(line_eq,(-30,40),color="red")
plt.xlabel('Latitude')
plt.ylabel('Temperature (F)')
plt.title(f'Figure 6. Temperature (F) vs. City Latitude in the Southern Hemisphere ({today})')
plt.xlim(round(min_lat-5),round(max_lat+5))
plt.ylim(round(min_temp-5), round(max_temp+5))
plt.tight_layout()

plt.savefig("../output_data/Figure6.png")
plt.show()

# NOTE(review): the conclusion below is fixed text, not derived from the fit;
# re-check it against the printed r-squared whenever the data is refreshed.
print(f"Figure 6 shows a moderate, positive, linear relationship between a city's latitude in the Southern Hemisphere and its current temperature in degrees Fahrenheit as of {today}.")
print("If a city is closer to the equator, it is moderately more likely to have a higher temperature.")

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Figure 7: linear regression of humidity on latitude, Northern Hemisphere.
x_values = northern_lat_df['Latitude']
y_values = northern_lat_df['Humidity (%)']

# Data extremes, used only to pad the axis limits
min_lat, max_lat = min(x_values), max(x_values)
min_y, max_y = min(y_values), max(y_values)

# Least-squares fit; pvalue and stderr are returned by linregress but not used
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
print(f"r-squared = {rvalue**2}")

plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"red")
# The annotation position is a hand-picked point in data coordinates
plt.annotate(line_eq,(6,60),color="red")
plt.xlabel('Latitude')
plt.ylabel('Humidity (%)')
plt.title(f'Figure 7. Humidity (%) vs. City Latitude in the Northern Hemisphere ({today})')
plt.xlim(round(min_lat-5),round(max_lat+5))
plt.ylim(round(min_y-5), round(max_y+5))
plt.tight_layout()

plt.savefig("../output_data/Figure7.png")
plt.show()

# NOTE(review): the conclusion below is fixed text, not derived from the fit;
# re-check it against the printed r-squared whenever the data is refreshed.
print(f"Figure 7 does not show a significant relationship between a city's latitude in the Northern Hemisphere and its current percent humidity as of {today}.")

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Figure 8: linear regression of humidity on latitude, Southern Hemisphere.
x_values = southern_lat_df['Latitude']
y_values = southern_lat_df['Humidity (%)']

# Data extremes, used only to pad the axis limits
min_lat, max_lat = min(x_values), max(x_values)
min_y, max_y = min(y_values), max(y_values)

# Least-squares fit; pvalue and stderr are returned by linregress but not used
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
print(f"r-squared = {rvalue**2}")

plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"red")
# The annotation position is a hand-picked point in data coordinates
plt.annotate(line_eq,(-50,40),color="red")
plt.xlabel('Latitude')
plt.ylabel('Humidity (%)')
plt.title(f'Figure 8. Humidity (%) vs. City Latitude in the Southern Hemisphere ({today})')
plt.xlim(round(min_lat-5),round(max_lat+5))
plt.ylim(round(min_y-5), round(max_y+5))
plt.tight_layout()

plt.savefig("../output_data/Figure8.png")
plt.show()

# NOTE(review): the conclusion below is fixed text, not derived from the fit;
# re-check it against the printed r-squared whenever the data is refreshed.
print(f"Figure 8 does not show a significant relationship between a city's latitude in the Southern Hemisphere and its current percent humidity as of {today}.")

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Figure 9: linear regression of cloudiness on latitude, Northern Hemisphere.
x_values = northern_lat_df['Latitude']
y_values = northern_lat_df['Cloudiness (%)']

# Data extremes, used only to pad the axis limits
min_lat, max_lat = min(x_values), max(x_values)
min_y, max_y = min(y_values), max(y_values)

# Least-squares fit; pvalue and stderr are returned by linregress but not used
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
print(f"r-squared = {rvalue**2}")

plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"red")
# The annotation position is a hand-picked point in data coordinates
plt.annotate(line_eq,(6,40),color="red")
plt.xlabel('Latitude')
# The y-axis shows the cloudiness column plotted above
plt.ylabel('Cloudiness (%)')
plt.title(f'Figure 9. Cloudiness (%) vs. City Latitude in the Northern Hemisphere ({today})')
plt.xlim(round(min_lat-5),round(max_lat+5))
plt.ylim(round(min_y-5), round(max_y+5))
plt.tight_layout()

plt.savefig("../output_data/Figure9.png")
plt.show()

# NOTE(review): the conclusion below is fixed text, not derived from the fit;
# re-check it against the printed r-squared whenever the data is refreshed.
print(f"Figure 9 does not show a significant relationship between a city's latitude in the Northern Hemisphere and its current percent cloud coverage as of {today}.")

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Figure 10: linear regression of cloudiness on latitude, Southern Hemisphere.
x_values = southern_lat_df['Latitude']
y_values = southern_lat_df['Cloudiness (%)']

# Data extremes, used only to pad the axis limits
min_lat, max_lat = min(x_values), max(x_values)
min_y, max_y = min(y_values), max(y_values)

# Least-squares fit; pvalue and stderr are returned by linregress but not used
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
print(f"r-squared = {rvalue**2}")

plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"red")
# The annotation position is a hand-picked point in data coordinates
plt.annotate(line_eq,(-30,60),color="red")
plt.xlabel('Latitude')
# The y-axis shows the cloudiness column plotted above
plt.ylabel('Cloudiness (%)')
plt.title(f'Figure 10. Cloudiness (%) vs. City Latitude in the Southern Hemisphere ({today})')
plt.xlim(round(min_lat-5),round(max_lat+5))
plt.ylim(round(min_y-5), round(max_y+5))
plt.tight_layout()

plt.savefig("../output_data/Figure10.png")
plt.show()

# NOTE(review): the conclusion below is fixed text, not derived from the fit;
# re-check it against the printed r-squared whenever the data is refreshed.
print(f"Figure 10 does not show a significant relationship between a city's latitude in the Southern Hemisphere and its current percent cloud coverage as of {today}.")

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Figure 11: linear regression of wind speed on latitude, Northern Hemisphere.
x_values = northern_lat_df['Latitude']
y_values = northern_lat_df['Wind Speed (mph)']

# Data extremes, used only to pad the axis limits
min_lat, max_lat = min(x_values), max(x_values)
min_y, max_y = min(y_values), max(y_values)

# Least-squares fit; pvalue and stderr are returned by linregress but not used
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
print(f"r-squared = {rvalue**2}")

plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"red")
# The annotation position is a hand-picked point in data coordinates
plt.annotate(line_eq,(6,10),color="red")
plt.xlabel('Latitude')
plt.ylabel('Wind Speed (mph)')
plt.title(f'Figure 11. Wind Speed (mph) vs. City Latitude in the Northern Hemisphere ({today})')
plt.xlim(round(min_lat-5),round(max_lat+5))
plt.ylim(round(min_y-5), round(max_y+5))
plt.tight_layout()

plt.savefig("../output_data/Figure11.png")
plt.show()

# NOTE(review): the conclusion below is fixed text, not derived from the fit;
# re-check it against the printed r-squared whenever the data is refreshed.
print(f"Figure 11 does not show a significant relationship between a city's latitude in the Northern Hemisphere and its current wind speed in miles per hour as of {today}.")

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Figure 12: linear regression of wind speed on latitude, Southern Hemisphere.
x_values = southern_lat_df['Latitude']
y_values = southern_lat_df['Wind Speed (mph)']

# Data extremes, used only to pad the axis limits
min_lat, max_lat = min(x_values), max(x_values)
min_y, max_y = min(y_values), max(y_values)

# Least-squares fit; pvalue and stderr are returned by linregress but not used
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
print(f"r-squared = {rvalue**2}")

plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"red")
# The annotation position is a hand-picked point in data coordinates
plt.annotate(line_eq,(-15,25),color="red")
plt.xlabel('Latitude')
plt.ylabel('Wind Speed (mph)')
plt.title(f'Figure 12. Wind Speed (mph) vs. City Latitude in the Southern Hemisphere ({today})')
plt.xlim(round(min_lat-5),round(max_lat+5))
plt.ylim(round(min_y-5), round(max_y+5))
plt.tight_layout()

plt.savefig("../output_data/Figure12.png")
plt.show()

# NOTE(review): the conclusion below is fixed text, not derived from the fit;
# re-check it against the printed r-squared whenever the data is refreshed.
print(f"Figure 12 shows a weak, negative, linear relationship between a city's latitude in the Southern Hemisphere and its current wind speed in miles per hour as of {today}.")
print('The relationship may be due to outliers with relatively lower latitudes and relatively higher wind speeds. Further analysis should be conducted to account for these outliers')