In [1]:
#fix plots
#save .png
#fix lines of regression

In [2]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Output File (CSV)
output_data_file = "output_data/cities.csv"

# Range of latitudes and longitudes
lat_range = (-90, 90)
lng_range = (-180, 180)


# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [3]:
from citipy import city

ImportError: cannot import name 'city' from 'citipy' (/Users/josephneff/opt/anaconda3/lib/python3.9/site-packages/citipy/__init__.py)

## Generate Cities List

In [4]:
# List for holding lat_lngs and cities
lat_lngs = []
cities = []

# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify nearest city for each lat, lng combination
for lat_lng in lat_lngs:
    city = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name
    
    # If the city is unique, then add it to a our cities list
    if city not in cities:
        cities.append(city)

# Print the city count to confirm sufficient count
len(cities)

604

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it'sbeing processed (with the city number and city name).

> **HINT:** The OpenWeatherMap API only allows 60 calls per minute under their free plan. Try using `time.sleep(60)` after each set of 50 cities to avoid API errors due to large calls.

In [None]:
city_test = cities
url = "http://api.openweathermap.org/data/2.5/weather?"
units = "metric"

cities1 = []
temp = []
humidity = []
clouds = []
windspeed = []
lats = []
long = []

for city in city_test:
    query_url = f"{url}appid={weather_api_key}&units={units}&q={city}"
    try:
        url_city=(query_url).replace(" ","+")
        response=requests.get(url_city).json()
        cities1.append(response["name"])
        temp.append(response['main']["temp"])
        humidity.append(response['main']["humidity"])
        clouds.append(response["clouds"]["all"])
        windspeed.append(response['wind']["speed"])
        lats.append(response["coord"]["lat"])
        long.append(response["coord"]["long"])
        print(f'Processing number {(city_test.index(city)+1)}, which is {city}')
    except:
        print(f'Processing number {(city_test.index(city)+1)}, which is {city} and has no information')

Processing number 1, which is illoqqortoormiut and has no information
Processing number 2, which is atuona and has no information
Processing number 3, which is korla and has no information
Processing number 4, which is el dorado and has no information
Processing number 5, which is busselton and has no information
Processing number 6, which is vaini and has no information
Processing number 7, which is lambarene and has no information
Processing number 8, which is petropavlovsk-kamchatskiy and has no information
Processing number 9, which is victor harbor and has no information
Processing number 10, which is narsaq and has no information
Processing number 11, which is jacareacanga and has no information
Processing number 12, which is yellowknife and has no information
Processing number 13, which is victoria and has no information
Processing number 14, which is mataura and has no information
Processing number 15, which is jamestown and has no information
Processing number 16, which is pal

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [None]:
cities_df = pd.DataFrame({"City": cities1,
                        "Temperature": temp,
                        "Humidity": humidity,
                        "Clouds" : clouds,
                        "Windspeed": windspeed,
                        "Latitude": lats,
                        "Longitude": long
                         })
cities_df.to_csv("CitiesDF.csv", index=False)
cities_df.head()

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
#  Get the indices of cities that have humidity over 100%.
over_hundred = cities_df[cities_df.Humidity > 100]
over_hundred
#no humidity over 100

In [None]:
# Make a new DataFrame equal to the city data to drop all humidity outliers by index.
humidity_df = cities_df['Humidity']

qs = humidity_df.quantile([.25,.5,.75])
lower = qs[.25]
upper = qs[.75]
interquartile = upper-lower
lowerbnd = lower - (1.5*interquartile)
upperbnd = upper + (1.5*interquartile)

clean_city_data = cities_df.drop(cities_df.index[(cities_df['Humidity'] < lowerbnd) | (cities_df['Humidity'] > upperbnd)])
# Passing "inplace=False" will make a copy of the city_data DataFrame, which we call "clean_city_data".
clean_city_data.count()
cities_df.count()
#12 outliers dropped

## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [None]:
x_values = clean_city_data.Latitude

y_values = clean_city_data.Temperature

plt.scatter(x_values, y_values)
plt.xlabel('Latitude')
plt.ylabel('Temperature')
plt.title('City Latitude vs. Temperature (02/01/2022)')
plt.grid()
plt.show()
#plt.savefig("../output_data/lattemp.png")

## Latitude vs. Humidity Plot

In [None]:
x_values = clean_city_data.Latitude

y_values = clean_city_data.Humidity 

plt.scatter(x_values, y_values)
plt.xlabel('Latitude')
plt.ylabel('Humidity')
plt.title('City Latitude vs. Humidity (02/01/2022)')
plt.grid()
plt.show()

## Latitude vs. Cloudiness Plot

In [None]:
x_values = clean_city_data.Latitude

y_values = clean_city_data.Clouds

plt.scatter(x_values, y_values)
plt.xlabel('Latitude')
plt.ylabel('Cloudiness')
plt.title('City Latitude vs. Cloudiness (02/01/2022)')
plt.grid()
plt.show()

## Latitude vs. Wind Speed Plot

In [None]:
x_values = clean_city_data.Latitude

y_values = clean_city_data.Windspeed

plt.scatter(x_values, y_values)
plt.xlabel('Latitude')
plt.ylabel('Wind Speed')
plt.title('City Latitude vs. Wind Speed (02/01/2022)')
plt.grid()
plt.show()

## Linear Regression

In [None]:
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
nhem = clean_city_data.drop(clean_city_data.index[clean_city_data['Latitude']<0])

x_values = nhem.Temperature
y_values = nhem.Latitude

plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
plt.xlabel('Maximum Temperature')
plt.ylabel('Latitude')
plt.title("Northern Hemisphere Maximum Temperature vs. Latitude Linear Regression (02/01/2022)")
print(rvalue)
plt.grid()
plt.show()

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
shem = clean_city_data.drop(clean_city_data.index[clean_city_data.Latitude>0])

x_values = shem.Temperature
y_values = shem.Latitude

plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
plt.xlabel('Maximum Temperature')
plt.ylabel('Latitude')
plt.title("Southern Hemisphere Maximum Temperature vs. Latitude Linear Regression (02/01/2022)")
print(rvalue)
plt.grid()
plt.show()


####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
x_values = nhem.Humidity 
y_values = nhem.Latitude

plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
plt.xlabel('Humidity')
plt.ylabel('Latitude')
plt.title("Northern Hemisphere Humdity vs. Latitude Linear Regression (02/01/2022)")
print(rvalue)
plt.grid()
plt.show()

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
x_values = shem.Humidity 
y_values = shem.Latitude

plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
plt.xlabel('Humidity')
plt.ylabel('Latitude')
plt.title("Southern Hemisphere Humdity vs. Latitude Linear Regression (02/01/2022)")
print(rvalue)
plt.grid()
plt.show()

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
x_values = nhem.Clouds 
y_values = nhem.Latitude

plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
plt.xlabel('Cloudiness')
plt.ylabel('Latitude')
plt.title("Northern Hemisphere Cloudiness vs. Latitude Linear Regression (02/01/2022)")
print(rvalue)
plt.grid()
plt.show()

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
x_values = shem.Clouds 
y_values = shem.Latitude

plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
plt.xlabel('Cloudiness')
plt.ylabel('Latitude')
plt.title("Southern Hemisphere Cloudiness vs. Latitude Linear Regression (02/01/2022)")
print(rvalue)
plt.grid()
plt.show()

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
x_values = nhem.Windspeed 
y_values = nhem.Latitude

plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
plt.xlabel('Wind Speed')
plt.ylabel('Latitude')
plt.title("Northern Hemisphere Wind Speed vs. Latitude Linear Regression (02/01/2022)")
print(rvalue)
plt.grid()
plt.show()

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
x_values = shem.Windspeed 
y_values = shem.Latitude

plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
plt.xlabel('Wind Speed')
plt.ylabel('Latitude')
plt.title("Southern Hemisphere Wind Speed vs. Latitude Linear Regression (02/01/2022)")
print(rvalue)
plt.grid()
plt.show()