# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import requests
import time
from pprint import pprint
from scipy import stats
import statsmodels.api as sm
from sklearn.linear_model import LinearRegression

#regressions
from scipy.stats import linregress

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Destination path for the exported city data (CSV)
output_data_file = "output_data/cities.csv"

# (min, max) bounds used when sampling random coordinates
lat_range, lng_range = (-90, 90), (-180, 180)

## Generate Cities List

In [2]:
# Sample 1500 random latitude/longitude pairs inside the configured bounds
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Map every coordinate pair to its nearest city, keeping each city only once.
# `seen_cities` gives constant-time membership checks, while the `cities`
# list preserves the order in which each city was first encountered.
cities = []
seen_cities = set()
for lat_lng in lat_lngs:
    city = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name

    # If the city is unique, then add it to our cities list
    if city not in seen_cities:
        seen_cities.add(city)
        cities.append(city)

# Print the city count to confirm sufficient count
len(cities)

620

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).

> **HINT:** The OpenWeatherMap API only allows 60 calls per minute under their free plan. Try using `time.sleep(60)` after each set of 50 cities to avoid API errors due to large calls.

In [3]:
# Peek at the first five generated city names as a sanity check
cities[0:5]

['souillac', 'khatanga', 'scarborough', 'te anau', 'berbera']

In [4]:
# Save config information.
# Use HTTPS so the API key in the query string is not sent in clear text.
url = "https://api.openweathermap.org/data/2.5/weather?"
# "imperial" is the unit system passed to the API in every request
units = "imperial"

In [5]:
# Build the query for a single test city. `units` is "imperial", so the
# results are NOT in Celsius; later plots label temperature in F and wind in mph.
city = "khatanga"
query_params = {"appid": weather_api_key, "q": city, "units": units}

# Get weather data; requests URL-encodes the parameters, and the timeout
# keeps the cell from hanging indefinitely if the service does not answer.
weather_response = requests.get(url, params=query_params, timeout=10)
print(weather_response.status_code)
weather_json = weather_response.json()

200


In [6]:
# Confirm the Python type of the decoded JSON payload
type(weather_json)

dict

In [7]:
# List the top-level sections available in the response
weather_json.keys()

dict_keys(['coord', 'weather', 'base', 'main', 'visibility', 'wind', 'clouds', 'dt', 'sys', 'timezone', 'id', 'name', 'cod'])

In [8]:
# Pretty-print the full response to see where each field lives
pprint(weather_json)

{'base': 'stations',
 'clouds': {'all': 100},
 'cod': 200,
 'coord': {'lat': 71.9667, 'lon': 102.5},
 'dt': 1644209916,
 'id': 2022572,
 'main': {'feels_like': -30.93,
          'grnd_level': 1029,
          'humidity': 98,
          'pressure': 1033,
          'sea_level': 1033,
          'temp': -18.33,
          'temp_max': -18.33,
          'temp_min': -18.33},
 'name': 'Khatanga',
 'sys': {'country': 'RU', 'sunrise': 1644202555, 'sunset': 1644220342},
 'timezone': 25200,
 'visibility': 2305,
 'weather': [{'description': 'overcast clouds',
              'icon': '04d',
              'id': 804,
              'main': 'Clouds'}],
 'wind': {'deg': 233, 'gust': 17.11, 'speed': 9.69}}


In [9]:
# Pull the fields of interest out of the sample response, one section at a time
lat, lng = (weather_json["coord"][axis] for axis in ("lat", "lon"))
country = weather_json["sys"]["country"]
clouds = weather_json["clouds"]["all"]

date_dt, city_name = weather_json["dt"], weather_json["name"]
wind = weather_json["wind"]["speed"]

temp, pressure, humidity = (
    weather_json["main"][field] for field in ("temp", "pressure", "humidity")
)

In [10]:
# Spot-check one of the parsed values
humidity

98

In [11]:
# Look at the last ten generated city names
cities[-10:]

['tala',
 'goderich',
 'sao domingos',
 'esna',
 'samfya',
 'ust-uda',
 'shahr-e babak',
 'campbeltown',
 'bardiyah',
 'buala']

In [12]:
# Scratch arithmetic — presumably a rough estimate of the minutes needed at
# 60 API calls per minute (TODO confirm; the run shown above produced 620 cities)
650/60

10.833333333333334

In [None]:
# Parallel lists: position i in every list describes the same city.
lats = []
lngs = []
countries = []
clouds = []
dates = []
names = []
winds = []
temps = []
pressures = []
humids = []

# Throttling: pause after every batch of calls to stay under the API's
# per-minute limit (the hint above suggests 60 s after each set of 50 cities).
BATCH_SIZE = 50
BATCH_PAUSE_SECONDS = 60

cities_to_query = cities[0:500]

for city_number, city in enumerate(cities_to_query, start=1):
    # Log the city number and name as it is being processed
    print(f"{city_number}: {city}")

    # Passing params lets requests URL-encode city names (spaces, accents, ...)
    query_params = {"appid": weather_api_key, "q": city, "units": units}

    try:
        weather_response = requests.get(url, params=query_params, timeout=10)
    except requests.RequestException as error:
        # One network failure should not abort the whole collection run
        print(f"request failed: {error}")
        weather_response = None
    else:
        print(weather_response.status_code)
    print()

    if weather_response is not None and weather_response.status_code == 200:
        # get data
        weather_json = weather_response.json()

        try:
            # Read every field before appending anything, so a missing key
            # can never leave the lists with different lengths.
            lat = weather_json["coord"]["lat"]
            lng = weather_json["coord"]["lon"]
            country = weather_json["sys"]["country"]
            cloudiness = weather_json["clouds"]["all"]

            date_dt = weather_json["dt"]
            city_name = weather_json["name"]
            wind = weather_json["wind"]["speed"]

            temp = weather_json["main"]["temp"]
            pressure = weather_json["main"]["pressure"]
            humidity = weather_json["main"]["humidity"]
        except KeyError as missing_key:
            print(f"skipping {city}: response has no {missing_key} field")
            print()
        else:
            # Store the complete record
            lats.append(lat)
            lngs.append(lng)
            countries.append(country)
            clouds.append(cloudiness)
            dates.append(date_dt)
            names.append(city_name)
            winds.append(wind)
            temps.append(temp)
            pressures.append(pressure)
            humids.append(humidity)

    # Sleep between batches, but not after the final city
    if city_number % BATCH_SIZE == 0 and city_number < len(cities_to_query):
        time.sleep(BATCH_PAUSE_SECONDS)

souillac
200

khatanga
200

scarborough
200

te anau
200

berbera
200

ancud
200

piranhas
200

tasiilaq
200

columbus
200

roma
200

fernie
200

progreso
200

port alfred
200

vostok
200

sohbatpur
200

hermanus
200

kalabo
200

jamestown
200

atuona
200

beringovskiy
200

caucaia
200

busselton
200

namibe
200

lompoc
200

ushuaia
200

adamas
200

hofn
200

muros
200

vaini
200

mataura
200

byron bay
200

tuktoyaktuk
200

punta arenas
200

dikson
200

rikitea
200

bisho
200

nikolskoye
200

barentsburg
404

nuuk
200

amderma
404

laguna
200

albany
200

turtas
200

sao joao da barra
200

aksha
200

thompson
200

mbour
200

savannah bight
200

cape town
200

waddan
200

kapaa
200

yumen
200

salalah
200

nouadhibou
200

sao luis de montes belos
200

taoudenni
200

faanui
200

bluff
200

ribeira grande
200

belushya guba
404

chipinge
200

rio grande
200

lorengau
200

raichur
200

ilulissat
200

barrow
200

quelimane
200

sulangan
200

qaanaaq
200

carnarvon
200

keetmanshoop
200

sa

In [None]:
 df = pd.DataFrame()

df["City"] = names
df["Country"] = countries
df["Latitude"] = lats
df["Longitude"] = lngs
df["Date"] = dates
df["Wind_Speed"] = winds
df["Pressure"] = pressures
df["Humidity"] = humids
df["Temperature"] = temps
df["Cloudiness"] = clouds

df.head(190)

In [None]:
# Summarize column dtypes and non-null counts
weather_df.info()

In [None]:
# Write the collected weather data to CSV without the index column.
# NOTE(review): `output_data_file` is defined in the setup cell, but this cell
# writes to "my_data.csv" instead — confirm which path is intended.
weather_df.to_csv("my_data.csv", index=False)

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
#  Get the indices of cities that have humidity over 100%.


In [None]:
# Make a new DataFrame equal to the city data to drop all humidity outliers by index.
# Passing "inplace=False" will make a copy of the city_data DataFrame, which we call "clean_city_data".


## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [None]:
# Re-check column dtypes and non-null counts before plotting
weather_df.info()

In [None]:
# Summary statistics for the numeric columns
weather_df.describe()

In [None]:
# Flag rows whose humidity reading is at most 100 %
mask = weather_df["humidity"].le(100)
# Keep the flagged rows, discard rows with any missing value, renumber from 0
weather_df_sub = weather_df[mask].dropna().reset_index(drop=True)

In [None]:
# Inspect the unfiltered frame again (weather_df_sub holds the filtered copy)
weather_df.info()

In [None]:
# Scatter of temperature against latitude for every retrieved city
x_values = weather_df["latitude"]
y_values = weather_df["temperature"]

plt.scatter(x_values,y_values)
plt.title('Temperature vs Latitude')
plt.xlabel('Latitude')
plt.ylabel('Temperature (F)')
# Apply the layout BEFORE saving so the PNG matches the displayed figure
plt.tight_layout()
plt.savefig("temp_lat_asante.png")
plt.show()

## Latitude vs. Humidity Plot

In [None]:
# Scatter of humidity against latitude for every retrieved city
x_values = weather_df["latitude"]
y_values = weather_df["humidity"]

plt.scatter(x_values,y_values)
plt.title('Humidity vs Latitude')
plt.xlabel('Latitude')
plt.ylabel('humidity (%)')
# Apply the layout BEFORE saving so the PNG matches the displayed figure
plt.tight_layout()
plt.savefig("humid_lat_asante.png")
plt.show()

## Latitude vs. Cloudiness Plot

In [None]:
# Scatter of cloudiness against latitude for every retrieved city
x_values = weather_df["latitude"]
y_values = weather_df["clouds"]

plt.scatter(x_values,y_values)
plt.title('Latitude vs. Cloudiness')
plt.xlabel('Latitude')
plt.ylabel('Cloudiness (%)')
# Apply the layout BEFORE saving so the PNG matches the displayed figure
plt.tight_layout()
plt.savefig("cloud_lat_asante.png")
plt.show()

## Latitude vs. Wind Speed Plot

In [None]:
# Scatter of wind speed against latitude for every retrieved city
x_values = weather_df["latitude"]
y_values = weather_df["wind_speed"]

plt.scatter(x_values,y_values)
plt.title('Latitude vs. Wind Speed')
plt.xlabel('Latitude')
plt.ylabel('Wind Speed (mph)')
# Apply the layout BEFORE saving so the PNG matches the displayed figure
plt.tight_layout()
plt.savefig("wind_lat_asante.png")
plt.show()

## Linear Regression

In [None]:
# OPTIONAL: Create a function to create Linear Regression plots
# def linear_regression(x_values, y_values):
#     (slope, intercept, rvalue, pvalue, stderr) = st.linregress(x_values, y_values)
#     regress_values = x_values * slope + intercept
#     line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
#     correlation = st.pearsonr(x_values, y_values)
    
#     print(f"r-squared: {rvalue**2}")
#     print(f"Correlation: {correlation}")
#     print(f"Line: {line_eq}")

#     return regress_values, line_eq

In [None]:
# Create Northern and Southern Hemisphere DataFrames
#mask = weather_df['latitude'] > 0
#northern_hemisphere = weather_df[mask]
#southern_hemisphere = weather_df[~mask]

In [None]:
# Split the filtered data at the equator; latitude 0 is counted as northern
df_north = weather_df_sub[weather_df_sub["latitude"].ge(0)]
df_south = weather_df_sub[weather_df_sub["latitude"].lt(0)]

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Regress temperature on latitude for the northern-hemisphere subset
x_values, y_values = df_north['latitude'], df_north['temperature']

# Least-squares fit; keep all five statistics returned by linregress
slope, intercept, rvalue, pvalue, stderr = stats.linregress(x_values, y_values)
regress_values = x_values.mul(slope).add(intercept)
line_eq = f"y = {str(round(slope, 2))}x + {str(round(intercept, 2))}"
print(f"The r-squared is: {rvalue**2}")

# Observations, the fitted line, and the fitted equation as a red label
plt.scatter(x_values, y_values)
plt.plot(x_values, regress_values, "r-")
plt.annotate(line_eq, xy=(10, 20), color="red", fontsize=12)
plt.title('Max Temp vs. Latitude Linear Regression (Northern Hemisphere)')
plt.xlabel('Latitude')
plt.ylabel('Temperature (F)')
plt.show()

In [None]:
# Feature matrix (2-D) and target vector taken from the northern-hemisphere
# frame `df_north`; `weather_df_north` is never defined in this notebook.
feature = df_north[["latitude"]]
target = df_north["temperature"]

# STATSMODELS
# add_constant appends the intercept column that OLS does not add by itself
X = sm.add_constant(feature)
Y = target

model = sm.OLS(Y,X)
results = model.fit()
results.summary()

In [None]:
# Fit a scikit-learn linear model on the same feature/target pair and
# print its coefficient array followed by its intercept
reg = LinearRegression().fit(feature.to_numpy(), target.to_numpy())
print(reg.coef_, reg.intercept_, sep="\n")

In [None]:
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error 

In [None]:
# Observed temperatures and the fitted model's predictions for the northern
# hemisphere; `df_north` is the frame defined earlier (the previously used
# name `weather_df_north` does not exist).
actual = df_north["temperature"]
# reshape(-1, 1) turns the 1-D latitude values into the 2-D shape predict expects
predictions = reg.predict(df_north["latitude"].values.reshape(-1, 1))

r2 = r2_score(actual, predictions)
mse = mean_squared_error(actual, predictions)
mae = mean_absolute_error(actual, predictions)

print(f"R2: {r2}")
print(f"MSE: {mse}")
print(f"RMSE: {np.sqrt(mse)}")
print(f"MAE: {mae}")

In [None]:
# Predicted vs. actual temperatures, with the y = x line as the
# perfect-prediction reference
plt.figure(figsize=(10, 6))
plt.ylabel("Actual")
plt.xlabel("Predicted")

plt.scatter(predictions, actual)
plt.plot(actual, actual)

plt.show()

In [None]:
# Difference between fitted and observed temperatures.
# NOTE: computed as predicted - actual, so positive values are over-predictions.
residuals = predictions - actual

plt.figure(figsize=(10,6))

plt.scatter(predictions, residuals)
# Zero-reference line across the full width of the axes, whatever range the
# predictions cover (previously hard-coded to x in [-10, 85])
plt.axhline(0)

plt.xlabel("Predicted")
plt.ylabel("Residuals")

plt.show()

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Regress temperature on latitude for the southern-hemisphere subset
x_values, y_values = df_south["latitude"], df_south["temperature"]

# Least-squares fit; keep all five statistics returned by linregress
slope, intercept, rvalue, pvalue, stderr = stats.linregress(x_values, y_values)
regress_values = x_values.mul(slope).add(intercept)
line_eq = f"y = {str(round(slope, 2))}x + {str(round(intercept, 2))}"
print(f"The r-squared is: {rvalue**2}")

# Observations, the fitted line, and the fitted equation as a red label
plt.scatter(x_values, y_values)
plt.plot(x_values, regress_values, "r-")
plt.annotate(line_eq, xy=(-35, 80), color="red", fontsize=12)
plt.title('Max Temp vs. Latitude Linear Regression (Southern Hemisphere)')
plt.xlabel('Latitude')
plt.ylabel('Temperature (F)')
plt.show()

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Regress humidity on latitude for the northern-hemisphere subset
x_values, y_values = df_north["latitude"], df_north["humidity"]

# Least-squares fit; keep all five statistics returned by linregress
slope, intercept, rvalue, pvalue, stderr = linregress(x_values, y_values)
regress_values = x_values.mul(slope).add(intercept)
line_eq = f"y = {str(round(slope, 2))}x + {str(round(intercept, 2))}"
print(f"The r-squared is: {rvalue**2}")

# Observations, the fitted line, and the fitted equation as a red label
plt.scatter(x_values, y_values)
plt.plot(x_values, regress_values, "r-")
plt.annotate(line_eq, xy=(50, 20), color="red", fontsize=12)
plt.title('Humidity at Latitude Positions (Northern Hemisphere)')
plt.xlabel('Latitude')
plt.ylabel('Humidity (%)')
plt.tight_layout()
plt.show()

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Regress humidity on latitude for the southern-hemisphere subset
x_values, y_values = df_south["latitude"], df_south["humidity"]

# Least-squares fit; keep all five statistics returned by linregress
slope, intercept, rvalue, pvalue, stderr = linregress(x_values, y_values)
regress_values = x_values.mul(slope).add(intercept)
line_eq = f"y = {str(round(slope, 2))}x + {str(round(intercept, 2))}"
print(f"The r-squared is: {rvalue**2}")

# Observations, the fitted line, and the fitted equation as a red label
plt.scatter(x_values, y_values)
plt.plot(x_values, regress_values, "r-")
plt.annotate(line_eq, xy=(-35, 80), color="red", fontsize=12)
plt.title('Humidity at Latitude Positions (Southern Hemisphere)')
plt.xlabel('Latitude')
plt.ylabel('Humidity (%)')
plt.tight_layout()
plt.show()

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Regress cloudiness on latitude for the northern-hemisphere subset
x_values, y_values = df_north["latitude"], df_north["clouds"]

# Least-squares fit; keep all five statistics returned by linregress
slope, intercept, rvalue, pvalue, stderr = linregress(x_values, y_values)
regress_values = x_values.mul(slope).add(intercept)
line_eq = f"y = {str(round(slope, 2))}x + {str(round(intercept, 2))}"
print(f"The r-squared is: {rvalue**2}")

# Observations, the fitted line, and the fitted equation as a red label
plt.scatter(x_values, y_values)
plt.plot(x_values, regress_values, "r-")
plt.annotate(line_eq, xy=(50, 20), color="red", fontsize=12)
plt.title('Cloudiness at Latitude Positions (Northern Hemisphere)')
plt.xlabel('Latitude')
plt.ylabel('Cloudiness (%)')
plt.tight_layout()
plt.show()

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Regress cloudiness on latitude for the southern-hemisphere subset
x_values, y_values = df_south["latitude"], df_south["clouds"]

# Least-squares fit; keep all five statistics returned by linregress
slope, intercept, rvalue, pvalue, stderr = linregress(x_values, y_values)
regress_values = x_values.mul(slope).add(intercept)
line_eq = f"y = {str(round(slope, 2))}x + {str(round(intercept, 2))}"
print(f"The r-squared is: {rvalue**2}")

# Observations, the fitted line, and the fitted equation as a red label
plt.scatter(x_values, y_values)
plt.plot(x_values, regress_values, "r-")
plt.annotate(line_eq, xy=(-30, 60), color="red", fontsize=12)
plt.title('Cloudiness at Latitude Positions (Southern Hemisphere)')
plt.xlabel('Latitude')
plt.ylabel('Cloudiness (%)')
plt.tight_layout()
plt.show()

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Regress wind speed on latitude for the northern-hemisphere subset
x_values, y_values = df_north["latitude"], df_north["wind_speed"]

# Least-squares fit; keep all five statistics returned by linregress
slope, intercept, rvalue, pvalue, stderr = linregress(x_values, y_values)
regress_values = x_values.mul(slope).add(intercept)
line_eq = f"y = {str(round(slope, 2))}x + {str(round(intercept, 2))}"
print(f"The r-squared is: {rvalue**2}")

# Observations, the fitted line, and the fitted equation as a red label
plt.scatter(x_values, y_values)
plt.plot(x_values, regress_values, "r-")
plt.annotate(line_eq, xy=(50, 20), color="red", fontsize=12)
plt.title('Wind Speed at Latitude Positions (Northern Hemisphere)')
plt.xlabel('Latitude')
plt.ylabel('Wind Speed (mph)')
plt.tight_layout()
plt.show()

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Regress wind speed on latitude for the southern-hemisphere subset
x_values, y_values = df_south["latitude"], df_south["wind_speed"]

# Least-squares fit; keep all five statistics returned by linregress
slope, intercept, rvalue, pvalue, stderr = linregress(x_values, y_values)
regress_values = x_values.mul(slope).add(intercept)
line_eq = f"y = {str(round(slope, 2))}x + {str(round(intercept, 2))}"
print(f"The r-squared is: {rvalue**2}")

# Observations, the fitted line, and the fitted equation as a red label
plt.scatter(x_values, y_values)
plt.plot(x_values, regress_values, "r-")
plt.annotate(line_eq, xy=(-30, 20), color="red", fontsize=12)
plt.title('Wind Speed at Latitude Positions (Southern Hemisphere)')
plt.xlabel('Latitude')
plt.ylabel('Wind Speed (mph)')
plt.tight_layout()
plt.show()