# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests

from api_keys import g_key
from api_keys import weather_api_key

ModuleNotFoundError: No module named 'api_keys'

## Generate Cities List

In [None]:
# import random
# from citipy import citipy
# import random

# lat = [random.uniform(-90,90) for x in range(1250)] #lat range (-90 to 90deg)
# lng = [random.uniform(-180,180) for x in range(1250)] #lng range (-180 to 180deg)

# coords = zip(lat,lng)

# cities =[] #creating empty set

# # cities

# for coord in coords:
#         city = citipy.nearest_city(coord[0],coord[1]).city_name
#         if city not in cities:
#             cities.append(city)

# print(f"The list city has {len(cities)} cities")

# # print(cities) ##testing



### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [None]:
# #forming dataframe
# df_cities = pd.DataFrame({"City":cities})

# df_cities

In [None]:
#API request
from pprint import pprint

# Query the OpenWeatherMap current-weather endpoint once per city in df_cities
# and copy the fields of interest into new df_cities columns.
# HTTPS is used so the API key in the query string is not sent in plaintext.
base_url = "https://api.openweathermap.org/data/2.5/weather"

print("Beginning Data Retrieval")
print("--------------------------------")

for index, row in df_cities.iterrows():

    parameters = {
        "q": row["City"],
        "appid": weather_api_key,
        "units": "imperial",  # imperial units: Fahrenheit and miles/hour
    }

    try:
        # A timeout keeps one unresponsive request from hanging the whole run.
        response_data = requests.get(base_url, params=parameters, timeout=10).json()

        # Read every field before touching the DataFrame so that a response
        # with a missing key never leaves a half-filled row behind.
        weather = {
            "Lat": response_data["coord"]["lat"],
            "Lng": response_data["coord"]["lon"],
            "Max Temp": response_data["main"]["temp_max"],
            "Humidity": float(response_data["main"]["humidity"]),
            "Cloudiness": response_data["clouds"]["all"],
            "Wind Speed": response_data["wind"]["speed"],
            "Country": response_data["sys"]["country"],
            "Date": response_data["dt"],
        }
        city_name = response_data["name"]
    except KeyError:
        # A payload without the expected keys is treated as "city not found".
        print("\n City not found. Skipping... \n")
        continue
    except (ValueError, requests.exceptions.RequestException) as error:
        # Network failure, timeout, or a body that is not valid JSON.
        print(f"\n Request failed for {row['City']} ({error}). Skipping... \n")
        continue

    for column, value in weather.items():
        df_cities.loc[index, column] = value

    # Records are numbered from 1 and reported in sets of 50.
    record = index + 1
    set_no = index // 50 + 1

    print(f"Processing Record {record} of Set {set_no} | {city_name}")

print("--------------------------------")
print("Data Retrieval Complete")
print("--------------------------------")


### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [None]:
#displaying DF
df_cities

In [None]:
df_cities.to_csv("output.csv", index=True, index_label="City No.")

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
df_cities["Humidity"] 

In [None]:
# Select the rows of df_cities whose humidity reading exceeds 100%.
over_100_mask = df_cities["Humidity"] > 100
df_humidity_100 = df_cities[over_100_mask]

df_humidity_100

# Therefore no cities where the humidity is greater than 100%

In [None]:
# Make a new DataFrame equal to the city data to drop all humidity outliers by index.
# Passing "inplace=False" will make a copy of the city_data DataFrame, which we call "clean_city_data".

# df_cities_cleaned = df_cities.drop(df_humidity_100)

# df_cities_cleaned

## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [None]:
# Scatter plot of city latitude against maximum temperature, saved as Fig1.png.
latitudes = pd.to_numeric(df_cities["Lat"])
temperatures = pd.to_numeric(df_cities["Max Temp"])

#generating scatter plot lat vs temperature
plt.figure(figsize=(8, 6), dpi=80) #re-sizing the plot
plt.scatter(latitudes, temperatures, marker="o", facecolors="blue", edgecolors="black")

#adding title, x & y labels and grid to the scatter plot
plt.title("City Latitude vs. Max Temperature (23/10/21)")
# The weather data is requested with units="imperial", so temperatures are
# in degrees Fahrenheit, not Celsius.
plt.ylabel("Max Temperature (deg F)")
plt.xlabel("Latitude")
plt.grid()

#saving png image file in the folder (before show(), which clears the figure)
plt.savefig("Fig1.png")
#displaying scatter plot
plt.show()

##analysis
#Temperatures increase moving closer towards the equator. 
#Some cities in the Northern hemisphere experience lower temperatures (in the negatives) due to their 
#location being closer to the north pole

## Latitude vs. Humidity Plot

In [None]:
# Scatter plot of city latitude against humidity, saved as Fig2.png.
humidity = df_cities["Humidity"]

# Marker appearance shared with the other latitude scatter plots.
marker_style = {"marker": "o", "facecolors": "blue", "edgecolors": "black"}

plt.figure(figsize=(8, 6), dpi=80)
plt.scatter(latitudes, humidity, **marker_style)

# Axis labels, title and grid.
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")
plt.title("City Latitude vs. Humidity (23/10/21)")
plt.grid()

# Write the image file first, then display the figure.
plt.savefig("Fig2.png")
plt.show()

#analysis
#More cities have humidity 60% or higher. 
#All cities at the equator (0 deg) latitude have humidity 60% or higher
#More cities near the tropics have lower humidity levels

## Latitude vs. Cloudiness Plot

In [None]:
# Scatter plot of city latitude against cloudiness, saved as Fig3.png.
cloudiness = pd.to_numeric(df_cities["Cloudiness"])

# Marker appearance shared with the other latitude scatter plots.
marker_style = {"marker": "o", "facecolors": "blue", "edgecolors": "black"}

plt.figure(figsize=(8, 6), dpi=80)
plt.scatter(latitudes, cloudiness, **marker_style)

# Axis labels, title and grid.
plt.xlabel("Latitude")
plt.ylabel("Cloudiness (%)")
plt.title("City Latitude vs. Cloudiness (23/10/21)")
plt.grid()

# Write the image file first, then display the figure.
plt.savefig("Fig3.png")
plt.show()

##analysis
#at 0 deg lat, more cities  have 100% cloudiness 
#cities located in the middle have lower cloudiness percentages
#again cities far in the northern hemisphere (60-80deg) tend to have higher percentage of cloudiness


## Latitude vs. Wind Speed Plot

In [None]:
from matplotlib.pyplot import figure

# Scatter plot of city latitude against wind speed, saved as Fig4.png.
winds = pd.to_numeric(df_cities["Wind Speed"])

plt.figure(figsize=(8, 6), dpi=80)
plt.scatter(latitudes, winds, marker="o", facecolors="blue", edgecolors="black")

plt.title("City Latitude vs. Windspeed (23/10/21)")

# The weather data is requested with units="imperial", so wind speed is in
# miles per hour, not metres per second.
plt.ylabel("Windspeed (mph)")
plt.xlabel("Latitude")
plt.grid()

# Save before show(), which clears the current figure.
plt.savefig("Fig4.png")
plt.show()

#analysis
#

## Linear Regression

In [None]:
# Label each city with its hemisphere based on the sign of its latitude.
# `latitudes` is a Series, so a plain `if latitudes > 0:` raises
# "truth value of a Series is ambiguous"; boolean masks label row by row.
# Rows with a missing latitude match no mask and keep a missing label.
df_cities["Hemisphere"] = pd.Series(index=df_cities.index, dtype="object")
df_cities.loc[latitudes > 0, "Hemisphere"] = "Northern Hemisphere"
df_cities.loc[latitudes < 0, "Hemisphere"] = "Southern Hemisphere"
df_cities.loc[latitudes == 0, "Hemisphere"] = "Equator"

df_cities

In [None]:
import scipy.stats as st

def linear_regression(x_value,y_value,x_label,y_label,title):
    """Plot y_value against x_value with a least-squares regression line.

    Prints the r-value, draws the scatter points and the fitted line,
    annotates the line equation and shows the figure.

    Parameters
    ----------
    x_value, y_value : array-like of numbers
        Paired observations. Pairs where either value is NaN are dropped
        before fitting.
    x_label, y_label : str
        Axis labels.
    title : str
        Plot title.
    """
    x = np.asarray(x_value, dtype=float)
    y = np.asarray(y_value, dtype=float)

    # Drop incomplete pairs (e.g. cities that were skipped during retrieval);
    # a single NaN would otherwise turn the whole fit into NaN.
    complete = ~(np.isnan(x) | np.isnan(y))
    x = x[complete]
    y = y[complete]

    fit = st.linregress(x, y)
    slope = fit.slope
    intercept = fit.intercept
    print(f"The r-value is {fit.rvalue}")

    regress_values = x * slope + intercept
    line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

    plt.figure(figsize=(8, 6), dpi=80)
    plt.scatter(x, y, marker="o", facecolors="blue", edgecolors="black")
    plt.plot(x, regress_values, "r-")
    # Position the equation relative to the axes so it stays visible
    # whatever the data range is.
    plt.annotate(line_eq, (0.05, 0.92), xycoords="axes fraction",
                 fontsize=12, color="red")

    plt.xlabel(f"{x_label}")
    plt.ylabel(f"{y_label}")
    plt.title(f"{title}")

    plt.show()

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Regress max temperature on latitude for cities north of the equator.
northern = df_cities[df_cities["Lat"] > 0]
x_values = northern["Lat"]
y_values = northern["Max Temp"]
x_label = "Latitude"
y_label = "Max Temp (F)"
title = "Northern Hemisphere - Max Temp vs. Latitude Linear Regression"

linear_regression(x_values, y_values, x_label, y_label, title)

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Regress max temperature on latitude for cities south of the equator.
southern = df_cities[df_cities["Lat"] < 0]
x_values = southern["Lat"]
y_values = southern["Max Temp"]
x_label = "Latitude"
y_label = "Max Temp (F)"
title = "Southern Hemisphere - Max Temp vs. Latitude Linear Regression"

linear_regression(x_values, y_values, x_label, y_label, title)

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Regress humidity on latitude for cities north of the equator.
northern = df_cities[df_cities["Lat"] > 0]
x_values = northern["Lat"]
y_values = northern["Humidity"]
x_label = "Latitude"
y_label = "Humidity (%)"
title = "Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression"

linear_regression(x_values, y_values, x_label, y_label, title)

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Regress humidity on latitude for cities south of the equator.
southern = df_cities[df_cities["Lat"] < 0]
x_values = southern["Lat"]
y_values = southern["Humidity"]
x_label = "Latitude"
y_label = "Humidity (%)"
title = "Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression"

linear_regression(x_values, y_values, x_label, y_label, title)

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Regress cloudiness on latitude for cities north of the equator.
northern = df_cities[df_cities["Lat"] > 0]
x_values = northern["Lat"]
y_values = northern["Cloudiness"]
x_label = "Latitude"
y_label = "Cloudiness (%)"
title = "Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression"

linear_regression(x_values, y_values, x_label, y_label, title)

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Regress cloudiness on latitude for cities south of the equator.
southern = df_cities[df_cities["Lat"] < 0]
x_values = southern["Lat"]
y_values = southern["Cloudiness"]
x_label = "Latitude"
y_label = "Cloudiness (%)"
title = "Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression"

linear_regression(x_values, y_values, x_label, y_label, title)

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Regress wind speed on latitude for cities north of the equator.
northern = df_cities[df_cities["Lat"] > 0]
x_values = northern["Lat"]
y_values = northern["Wind Speed"]
x_label = "Latitude"
y_label = "Wind Speed (mph)"
title = "Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression"

linear_regression(x_values, y_values, x_label, y_label, title)

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Regress wind speed on latitude for cities south of the equator.
southern = df_cities[df_cities["Lat"] < 0]
x_values = southern["Lat"]
y_values = southern["Wind Speed"]
x_label = "Latitude"
y_label = "Wind Speed (mph)"
title = "Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression"

linear_regression(x_values, y_values, x_label, y_label, title)