#### **Requirement 1**
Pull in Data From an API

In [1]:
import pandas as pd
import requests

The API I'm getting data from is [Open-Meteo](https://open-meteo.com/en). I collected historical weather data for Louisville, Kentucky from January 1st to July 13th, 2022. Times are in the Eastern time zone (America/New_York), temperatures are in degrees Fahrenheit, and precipitation is in inches.

In [2]:
#Target URL for the Open-Meteo archive API, split into one literal per group of query parameters
url = (
    'https://archive-api.open-meteo.com/v1/era5'
    '?latitude=38.26&longitude=-85.70'
    '&start_date=2022-01-01&end_date=2022-07-13'
    '&daily=temperature_2m_max,sunrise,sunset,precipitation_sum'
    '&timezone=America%2FNew_York'
    '&temperature_unit=fahrenheit&windspeed_unit=mph&precipitation_unit=inch'
)

In [3]:
#Make a GET request to the API
#The timeout keeps this cell from hanging indefinitely if the server never responds,
#and raise_for_status() stops here with a clear error if the server answers with a 4xx/5xx status
r = requests.get(url, timeout=30)
r.raise_for_status()

In [4]:
#Check for success: displaying the response object shows its HTTP status code
r

<Response [200]>

In [5]:
#Extract the json-structured data from the response as Python objects
#NOTE(review): the name `json` would shadow the standard-library json module if that module were ever imported in this notebook
json = r.json()

In [6]:
#List the top-level keys of the json data to find the part that can be used to build a dataframe
json.keys()

dict_keys(['latitude', 'longitude', 'generationtime_ms', 'utc_offset_seconds', 'timezone', 'timezone_abbreviation', 'elevation', 'daily_units', 'daily'])

In [7]:
#Check the type of the value stored under 'daily' before passing it to the DataFrame constructor
type(json['daily'])

dict

In [8]:
#Build the dataframe from the value stored under the 'daily' key of the API response
df = pd.DataFrame(data=json['daily'])

In [9]:
#Display the dataframe to inspect its columns and values
df

Unnamed: 0,time,temperature_2m_max,sunrise,sunset,precipitation_sum
0,2022-01-01,65.2,2022-01-01T08:58,2022-01-01T18:34,2.17
1,2022-01-02,53.5,2022-01-02T08:58,2022-01-02T18:35,0.06
2,2022-01-03,33.8,2022-01-03T08:58,2022-01-03T18:36,0.00
3,2022-01-04,43.8,2022-01-04T08:58,2022-01-04T18:37,0.00
4,2022-01-05,46.4,2022-01-05T08:58,2022-01-05T18:38,0.00
...,...,...,...,...,...
189,2022-07-09,77.6,2022-07-09T06:26,2022-07-09T21:09,0.67
190,2022-07-10,88.7,2022-07-10T06:26,2022-07-10T21:09,0.00
191,2022-07-11,90.6,2022-07-11T06:27,2022-07-11T21:09,0.00
192,2022-07-12,90.0,2022-07-12T06:28,2022-07-12T21:08,0.00


#### **Data Cleaning**
Formatting Datetime Columns Before Analyzing the Data

In [10]:
#Parse the date strings in the 'time' column into datetime values
df['time'] = df['time'].pipe(pd.to_datetime)

In [11]:
#Parse the timestamp strings in the 'sunrise' column into datetime values
df['sunrise'] = df['sunrise'].pipe(pd.to_datetime)

In [12]:
#Parse the timestamp strings in the 'sunset' column into datetime values
df['sunset'] = df['sunset'].pipe(pd.to_datetime)

In [13]:
#Preview the first rows to confirm the datetime columns were converted
df.head()

Unnamed: 0,time,temperature_2m_max,sunrise,sunset,precipitation_sum
0,2022-01-01,65.2,2022-01-01 08:58:00,2022-01-01 18:34:00,2.17
1,2022-01-02,53.5,2022-01-02 08:58:00,2022-01-02 18:35:00,0.06
2,2022-01-03,33.8,2022-01-03 08:58:00,2022-01-03 18:36:00,0.0
3,2022-01-04,43.8,2022-01-04 08:58:00,2022-01-04 18:37:00,0.0
4,2022-01-05,46.4,2022-01-05 08:58:00,2022-01-05 18:38:00,0.0


#### **Requirement 2**
Find and Print Two Descriptive Statistics About the Data

In [14]:
#Weekday name for every date in the 'time' column
df.loc[:, 'time'].dt.day_name()

0       Saturday
1         Sunday
2         Monday
3        Tuesday
4      Wednesday
         ...    
189     Saturday
190       Sunday
191       Monday
192      Tuesday
193    Wednesday
Name: time, Length: 194, dtype: object

In [15]:
#The lowest daily maximum temperature between January 1st and July 13th
#(temperature_2m_max holds each day's high, so this is the coldest daily high, not the lowest temperature reached)
df.loc[:, 'temperature_2m_max'].min()

20.3

#### **Requirement 3**
Write a Query in Pandas

In [16]:
#How many days had a precipitation total of more than half an inch?
#Keep only the rows whose precipitation_sum is above 0.5 and display them
query_df = df.query('precipitation_sum > 0.5')
query_df

Unnamed: 0,time,temperature_2m_max,sunrise,sunset,precipitation_sum
0,2022-01-01,65.2,2022-01-01 08:58:00,2022-01-01 18:34:00,2.17
8,2022-01-09,44.4,2022-01-09 08:58:00,2022-01-09 18:42:00,0.8
32,2022-02-02,51.7,2022-02-02 08:45:00,2022-02-02 19:07:00,0.9
33,2022-02-03,38.1,2022-02-03 08:44:00,2022-02-03 19:09:00,1.61
47,2022-02-17,63.6,2022-02-17 08:28:00,2022-02-17 19:24:00,1.87
52,2022-02-22,62.7,2022-02-22 08:22:00,2022-02-22 19:30:00,0.86
54,2022-02-24,38.6,2022-02-24 08:19:00,2022-02-24 19:32:00,0.53
64,2022-03-06,70.9,2022-03-06 08:05:00,2022-03-06 19:42:00,0.77
65,2022-03-07,65.8,2022-03-07 08:04:00,2022-03-07 19:43:00,0.71
80,2022-03-22,69.7,2022-03-22 07:41:00,2022-03-22 19:57:00,0.55


In [17]:
#Answer: 21 days (the first element of the shape is the number of rows in the filtered dataframe)
query_df.shape

(21, 5)

#### **Requirement 4**
Select and Print the Second and Third Columns

In [18]:
#Pick the two requested columns by name, keeping every row
df_columns = df.loc[:, ['temperature_2m_max', 'sunrise']]
df_columns

Unnamed: 0,temperature_2m_max,sunrise
0,65.2,2022-01-01 08:58:00
1,53.5,2022-01-02 08:58:00
2,33.8,2022-01-03 08:58:00
3,43.8,2022-01-04 08:58:00
4,46.4,2022-01-05 08:58:00
...,...,...
189,77.6,2022-07-09 06:26:00
190,88.7,2022-07-10 06:26:00
191,90.6,2022-07-11 06:27:00
192,90.0,2022-07-12 06:28:00


#### **Requirement 5**
Select and Print the First Four Rows

In [19]:
#Show the first four rows of the dataframe
df.head(4)

Unnamed: 0,time,temperature_2m_max,sunrise,sunset,precipitation_sum
0,2022-01-01,65.2,2022-01-01 08:58:00,2022-01-01 18:34:00,2.17
1,2022-01-02,53.5,2022-01-02 08:58:00,2022-01-02 18:35:00,0.06
2,2022-01-03,33.8,2022-01-03 08:58:00,2022-01-03 18:36:00,0.0
3,2022-01-04,43.8,2022-01-04 08:58:00,2022-01-04 18:37:00,0.0
