# Mars Weather
Scrape and Analyze Mars Weather Data

In [None]:
# load dependencies
from splinter import Browser
from bs4 import BeautifulSoup as soup
import matplotlib.pyplot as plt
import pandas as pd


# 1. Visit Site
Use automated browsing to visit the Mars Temperature Data Site. Inspect the page to identify which elements to scrape.



In [None]:
# address of the Mars temperature data page that will be scraped
url = 'https://static.bc-edx.com/data/web/mars_facts/temperature.html'

# start an automated Chrome session and load the page in it
browser = Browser('chrome')
browser.visit(url)


# 2. Scrape Table
Create a BeautifulSoup object and use it to scrape the data in the HTML table.

Note that this can also be achieved by using the Pandas read_html function. However, we'll use BeautifulSoup here to continue sharpening our web scraping skills.



In [None]:
# grab the page source from the browser and parse it with BeautifulSoup
html = browser.html
html_soup = soup(html, 'html.parser')

# locate the first <table> element carrying the CSS class "table";
# the header and data rows are pulled out of it in a later cell
table = html_soup.find('table', attrs={'class': 'table'})


In [None]:
# close browser connection
# (the page source was already copied into `html`, so the session is no longer needed)
browser.quit()


# 3. Store Results
Assemble the scraped data into a Pandas DataFrame. The columns should have the same headings as the table on the website:

id: the identification number of a single transmission from the Curiosity rover
terrestrial_date: the date on Earth
sol: the number of elapsed sols (Martian days) since Curiosity landed on Mars
ls: the solar longitude
month: the Martian month
min_temp: the minimum temperature, in Celsius, of a single Martian day (sol)
pressure: the atmospheric pressure at Curiosity's location


In [None]:
# Collect every <tr> of the table once; the first row is treated as the header row
all_rows = table.find_all('tr')

# Extract the header: the stripped text of each <th> in the first row
# (left as an empty list when the table has no rows at all)
table_header = all_rows[0] if all_rows else None
header = []
if table_header is not None:
    header = [entry.text.strip() for entry in table_header.find_all('th')]

# Extract the data rows: one list of stripped <td> texts per remaining row.
# Rows without any <td> cell are skipped so they do not become empty records
# that would later fail the numeric casts.
table_rows = all_rows[1:]  # Skip the first row (header row)
data_rows = []
for row in table_rows:
    cells = [entry.text.strip() for entry in row.find_all('td')]
    if cells:
        data_rows.append(cells)


In [None]:
# build the DataFrame: each scraped row becomes a record and the scraped
# header texts become the column labels
mars_weather_df = pd.DataFrame(data=data_rows, columns=header)


In [None]:
# sanity check: show the first five records of the new DataFrame
mars_weather_df.head(n=5)


# 4. Prepare Data for Analysis
Examine the data types that are currently associated with each column. If necessary, cast (or convert) the data to the appropriate datetime, int, or float data types.



In [None]:
# examine data type of each column
# (the values were built from scraped cell text, so check which columns need casting)
mars_weather_df.dtypes


In [None]:
# target dtype for every numeric column, keyed by column name
numeric_dtypes = {
    'id': 'int',
    'sol': 'int',
    'ls': 'int',
    'month': 'int',
    'min_temp': 'float',
    'pressure': 'float',
}

# cast the numeric columns in one pass
mars_weather_df = mars_weather_df.astype(numeric_dtypes)

# the Earth date is converted separately into pandas datetime values
mars_weather_df['terrestrial_date'] = pd.to_datetime(mars_weather_df['terrestrial_date'])


In [None]:
# confirm type changes were successful by examining data types again
# (compare the output with the dtypes requested in the cast above)
mars_weather_df.dtypes


# 5. Analyze the Data
Analyze the dataset with pandas:

How many months exist on Mars?


In [None]:
# months are stored as integers, so the largest value present is taken as the
# number of months on Mars (assumes the numbering starts at 1)
month_numbers = mars_weather_df['month']
month_numbers.max()


In [None]:
# number of distinct `sol` values in the dataset
# (dropna=False keeps the count identical to the length of the unique-values array)
mars_weather_df['sol'].nunique(dropna=False)


In [None]:
# collect the `min_temp` readings per Martian month, then average each group
min_temp_by_month = mars_weather_df.groupby('month')['min_temp']
avg_min_temp_by_month = min_temp_by_month.mean()
avg_min_temp_by_month


In [None]:
# bar chart of the average minimum temperature, one bar per month
ax = avg_min_temp_by_month.plot(kind='bar')
ax.set_ylabel('Temperature in Celsius')
plt.show()


In [None]:
# sort the monthly averages in ascending order so the coldest month is the
# leftmost bar and the hottest month is the rightmost bar
sorted_avg_min_temp = avg_min_temp_by_month.sort_values()
ax = sorted_avg_min_temp.plot(kind='bar')
ax.set_ylabel('Temperature in Celsius')
plt.show()


# Which months have the lowest and the highest atmospheric pressure on Mars? To answer this question:
Find the average daily atmospheric pressure for each month.
Plot the results as a bar chart.


In [None]:
# collect the `pressure` readings per Martian month, then average each group
pressure_by_month = mars_weather_df.groupby('month')['pressure']
avg_pressure_by_month = pressure_by_month.mean()
avg_pressure_by_month


In [None]:
# bar chart of the monthly average pressure, sorted ascending so the
# lowest-pressure month is on the left and the highest on the right
sorted_avg_pressure = avg_pressure_by_month.sort_values()
ax = sorted_avg_pressure.plot(kind='bar')
ax.set_ylabel('Atmospheric Pressure')
plt.show()


# About how many terrestrial (Earth) days exist in a Martian year? To answer this question:
Consider how many days elapse on Earth in the time that Mars circles the Sun once.
Visually estimate the result by plotting the daily minimum temperature.


In [None]:
# how many terrestrial days are there in a Martian year?
# Plot the minimum temperature against elapsed Earth days instead of the
# DataFrame row index: row positions only equal elapsed days when there is
# exactly one row per consecutive Earth day, which the scrape does not
# guarantee, so the row index would not match the x-axis label.
earth_dates = pd.to_datetime(mars_weather_df['terrestrial_date'])  # no-op if already datetime
elapsed_earth_days = (earth_dates - earth_dates.min()).dt.days

fig, ax = plt.subplots()
ax.plot(elapsed_earth_days, mars_weather_df['min_temp'])
ax.set_xlabel('Number of terrestrial days')
ax.set_ylabel('Minimum temperature')
plt.show()


# 6. Save the Data
Export the DataFrame to a CSV file.



In [None]:
# write data to csv
# The target directory is created first (no error if it already exists),
# because `to_csv` fails when asked to write into a missing directory.
from pathlib import Path

output_path = Path('output/mars_weather.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
mars_weather_df.to_csv(output_path)