In [1]:
# Select matplotlib's interactive "notebook" backend for the figures drawn in this notebook
%matplotlib notebook

In [2]:
# Import dependencies
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
from bs4 import BeautifulSoup
from splinter import Browser
from webdriver_manager.chrome import ChromeDriverManager

# Start a visible (non-headless) Chrome session through splinter, using the
# driver path returned by webdriver_manager
executable_path = dict(executable_path=ChromeDriverManager().install())
browser = Browser('chrome', headless=False, **executable_path)

In [3]:
# Visit Mars site
url = "https://data-class-mars-challenge.s3.amazonaws.com/Mars/index.html"

# Capture the rendered page, then always close the browser: the parsing below
# works from the captured HTML only, and an unclosed session leaks a
# Chrome/ChromeDriver process.
# (Re-run the cell that opens the browser before re-running this one.)
try:
    browser.visit(url)
    html = browser.html
finally:
    browser.quit()

# Parse the captured markup with the built-in HTML parser
soup = BeautifulSoup(html, 'html.parser')

In [4]:
# Find the table
table = soup.find('table', class_='table')

# soup.find returns None when nothing matches; fail here with a clear message
# instead of an AttributeError on `table.find_all` in the following cells.
if table is None:
    raise ValueError("No <table class='table'> element found in the scraped page")

In [5]:
# Gather every 'data-row' table row and keep the text of each of its cells,
# producing one inner list per row
rows = table.find_all('tr', class_='data-row')
data_rows = [[cell.text for cell in row.find_all('td')] for row in rows]
data_rows

[['2', '2012-08-16', '10', '155', '6', '-75.0', '739.0'],
 ['13', '2012-08-17', '11', '156', '6', '-76.0', '740.0'],
 ['24', '2012-08-18', '12', '156', '6', '-76.0', '741.0'],
 ['35', '2012-08-19', '13', '157', '6', '-74.0', '732.0'],
 ['46', '2012-08-20', '14', '157', '6', '-74.0', '740.0'],
 ['57', '2012-08-21', '15', '158', '6', '-78.0', '740.0'],
 ['68', '2012-08-22', '16', '158', '6', '-77.0', '740.0'],
 ['79', '2012-08-23', '17', '159', '6', '-76.0', '742.0'],
 ['112', '2012-08-27', '21', '161', '6', '-74.0', '741.0'],
 ['114', '2012-08-28', '22', '162', '6', '-74.0', '742.0'],
 ['123', '2012-08-29', '23', '162', '6', '-75.0', '741.0'],
 ['134', '2012-08-30', '24', '163', '6', '-75.0', '742.0'],
 ['145', '2012-08-31', '25', '163', '6', '-75.0', '743.0'],
 ['156', '2012-09-01', '26', '164', '6', '-76.0', '745.0'],
 ['163', '2012-09-02', '27', '164', '6', '-75.0', '743.0'],
 ['170', '2012-09-03', '28', '165', '6', '-75.0', '745.0'],
 ['171', '2012-09-04', '29', '166', '6', '-75.0',

In [6]:
# Read the column names from the table's header cells
mars_header = table.find_all('th')
columns_list = [head.text for head in mars_header]
columns_list

['id', 'terrestrial_date', 'sol', 'ls', 'month', 'min_temp', 'pressure']

In [7]:
# Assemble the scraped rows into a dataframe labelled with the scraped headers
mars_df = pd.DataFrame(data=data_rows, columns=columns_list)
mars_df

Unnamed: 0,id,terrestrial_date,sol,ls,month,min_temp,pressure
0,2,2012-08-16,10,155,6,-75.0,739.0
1,13,2012-08-17,11,156,6,-76.0,740.0
2,24,2012-08-18,12,156,6,-76.0,741.0
3,35,2012-08-19,13,157,6,-74.0,732.0
4,46,2012-08-20,14,157,6,-74.0,740.0
...,...,...,...,...,...,...,...
1862,1889,2018-02-23,1973,133,5,-78.0,730.0
1863,1892,2018-02-24,1974,134,5,-77.0,729.0
1864,1894,2018-02-25,1975,134,5,-76.0,729.0
1865,1893,2018-02-26,1976,135,5,-77.0,728.0


In [8]:
# Inspect the dtype of every column to decide which ones need converting;
# the conversions themselves are done one column at a time in the next cells
mars_df.dtypes

id                  object
terrestrial_date    object
sol                 object
ls                  object
month               object
min_temp            object
pressure            object
dtype: object

In [9]:
# Cast the 'id' column to integer, then re-check the dtypes
mars_df = mars_df.assign(id=mars_df['id'].astype('int'))
mars_df.dtypes

id                   int32
terrestrial_date    object
sol                 object
ls                  object
month               object
min_temp            object
pressure            object
dtype: object

In [10]:
# Cast the 'sol' column to integer, then re-check the dtypes
mars_df = mars_df.astype({'sol': int})
mars_df.dtypes

id                   int32
terrestrial_date    object
sol                  int32
ls                  object
month               object
min_temp            object
pressure            object
dtype: object

In [11]:
# Parse the 'terrestrial_date' column into datetimes, then re-check the dtypes
mars_df = mars_df.assign(terrestrial_date=pd.to_datetime(mars_df['terrestrial_date']))
mars_df.dtypes

id                           int32
terrestrial_date    datetime64[ns]
sol                          int32
ls                          object
month                       object
min_temp                    object
pressure                    object
dtype: object

In [12]:
# Cast the 'ls' column to integer, then re-check the dtypes
mars_df = mars_df.assign(ls=mars_df['ls'].astype('int'))
mars_df.dtypes

id                           int32
terrestrial_date    datetime64[ns]
sol                          int32
ls                           int32
month                       object
min_temp                    object
pressure                    object
dtype: object

In [13]:
# Cast the 'month' column to integer, then re-check the dtypes
mars_df = mars_df.astype(dtype={'month': int})
mars_df.dtypes

id                           int32
terrestrial_date    datetime64[ns]
sol                          int32
ls                           int32
month                        int32
min_temp                    object
pressure                    object
dtype: object

In [14]:
# Cast the 'min_temp' column to float, then re-check the dtypes
mars_df = mars_df.assign(min_temp=mars_df['min_temp'].astype('float'))
mars_df.dtypes

id                           int32
terrestrial_date    datetime64[ns]
sol                          int32
ls                           int32
month                        int32
min_temp                   float64
pressure                    object
dtype: object

In [15]:
# Cast the 'pressure' column to float, then re-check the dtypes
mars_df = mars_df.astype({'pressure': float})
mars_df.dtypes

id                           int32
terrestrial_date    datetime64[ns]
sol                          int32
ls                           int32
month                        int32
min_temp                   float64
pressure                   float64
dtype: object

In [16]:
# Display the dataframe after the column type conversions above
mars_df

Unnamed: 0,id,terrestrial_date,sol,ls,month,min_temp,pressure
0,2,2012-08-16,10,155,6,-75.0,739.0
1,13,2012-08-17,11,156,6,-76.0,740.0
2,24,2012-08-18,12,156,6,-76.0,741.0
3,35,2012-08-19,13,157,6,-74.0,732.0
4,46,2012-08-20,14,157,6,-74.0,740.0
...,...,...,...,...,...,...,...
1862,1889,2018-02-23,1973,133,5,-78.0,730.0
1863,1892,2018-02-24,1974,134,5,-77.0,729.0
1864,1894,2018-02-25,1975,134,5,-76.0,729.0
1865,1893,2018-02-26,1976,135,5,-77.0,728.0


In [17]:
# Save dataframe to csv file
# Create the output folder first: to_csv raises if 'Analysis/' does not exist,
# which breaks a run on a fresh checkout. The figure exports further down write
# into the same folder.
Path('Analysis').mkdir(parents=True, exist_ok=True)
mars_df.to_csv('Analysis/missiontomars.csv', index=False)

In [18]:
# Summary statistics (count, mean, std, min, quartiles, max) for the dataframe's columns
mars_df.describe()

Unnamed: 0,id,sol,ls,month,min_temp,pressure
count,1867.0,1867.0,1867.0,1867.0,1867.0,1867.0
mean,955.551152,1015.670059,168.865024,6.141939,-76.12105,841.066417
std,545.048507,565.342298,105.599153,3.506493,5.504098,54.253226
min,2.0,10.0,0.0,1.0,-90.0,727.0
25%,489.5,546.5,78.0,3.0,-80.0,800.0
50%,959.0,1028.0,160.0,6.0,-76.0,853.0
75%,1425.5,1505.5,257.5,9.0,-72.0,883.0
max,1895.0,1977.0,359.0,12.0,-62.0,925.0


In [19]:
# Group by month to determine averages
# numeric_only=True keeps the result to the numeric columns on every pandas
# version; with the newer default (numeric_only=False) the datetime
# 'terrestrial_date' column would be averaged too and show up in the result.
avg_temp_df = mars_df.groupby('month').mean(numeric_only=True)
avg_temp_df

Unnamed: 0_level_0,id,sol,ls,min_temp,pressure
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,1013.873563,1077.574713,15.281609,-77.16092,862.488506
2,1109.882022,1175.853933,44.258427,-79.932584,889.455056
3,1132.697917,1204.40625,75.010417,-83.307292,877.322917
4,1175.164948,1244.5,104.365979,-82.747423,806.329897
5,1115.033557,1182.691275,132.885906,-79.308725,748.557047
6,735.306122,750.829932,164.897959,-75.29932,745.054422
7,714.549296,715.105634,194.894366,-72.28169,795.105634
8,718.078014,795.333333,224.347518,-68.382979,873.829787
9,785.843284,861.186567,254.052239,-69.171642,913.30597
10,955.723214,1034.669643,286.330357,-71.982143,887.3125


In [27]:
# Plot Temp by month
# Bar chart of the per-month mean of 'min_temp' taken from avg_temp_df
ax = avg_temp_df.plot.bar(y='min_temp', rot=0, title='Temp(C) by Martian months')
ax.set_ylabel('Average minimum temperature (C)')

# Apply the layout BEFORE saving; saving first writes the image without the
# tight-layout adjustment, so the file differs from the figure shown here.
plt.tight_layout()
plt.savefig('Analysis/Temp-by-Martian-month.jpg')

<IPython.core.display.Javascript object>

In [43]:
# Plot pressure by month
# Bar chart of the per-month mean of 'pressure' taken from avg_temp_df.
# NOTE(review): the title previously said "atm", but the plotted values are in
# the hundreds (roughly 730-930), which matches Mars surface pressure expressed
# in pascals rather than atmospheres; labelled as Pa - confirm against the
# data source.
ax = avg_temp_df.plot.bar(y='pressure', rot=0, title='Atmospheric pressure (Pa) by month on Mars')
ax.set_ylabel('Average pressure (Pa)')
ax.set_ylim([0, 1100])

# Apply the layout BEFORE saving; saving first writes the image without the
# tight-layout adjustment, so the file differs from the figure shown here.
plt.tight_layout()
plt.savefig('Analysis/P-by-Martian-month.jpg')

<IPython.core.display.Javascript object>

In [22]:
# Keep only the date and minimum-temperature columns in a separate dataframe,
# used below to plot terrestrial_date against min_temp
mars_year_df = mars_df.loc[:, ['terrestrial_date', 'min_temp']]
mars_year_df

Unnamed: 0,terrestrial_date,min_temp
0,2012-08-16,-75.0
1,2012-08-17,-76.0
2,2012-08-18,-76.0
3,2012-08-19,-74.0
4,2012-08-20,-74.0
...,...,...
1862,2018-02-23,-78.0
1863,2018-02-24,-77.0
1864,2018-02-25,-76.0
1865,2018-02-26,-77.0


In [23]:
# Make terrestrial_dates column the index
# Build from mars_df rather than from mars_year_df itself so this cell can be
# re-run: calling set_index on the already-indexed frame raises KeyError,
# because the 'terrestrial_date' column is gone after the first run.
mars_year_df = mars_df[['terrestrial_date', 'min_temp']].set_index('terrestrial_date')
mars_year_df

Unnamed: 0_level_0,min_temp
terrestrial_date,Unnamed: 1_level_1
2012-08-16,-75.0
2012-08-17,-76.0
2012-08-18,-76.0
2012-08-19,-74.0
2012-08-20,-74.0
...,...
2018-02-23,-78.0
2018-02-24,-77.0
2018-02-25,-76.0
2018-02-26,-77.0


In [42]:
# Restrict the date-indexed frame to 2013-01-01 through 2017-12-31
mars_years = mars_year_df.loc[slice('2013-01-01', '2017-12-31')]

# Draw the selected period as a line chart, tidy the layout, then save the image
mars_years.plot.line(title='Daily Martian Temp (C)')
plt.tight_layout()
plt.savefig('Analysis/mars-temp-vs-earth-dates.jpg')

<IPython.core.display.Javascript object>