In [None]:
# Import dependencies 
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from splinter import Browser
from bs4 import BeautifulSoup as soup
from webdriver_manager.chrome import ChromeDriverManager

# Set up Browser with Splinter
# webdriver_manager installs a chromedriver and hands back its path; that path
# is forwarded to Splinter as the `executable_path` keyword.
driver_path = ChromeDriverManager().install()
executable_path = {'executable_path': driver_path}
browser = Browser('chrome', headless=False, **executable_path)

In [None]:
# Have the browser navigate to the website and copy the content
url = 'https://data-class-mars-challenge.s3.amazonaws.com/Mars/index.html'
browser.visit(url)
html = browser.html

# Parse the page. The parser class is imported under its real name here because
# the notebook-level alias `soup` is rebound to the parsed document on the next
# line; calling the alias itself (`soup = soup(...)`) only worked the first time
# the cell ran and broke on any re-run.
from bs4 import BeautifulSoup
soup = BeautifulSoup(html, 'html.parser')

## Find the table

In [None]:
 # Find the table
# First <table> element carrying the CSS class "table"
table = soup.find('table', attrs={'class': 'table'})
table

In [None]:
# Find the header
table_header = table.find_all('th')

# One title per <th> cell, in document order
headers = [th_cell.text for th_cell in table_header]

headers

In [None]:
# Empty frame whose columns are the scraped header titles
df = pd.DataFrame(data=None, columns=headers)
df

In [None]:
# Find the row data
# Collect the stripped text of every <td> in each `data-row` first, then build
# the frame in one go. Appending with `df.loc[len(df)] = ...` re-allocates the
# frame on every row and, because it extends whatever `df` already holds,
# duplicates all rows if the cell is run a second time.
table_rows = [
    [td.text.strip() for td in row.find_all('td')]
    for row in table.find_all('tr', class_='data-row')
]
df = pd.DataFrame(table_rows, columns=headers)

df

In [None]:
# Close the session: the page source has already been copied into `html`,
# so the Splinter browser opened in the first cell is no longer needed.
browser.quit()

## Checking the data types

In [None]:
# Check data types: every value was stored from `td.text.strip()`, i.e. as text,
# so this shows which columns still need an explicit cast.
df.dtypes

In [None]:
# Change data types
# Target dtype for each numeric column; the date column is handled separately
# because it goes through pd.to_datetime rather than astype.
numeric_dtypes = {
    'id': int,
    'sol': int,
    'ls': int,
    'month': int,
    'min_temp': float,
    'pressure': float,
}
for column_name, target_dtype in numeric_dtypes.items():
    df[column_name] = df[column_name].astype(target_dtype)

df['terrestrial_date'] = pd.to_datetime(df['terrestrial_date'])

# Check data types again
df.info()

## Finding the number of Months on Mars

In [None]:
# Answer the following question: How many months exist on Mars?
# The number of distinct values in the `month` column is the month count.
nb_of_months = df['month'].nunique()
# A real f-string (the original `f` prefix had no placeholders); output is unchanged.
print(f'There are {nb_of_months} months on Mars.')

## Finding the number of Martian days worth of data

In [None]:
# How many Martian (and not Earth) days worth of data exist in the scraped dataset?
# `sol` is the Martian-day column: its distinct count is the number of days
# covered, and its min/max give the span of the observations.
first_day_of_data = df['sol'].min()
last_day_of_data = df['sol'].max()
unique_days_date = df['sol'].nunique()

# Single f-string pieces with explicit spaces: the original relied on adjacent
# string literals and printed "in thedataset" / "in somedays".
print(
    f'There are {unique_days_date} Martian days worth of data in the dataset, '
    f'spreading from day {first_day_of_data} to day {last_day_of_data}. '
    'It is worth noting that in some days data is taken multiple times.'
)



## Finding the coldest and the warmest months on Mars

In [None]:
# Average minimum temperature for each month, rounded to the nearest degree.
# Grouping on the month values actually present avoids assuming they are
# numbered exactly 1..nb_of_months, and scans the table once instead of
# filtering it again for every month.
monthly_min_temp = df.groupby('month')['min_temp'].mean().round(0)

# One record per month; plain int/float values keep the list JSON-serialisable.
avg_min_temp = [
    {"month_nb": int(month_nb), "avg_min_temp": float(month_avg)}
    for month_nb, month_avg in monthly_min_temp.items()
]

print(avg_min_temp)

In [None]:
# Serialise the per-month records to a JSON string (kept for reuse; nothing is
# written to disk here).
import json
json_mars_temp = json.dumps(avg_min_temp)

# Build the DataFrame straight from the records. Feeding the JSON text back
# through pd.read_json is an unnecessary round trip, and passing a literal JSON
# string to read_json is deprecated in recent pandas. Without read_json's dtype
# inference the averages simply stay floats.
df_month_temp = pd.DataFrame(avg_min_temp)
df_month_temp

In [None]:
# Plot avg_min_temp: one bar per month, on an explicit figure/axes pair
fig, ax = plt.subplots()
ax.bar(df_month_temp['month_nb'], df_month_temp['avg_min_temp'], color='green')

# Create labels for the x and y axes.
ax.set_xlabel("Months")
ax.set_ylabel("Avg_min_temp")

# Create title (spelling of "Average" corrected)
ax.set_title('Average Minimum Temperature by Month')

# Render the figure without echoing the Text object returned by the last call
plt.show()

In [None]:
# The coldest month: keep every month tied for the lowest average
min_avg_temp = df_month_temp['avg_min_temp'].min()
is_coldest = df_month_temp['avg_min_temp'].eq(min_avg_temp)
month_min_temp = df_month_temp[is_coldest]
month_min_temp


Months 3 and 4 are the coldest months: after rounding to the nearest degree, both have an average minimum temperature of -83 degrees.

In [None]:
# The warmest month: keep every month tied for the highest average
max_avg_temp = df_month_temp['avg_min_temp'].max()
is_warmest = df_month_temp['avg_min_temp'].eq(max_avg_temp)
month_max_temp = df_month_temp[is_warmest]
month_max_temp

Month 8 is the warmest month, i.e. the month with the highest average minimum temperature.

## Finding the months with the highest and lowest atmospheric pressure on Mars

In [None]:
# Average pressure for each month.
# Select the `pressure` column before aggregating: averaging every column and
# discarding all but one is wasted work, and in pandas >= 2.0 it raises as soon
# as any other column is non-numeric. The double brackets keep the result a
# one-column DataFrame indexed by month, as before.
df_by_month = df.groupby('month')
df_month_pressure = df_by_month[['pressure']].mean()
df_month_pressure

In [None]:
# The month with the lowest atmospheric pressure
# (rows of the monthly table whose average equals the overall minimum)
min_avg_pressure = df_month_pressure['pressure'].min()
has_min_pressure = df_month_pressure['pressure'] == min_avg_pressure
month_min_pressure = df_month_pressure[has_min_pressure]
month_min_pressure


Month 6 is the month with the lowest average atmospheric pressure.

In [None]:
# The month with the highest atmospheric pressure
# (rows of the monthly table whose average equals the overall maximum)
max_avg_pressure = df_month_pressure['pressure'].max()
has_max_pressure = df_month_pressure['pressure'] == max_avg_pressure
month_max_pressure = df_month_pressure[has_max_pressure]
month_max_pressure

Month 9 is the month with the highest average atmospheric pressure.

In [None]:
# Plot avg_pressure
# Take the x positions from the grouped index (the month numbers) instead of a
# hard-coded range(1, 13), so bars and heights always line up whatever months
# the data contains.
fig, ax = plt.subplots()
x = df_month_pressure.index
ax.bar(x, df_month_pressure['pressure'])

# Create labels for the x and y axes.
ax.set_xlabel("Months")
ax.set_ylabel("Avg_pressure")

# Create title (spelling of "Average" corrected)
ax.set_title('Average Pressure by Month')

# Render the figure without echoing the Text object returned by the last call
plt.show()

## How many terrestrial (Earth) days are there in a Martian year?

In [None]:
# Sun longitude (`ls`) recorded on the row labelled 0, i.e. the starting point
orig_long = df.loc[0, 'ls']
orig_long

In [None]:
# The Sun longitude returns to the same value after approximately one Martian
# year, so select every row whose longitude equals orig_long.
matches_orig_long = df['ls'] == orig_long
same_long = df[matches_orig_long]
same_long

In [None]:
import datetime as dt
from datetime import timedelta
from datetime import datetime

# Earth-time elapsed between the first observation and the next row with the
# same Sun longitude.
# NOTE(review): this takes the *second* matching row; if two neighbouring days
# ever share the starting longitude the result would be far too small - confirm
# against the `same_long` table shown above.
if len(same_long) < 2:
    raise ValueError('The starting Sun longitude never recurs in the dataset, '
                     'so the length of a Martian year cannot be measured.')

a = same_long['terrestrial_date'].loc[0]
date_after_one_martial_year = same_long['terrestrial_date'].iloc[1]
one_martial_year = date_after_one_martial_year - a

# Report whole days instead of the raw Timedelta repr; "Martial" -> "Martian".
print(f'In one Martian year, nearly {one_martial_year.days} days pass on Earth')

In [None]:
# Plotting the minimum temperature against the Martian day (sol)
fig, ax = plt.subplots()
ax.bar(df['sol'], df['min_temp'])

# Axis labels
ax.set_xlabel("Days")
ax.set_ylabel("Minimum Temperature ")

# Title
ax.set_title('Daily minimum temperature')

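From the graph above, one can see that the minimum temperature goes through about 3 full cycles in roughly 2000 days, i.e. planet Mars circles the Sun about 3 times in that span. This means that a Martian year is around 666 days (2000/3), which is almost the same number as the one obtained above from the Sun longitude. Note that the x-axis of the graph counts Martian days (sols), so this is only a rough cross-check.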

In [None]:
# Write the typed table to disk, leaving out the positional index column
csv_path = 'mars_table.csv'
df.to_csv(csv_path, index=False)