In [404]:
import pandas as pd # Data Analysis library for working with tabular data

# Read the AtlasIV CSV export into a DataFrame and preview its first five rows
df = pd.read_csv(filepath_or_buffer='AtlasIV.csv')
df.head(n=5)

Unnamed: 0,day,regen,battery,reflector,distance,data,radiation
0,1,N,89.4,5.0,1296,241,10.3
1,2,N,88.6,,1291,254,10.2
2,3,Y,91.9,1.0,1334,169,10.2
3,4,N,89.9,,1301,214,10.3
4,5,Y,90.5,4.0,1203,204,10.3


In [405]:
# Clean the regen column into a boolean: 'N' (no recharge) -> False, 'Y' (recharged) -> True.
# Each result is assigned back to df['regen'] instead of calling .where(..., inplace=True)
# on the column selection: under pandas Copy-on-Write that selection is a temporary object,
# so an inplace call never reaches df, and astype(bool) would then turn both 'N' and 'Y'
# (non-empty strings) into True.
df['regen'] = df['regen'].where(df['regen'] != 'N', other=False)
df['regen'] = df['regen'].where(df['regen'] != 'Y', other=True)
df['regen'] = df['regen'].astype(bool)

# Sort sequentially by day and make day the index, since it's the unique ID of each row.
# The guard keeps this cell re-runnable: once 'day' is the index it is no longer a column,
# and a second set_index('day') would raise a KeyError.
if df.index.name != 'day':
    df = df.sort_values(by='day').set_index('day')

# Flag the days on which a reflector was hit (a NaN reflector compares as False)
df['hit reflector'] = df['reflector'] >= 1

# Treat the reflector column like a string column, using "None" for NaN values.
# The fillna result has to feed astype(str); converting df['reflector'] a second time would
# throw the fill away and leave the missing values as the string 'nan'.
reflector_labels = {f'{number}.0': f'Reflector #{number}' for number in range(1, 6)}
df['reflector_str'] = (
    df['reflector']
    .fillna('None')
    .astype(str)
    .replace(reflector_labels)
)

df.head()

Unnamed: 0_level_0,regen,battery,reflector,distance,data,radiation,hit reflector,reflector_str
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,False,89.4,5.0,1296,241,10.3,True,Reflector #5
2,False,88.6,,1291,254,10.2,False,
3,True,91.9,1.0,1334,169,10.2,True,Reflector #1
4,False,89.9,,1301,214,10.3,False,
5,True,90.5,4.0,1203,204,10.3,True,Reflector #4


In [406]:
# Engineer a battery delta column: the change in starting battery % versus the previous row.
# The first row has nothing to diff against, so its NaN is replaced with 0.
df['battery delta'] = df['battery'].diff(periods=1).fillna(value=0)

df.head()

Unnamed: 0_level_0,regen,battery,reflector,distance,data,radiation,hit reflector,reflector_str,battery delta
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,False,89.4,5.0,1296,241,10.3,True,Reflector #5,0.0
2,False,88.6,,1291,254,10.2,False,,-0.8
3,True,91.9,1.0,1334,169,10.2,True,Reflector #1,3.3
4,False,89.9,,1301,214,10.3,False,,-2.0
5,True,90.5,4.0,1203,204,10.3,True,Reflector #4,0.6


In [407]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns
df.describe()

Unnamed: 0,battery,reflector,distance,data,radiation,battery delta
count,365.0,196.0,365.0,365.0,365.0,365.0
mean,85.959178,2.923469,1227.616438,315.205479,10.238904,-0.020548
std,2.706698,1.463841,59.161037,74.287413,0.225,1.994316
min,78.4,1.0,1061.0,105.0,9.7,-5.5
25%,84.0,1.75,1189.0,262.0,10.1,-1.2
50%,86.0,3.0,1228.0,317.0,10.2,0.1
75%,88.0,4.0,1268.0,367.0,10.3,1.1
max,92.7,5.0,1377.0,497.0,11.6,5.9


In [408]:
import plotly.express as px # Data visualization tool

# Options shared by every chart: human-readable labels keyed by DataFrame column name
plotly_options = dict(
    labels={
        'battery': 'Beginning of day Battery %',
        'battery delta': 'Relative Change in Starting Battery %',
        'day': 'Mission Day',
        'regen': 'Regenerated?',
        'distance': 'Distance Travelled (km)',
        'radiation': 'Radiation (Gy)',
        'data': 'Data Transmitted (kb)',
        'reflector': 'Reflector',
        'reflector_str': 'Reflector',
        'hit reflector': 'Hit Reflector?',
    }
)

# Histogram defaults: the shared options plus a fixed height and the Agsunset color sequence
histogram_options = dict(
    plotly_options,
    height=500,
    color_discrete_sequence=px.colors.sequential.Agsunset,
)

In [409]:
# Distribution of the start-of-day battery level, with a violin plot in the margin
px.histogram(
    df,
    x='battery',
    marginal='violin',
    title='Start Battery % Distribution',
    **histogram_options
)

In [410]:
# Distribution of the day-over-day battery change, with a violin plot in the margin
px.histogram(
    df,
    x='battery delta',
    marginal='violin',
    title='Daily Battery Change',
    **histogram_options
)

In [411]:
# Count of days with and without regeneration, one colored bar per regen value
px.histogram(
    df,
    x='regen',
    color='regen',
    title='Regenerated?',
    **histogram_options
)

In [412]:
# Reflector hits split into one facet per regen value, to show how the two flags line up.
# The chart title previously misspelled "Reflectors" as "Relfectors".
px.histogram(
    df,
    x='hit reflector',
    title='Relationship of Regeneration and Reflectors',
    color='regen',
    facet_col='regen',
    **histogram_options
)

In [413]:
# Count of days per reflector label; rows are sorted by label before plotting
px.histogram(
    df.sort_values(by='reflector_str', ascending=True),
    x='reflector_str',
    color='reflector_str',
    title='Reflector Hit',
    **histogram_options
)

In [414]:
# Battery change distribution, colored by whether a reflector was hit that day
px.histogram(
    df.sort_values(by='reflector_str'),
    x='battery delta',
    color='hit reflector',
    nbins=75,
    title='Battery Recharge by Reflector Hit',
    **histogram_options
)

In [415]:
# Battery change distribution, colored by the reflector label
px.histogram(
    df.sort_values(by='reflector_str'),
    x='battery delta',
    color='reflector_str',
    nbins=75,
    title='Battery Recharge by Reflector Hit',
    **histogram_options
)

In [416]:
# Box plot of battery change per reflector label, marking suspected outliers
px.box(
    df.sort_values(by='reflector_str'),
    x='battery delta',
    y='reflector_str',
    color='reflector_str',
    points='suspectedoutliers',
    title='Battery Recharge by Reflector Hit',
    **histogram_options
)

In [417]:
# Battery change distribution, one facet and color per regen value
px.histogram(
    df,
    x='battery delta',
    color='regen',
    facet_col='regen',
    title='Battery Change by Regenerated',
    **histogram_options
)

In [418]:
# Distance distribution, one facet and color per regen value
px.histogram(
    df,
    x='distance',
    color='regen',
    facet_col='regen',
    title='Distance Travelled by Regeneration',
    **histogram_options
)

In [419]:
# Box plot of distance for each regen value, with outlier points shown
px.box(
    df,
    x='regen',
    y='distance',
    color='regen',
    points='outliers',
    title='Distance Travelled',
    **histogram_options
)

In [420]:
# Distribution of the radiation readings, with a violin plot in the margin
px.histogram(
    df,
    x='radiation',
    marginal='violin',
    title='Radiation Encountered',
    **histogram_options
)

In [421]:
# Distribution of data transmitted, colored by regen, with a violin plot in the margin
px.histogram(
    df,
    x='data',
    color='regen',
    marginal='violin',
    title='Data Transmitted',
    **histogram_options
)

In [422]:
# Distance over time, using the DataFrame index as the x axis
px.line(
    df,
    x=df.index,
    y='distance',
    color_discrete_sequence=px.colors.qualitative.Alphabet_r,
    title='Daily Distance Travelled',
    **plotly_options
)

In [423]:
# Starting battery over time, one line per regen value, index on the x axis
px.line(
    df,
    x=df.index,
    y='battery',
    color='regen',
    color_discrete_sequence=px.colors.qualitative.Dark24_r,
    title='Daily Starting Battery',
    **plotly_options
)

In [424]:
# Data transmitted over time, one line per regen value, index on the x axis
px.line(
    df,
    x=df.index,
    y='data',
    color='regen',
    color_discrete_sequence=px.colors.qualitative.Dark24_r,
    title='Daily Data Transmitted',
    **plotly_options
)

In [425]:
# Pairwise correlation of the numeric and boolean columns.
# Non-numeric columns (the reflector_str label column) are excluded explicitly: since
# pandas 2.0, DataFrame.corr() defaults to numeric_only=False and raises on string data
# instead of silently dropping it. select_dtypes keeps the cell working on older and
# newer pandas alike.
corr = df.select_dtypes(include=['number', 'bool']).corr()

corr

Unnamed: 0,regen,battery,reflector,distance,data,radiation,hit reflector,battery delta
regen,1.0,0.260619,-0.101845,0.193348,-0.22485,0.005224,0.994508,0.789572
battery,0.260619,1.0,-0.239058,0.773549,-0.746683,-0.004332,0.267716,0.369623
reflector,-0.101845,-0.239058,1.0,-0.166443,0.2097,0.034807,,-0.902655
distance,0.193348,0.773549,-0.166443,1.0,-0.560387,0.061224,0.199783,0.266635
data,-0.22485,-0.746683,0.2097,-0.560387,1.0,0.017189,-0.230434,-0.275578
radiation,0.005224,-0.004332,0.034807,0.061224,0.017189,1.0,0.00672,-0.023805
hit reflector,0.994508,0.267716,,0.199783,-0.230434,0.00672,1.0,0.789938
battery delta,0.789572,0.369623,-0.902655,0.266635,-0.275578,-0.023805,0.789938,1.0


In [426]:
import numpy as np
import plotly.graph_objects as go
import plotly.figure_factory as ff

# Correlation values as a standalone 2-D array for the heatmap
z = corr.to_numpy(copy=True)

# Annotated heatmap: each cell is labelled with its correlation rounded to two decimals
fig = ff.create_annotated_heatmap(
    z,
    x=corr.columns.tolist(),
    y=corr.index.tolist(),
    annotation_text=np.round(z, 2),
    colorscale=px.colors.diverging.balance,
    hoverinfo='z',
)
fig.update_layout(title='Correlations Analysis Matrix')
fig.show()