# Episode 06: Computing in the Cloud.

### In Episode 06 we re-ran MovieStatsFlow on AWS using remote storage, metadata, and compute. This notebook shows how you can access your artifacts from anywhere.

## Import the Metaflow client

In [None]:
from metaflow import Flow, get_metadata, namespace
import matplotlib.pyplot as plt
# Look up the metadata provider the client reports, then show it
provider = get_metadata()
print("Current metadata provider: %s" % provider)

## Get the latest successful run of MovieStatsFlow

In [None]:
# Set namespace to None to search over all namespaces
namespace(None)
run = Flow('MovieStatsFlow').latest_successful_run

# latest_successful_run is None when the flow has no successful run yet.
# Stop here with a clear message rather than letting a later cell fail with
# an AttributeError on `run.data`.
if run is None:
    raise RuntimeError(
        "No successful run of MovieStatsFlow was found. "
        "Run the flow to completion first, then re-run this notebook."
    )

print("Using run: %s" % str(run))

## You can get all of your data artifacts from the remote datastore, even the 'movies.csv' input file. Let's print the last line of the file.

In [None]:
# movie_data is the artifact holding the text of the input CSV
movies_csv = run.data.movie_data

# Break the text on newlines and discard empty entries (for example the one
# produced by a trailing newline), so the last element is a real row
lines = list(filter(None, movies_csv.split('\n')))

print("The best movie ever made:")
print(lines[-1])

## Get the genre-specific movie statistics

In [None]:
# Read the genre_stats artifact from the selected run. The plotting cell
# iterates it with .items() and reads each value's 'quartiles' entry, so it is
# expected to be dict-like, keyed by genre -- confirm against the flow.
genre_stats = run.data.genre_stats

## Create a bar plot of the median gross box office for the top-5 grossing genres

In [None]:
# Get median for each genre: pair every genre name with element 1 of its
# 'quartiles' entry (presumably [Q1, median, Q3] -- confirm against the flow).
# The loop variables are named differently from the variables being assigned
# so nothing is shadowed.
data = [(name, stats['quartiles'][1])
        for name, stats in genre_stats.items()]

# Sort ascending by median so the highest-grossing genres come last, then
# unpack into parallel tuples of labels and medians. The fallback keeps the
# unpacking from raising ValueError when genre_stats is empty.
ranked = sorted(data, key=lambda pair: pair[1])
genre, median = zip(*ranked) if ranked else ((), ())

# Create the bar plot from the last five entries, i.e. the five genres with
# the highest median
plt.bar(genre[-5:], median[-5:], align='center', alpha=0.5)
plt.title("Median gross box office by genre (top 5)")
plt.xlabel("Genre")
plt.ylabel("Gross Box office (US Dollars)")
plt.show()