# Movie Success pipeline

In [None]:
# Name of the Kubeflow Pipelines experiment that is looked up (or created)
# further down before the run is submitted.
EXPERIMENT_NAME = "movie-success"

# Placeholder: set this to your own bucket name. It is interpolated into the
# component.yaml URLs and into every gs:// URI the pipeline uses.
BUCKET = "your-bucket-name"

## Imports

In [None]:
import kfp
from kfp import compiler
import kfp.components as comp
import kfp.dsl as dsl
from kfp import gcp

## Load components

In [None]:
def _load_component(component_dir):
    """Load the component definition stored under ``components/<component_dir>`` in BUCKET.

    Args:
        component_dir: Sub-directory of ``components/`` in the bucket that holds
            the ``component.yaml`` file (e.g. ``'preprocess'``).

    Returns:
        Whatever ``comp.load_component_from_url`` returns for that URL; the
        pipeline cell calls the result to create a pipeline step.
    """
    url = 'https://storage.googleapis.com/{}/components/{}/component.yaml'.format(
        BUCKET, component_dir)
    # Always go through the `comp` alias (it is `kfp.components`) so the three
    # loads are spelled the same way.
    return comp.load_component_from_url(url)


preprocess_operation = _load_component('preprocess')
help(preprocess_operation)

train_operation = _load_component('train')
help(train_operation)

ai_platform_deploy_operation = _load_component('deploy')
help(ai_platform_deploy_operation)

## Build the Pipeline

In [None]:
# NOTE(review): 'Successs' in the pipeline name looks like a typo. It is left
# unchanged here because the name is emitted into the compiled workflow.
@dsl.pipeline(
  name='Movie Successs Pipeline',
  description='Performs preprocessing, training and deployment.'
)
def pipeline():
    """Define the preprocess -> train -> deploy workflow.

    Steps:
      1. ``preprocess_operation`` reads the input CSV and is given URIs for the
         x data, the y data and the preprocessing state.
      2. ``train_operation`` consumes the preprocess step's ``output-x-uri`` and
         ``output-y-uri`` outputs and is given a job directory and a model URI.
      3. ``ai_platform_deploy_operation`` receives the train step's output as
         ``model_path`` together with the model/version settings below.

    Every step has ``use_gcp_secret('user-gcp-sa')`` applied to it.
    """
    # str.format() collapses '{{' / '}}' into single braces, so writing
    # "gs://{}/{{workflow.uid}}/...".format(BUCKET) yields a literal
    # '{workflow.uid}' path segment. Passing the placeholder as a format
    # *argument* keeps the double braces, so the workflow engine can replace it
    # with the run's uid and each run gets its own directory.
    run_root = "gs://{}/{}".format(BUCKET, "{{workflow.uid}}")

    # The preprocessing state and the trained model are deliberately given the
    # same URI, exactly as in the original cell.
    model_uri = run_root + "/model"

    preprocess_task = preprocess_operation(
        # Upload data to GCS and drop URL
        # NOTE(review): this still points at a named-entity-recognition sample
        # CSV; replace it with the URI of the uploaded movie data.
        input_1_uri='gs://kubeflow-examples-data/named_entity_recognition_dataset/ner.csv',
        output_y_uri_template=run_root + "/preprocess/y/data",
        output_x_uri_template=run_root + "/preprocess/x/data",
        output_preprocessing_state_uri_template=model_uri
    ).apply(kfp.gcp.use_gcp_secret('user-gcp-sa'))

    train_task = train_operation(
        input_x_uri=preprocess_task.outputs['output-x-uri'],
        input_y_uri=preprocess_task.outputs['output-y-uri'],
        input_job_dir_uri=run_root + "/job",
        output_model_uri_template=model_uri
    ).apply(kfp.gcp.use_gcp_secret('user-gcp-sa'))

    deploy_task = ai_platform_deploy_operation(
        model_path=train_task.output,
        model_name="movie_success_kubeflow",
        model_region="us-central1",
        model_version="version1",
        model_runtime_version="2.3",
        model_prediction_class="model_prediction.CustomModelPrediction",
        model_python_version="3.7",
        model_package_uris="gs://{}/routine/custom_prediction_routine-0.2.tar.gz".format(BUCKET)
    ).apply(kfp.gcp.use_gcp_secret('user-gcp-sa'))

## Compile the Pipeline

In [None]:
# Compile the pipeline function into a package file named after the function.
pipeline_func = pipeline
pipeline_filename = pipeline_func.__name__ + '.pipeline.zip'

# `compiler` is already bound by `from kfp import compiler` in the imports
# cell, so it is not re-imported here.
compiler.Compiler().compile(pipeline_func, pipeline_filename)

## Create a Kubeflow Experiment

In [None]:
client = kfp.Client()

# Reuse the experiment when the lookup succeeds; otherwise create it.
try:
    experiment = client.get_experiment(experiment_name=EXPERIMENT_NAME)
except Exception:
    # Narrowed from a bare `except:` so that KeyboardInterrupt / SystemExit are
    # no longer swallowed.
    # NOTE(review): the exact exception raised for a missing experiment depends
    # on the installed kfp version -- confirm it and narrow this further.
    experiment = client.create_experiment(EXPERIMENT_NAME)

print(experiment)

## Run the Pipeline

In [None]:
# The pipeline function takes no parameters, so an empty mapping is submitted.
arguments = {}

run_name = '{} run'.format(pipeline_func.__name__)
run_result = client.run_pipeline(
    experiment.id,
    run_name,
    pipeline_filename,
    arguments,
)

# Echo what was submitted, one value per line.
for submitted_value in (experiment.id, run_name, pipeline_filename, arguments):
    print(submitted_value)

In [13]:
!python -m pip install fs-gcsfs
!python -m pip install gcsfs
!python -m pip install fsspec





In [14]:
import gcsfs

In [15]:
import pandas as pd
# Read the movies CSV directly from its gs:// location.
movies_csv_uri = "gs://movie-success-bucket/data/tmdb_5000_movies.csv/tmdb_5000_movies.csv"
data_movies = pd.read_csv(movies_csv_uri)

In [16]:
# Preview the first two rows of the movies table.
data_movies.head(n=2)

Unnamed: 0,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,vote_average,vote_count
0,237000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://www.avatarmovie.com/,19995,"[{""id"": 1463, ""name"": ""culture clash""}, {""id"":...",en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,"[{""name"": ""Ingenious Film Partners"", ""id"": 289...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2009-12-10,2787965087,162.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Enter the World of Pandora.,Avatar,7.2,11800
1,300000000,"[{""id"": 12, ""name"": ""Adventure""}, {""id"": 14, ""...",http://disney.go.com/disneypictures/pirates/,285,"[{""id"": 270, ""name"": ""ocean""}, {""id"": 726, ""na...",en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}, {""...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2007-05-19,961000000,169.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"At the end of the world, the adventure begins.",Pirates of the Caribbean: At World's End,6.9,4500


In [17]:
# Read the credits CSV directly from its gs:// location and preview two rows.
credits_csv_uri = "gs://movie-success-bucket/data/tmdb_5000_credits.csv/tmdb_5000_credits.csv"
data_credits = pd.read_csv(credits_csv_uri)
data_credits.head(n=2)

Unnamed: 0,movie_id,title,cast,crew
0,19995,Avatar,"[{""cast_id"": 242, ""character"": ""Jake Sully"", ""...","[{""credit_id"": ""52fe48009251416c750aca23"", ""de..."
1,285,Pirates of the Caribbean: At World's End,"[{""cast_id"": 4, ""character"": ""Captain Jack Spa...","[{""credit_id"": ""52fe4232c3a36847f800b579"", ""de..."


In [None]:
# Scratch note kept as a comment (a bare URI is not valid Python and made this
# cell fail when run): location of the credits CSV read above.
# gs://movie-success-bucket/data/tmdb_5000_credits.csv/tmdb_5000_credits.csv