<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"></ul></div>

In [None]:
import pandas as pd
from sklearn2pmml.pipeline import PMMLPipeline
from sklearn2pmml import sklearn2pmml
from sklearn.neighbors import KNeighborsRegressor
from openscoring import Openscoring
import numpy as np
from openscoring import EvaluationRequest
from sklearn.tree import DecisionTreeRegressor
import ast

# Prepare training data

In [None]:
# Load the raw movie metadata table from a CSV file in the working directory.
movie_df = pd.read_csv("movies_metadata.csv")

In [None]:
# Keep only the columns used for modelling: the budget feature, the raw genre
# annotations, and the vote_average target.
# .copy() gives train_df its own data, so the .loc assignments in later cells
# modify this frame directly rather than a slice of movie_df (which makes
# pandas emit SettingWithCopyWarning).
train_df = movie_df[["budget", "genres", "vote_average"]].copy()

In [None]:
# Preview the first rows of the selected columns.
train_df.head()

In [None]:
# Every genres entry is a string holding a Python literal; turn it into the
# corresponding Python object. ast.literal_eval only evaluates literals.
parsed_genres = train_df["genres"].apply(ast.literal_eval)
train_df.loc[:, "genres"] = parsed_genres

In [None]:
# Reduce each parsed list of genre records to the list of their 'name' values.
# An empty record list simply yields an empty name list.
train_df.loc[:, "genres"] = train_df["genres"].apply(
    lambda records: [record["name"] for record in records]
)

In [None]:
# Flatten the per-movie name lists into one long Series, then collect the
# distinct labels.
# NOTE: Series.explode() turns an empty list into NaN, so NaN can be one of
# the labels returned here.
flat_genres = train_df["genres"].explode()
all_genres = flat_genres.unique()

In [None]:
# Indicator table: one all-zero column per distinct label, one row per movie.
n_movies = len(train_df)
new_df = pd.DataFrame({label: [0] * n_movies for label in all_genres})

In [None]:
# Put the zero-initialised indicator columns to the right of the original
# columns; rows are matched on their index labels.
new_train = pd.concat([train_df, new_df], axis="columns")

In [None]:
# Preview the combined frame: original columns followed by the indicator columns.
new_train.head()


In [None]:
# Set the indicator column to 1 for every (movie, genre) pair.
# The name lists are exploded once, giving one entry per pair that still
# carries the movie's row label. Each genre column is then filled with a
# single vectorised .loc assignment instead of one scalar .loc write per pair,
# which is far slower on a frame of this size.
# dropna() skips rows that have no genre to mark.
genre_per_row = new_train["genres"].explode().dropna()
for genre_name in genre_per_row.unique():
    row_labels = genre_per_row.index[genre_per_row == genre_name].unique()
    new_train.loc[row_labels, genre_name] = 1


# Post-processing and normalization

In [None]:
# Drop rows whose budget value contains "jpg"; such values cannot be cast to
# an integer in the next step.
# astype(str) keeps the text match well-defined for missing or non-string
# values (a bare .str.contains would return NaN there and `~` would then fail).
# .copy() detaches the filtered frame so the column assignments in later cells
# do not raise SettingWithCopyWarning.
has_jpg = new_train["budget"].astype(str).str.contains("jpg")
new_train = new_train[~has_jpg].copy()

In [None]:
# Cast the budget column to 32-bit integers.
budget_as_int = new_train["budget"].astype("int32")
new_train["budget"] = budget_as_int

In [None]:
# Rescale budget by dividing every value by the column maximum.
largest_budget = new_train["budget"].max()
new_train["budget"] = new_train["budget"] / largest_budget

In [None]:
# The raw name lists are not needed any more now that the indicator columns
# are filled in.
new_train = new_train.drop("genres", axis="columns")

In [None]:
# Discard every row that still has a missing value in any column.
new_train = new_train.dropna()

In [None]:
# Inspect the column labels that remain after cleaning.
new_train.columns

In [None]:
# Preview the frame without the column at position 1 (vote_average at this
# point in the notebook): position 0 plus positions 2 through the last one.
feature_positions = [0, *range(2, new_train.shape[1])]
new_train.iloc[:, feature_positions]

In [None]:
# Remove the indicator column whose label is NaN (presumably created because
# exploding an empty genre list yields NaN -- confirm against the data).
# errors="ignore" keeps this cell safe to re-run and valid for data that has
# no such column; a plain drop raises KeyError when the label is absent.
new_train = new_train.drop(columns=[np.nan], errors="ignore")

In [None]:
# Features are every remaining column except the target; the target is
# vote_average.
trainX = new_train.drop(columns=["vote_average"])
trainy = new_train["vote_average"]

# A depth-3 regression tree wrapped in a PMMLPipeline so it can be exported.
# random_state is fixed because scikit-learn permutes the features at each
# split, so equally good splits can otherwise be chosen differently from one
# run to the next.
pipeline = PMMLPipeline([
    ("dt", DecisionTreeRegressor(max_depth=3, random_state=0)),
])
pipeline.fit(trainX, trainy)

# Write the fitted pipeline to decisionTree.pmml in the working directory;
# this is the file the deploy command below uploads.
sklearn2pmml(pipeline, "decisionTree.pmml", with_repr=True)

In [None]:
 # List the feature columns the model was fitted on.
 # NOTE(review): this cell's code starts with a stray leading space; it is
 # kept as-is here but is not valid at the top level of a plain Python script.
 trainX.columns

# Deploying to Openscoring

**Start a server:** `java -jar openscoring-server-executable-2.1.1.jar`

In [None]:
# Client object for the Openscoring service expected at localhost:8080.
# NOTE(review): the variable name `os` would shadow the standard-library `os`
# module if that module were ever imported in this notebook.
os = Openscoring(base_url = "http://localhost:8080/openscoring")

**Deploy the model:** `java -cp openscoring-client-executable-2.1.1.jar org.openscoring.client.Deployer --model http://localhost:8080/openscoring/model/DT --file decisionTree.pmml`

In [None]:
# Build one sample request for the deployed model in three parallel forms:
#   columns   - the feature names with spaces replaced by underscores
#   arguments - {name: value} dict for the Python client
#   col_spec  - the same values as " -X<name>=<value>" flags for the
#               command-line client
# The sample has budget 0.3, the Animation indicator set to 1, and every other
# feature set to 0.
# NOTE(review): the model was fitted on the original trainX column names;
# confirm the service resolves the underscore variants of names that contain
# spaces.
sample_values = {"budget": 0.3, "Animation": 1}

columns = []
arguments = {}
flag_parts = []
for genre in trainX.columns:
    new_name = genre.replace(" ", "_")
    value = sample_values.get(genre, 0)  # features not listed above default to 0
    columns.append(new_name)
    arguments[new_name] = value
    flag_parts.append(f" -X{new_name}={value}")

# Join once instead of growing the string inside the loop.
col_spec = "".join(flag_parts)

# Only the last expression of a cell is displayed, so the name list is printed
# explicitly; col_spec is shown as the cell's output.
print(columns)
col_spec

In [None]:
# Show the name -> value mapping that will be sent to the service.
arguments

In [None]:
# Score the sample through the Python client by passing the plain arguments
# dict; "DT" is the model id used in the deploy command.
results = os.evaluate("DT", arguments)
print(results)


In [None]:

# Same evaluation, but with the arguments wrapped in an explicit
# EvaluationRequest ("001" is presumably the request id -- confirm against the
# client docs). The object returned for this form exposes the predictions
# through its .results attribute.
evaluationRequest = EvaluationRequest("001", arguments)

evaluationResponse = os.evaluate("DT", evaluationRequest)
print(evaluationResponse.results)

Equivalently, run the command-line client:

**java -cp openscoring-client-executable-2.1.1.jar org.openscoring.client.Evaluator --model http://localhost:8080/openscoring/model/DT  -Xbudget=0.3 -XAnimation=1 -XComedy=0 -XFamily=0 -XAdventure=0 -XFantasy=0 -XRomance=0 -XDrama=0 -XAction=0 -XCrime=0 -XThriller=0 -XHorror=0 -XHistory=0 -XScience_Fiction=0 -XMystery=0 -XWar=0 -XForeign=0 -XMusic=0 -XDocumentary=0 -XWestern=0 -XTV_Movie=0 -XCarousel_Productions=0 -XVision_View_Entertainment=0 -XTelescene_Film_Group_Productions=0 -XAniplex=0 -XGoHands=0 -XBROSTA_TV=0 -XMardock_Scramble_Production_Committee=0 -XSentai_Filmworks=0 -XOdyssey_Media=0 -XPulser_Productions=0 -XRogue_State=0 -XThe_Cartel=0**
