# Install & Import Library

In [1]:
# Detect whether the notebook is running on Google Colab so that the
# Colab-only setup cell below can be skipped elsewhere.
try:
  import google.colab  # noqa: F401  (imported only to probe availability)
  IN_COLAB = True
except ImportError:
  # Only a missing module means "not on Colab"; anything else (including
  # KeyboardInterrupt) should propagate instead of being swallowed.
  IN_COLAB = False

In [2]:
if IN_COLAB:
    !apt-get install openjdk-8-jdk-headless -qq > /dev/null
    !wget -q https://dlcdn.apache.org/spark/spark-3.3.2/spark-3.3.2-bin-hadoop3.tgz
    !tar xf spark-3.3.2-bin-hadoop3.tgz
    !mv spark-3.3.2-bin-hadoop3 spark
    !pip install -q findspark

In [3]:
import os

# Export the locations of the Java 8 JDK and the Spark directory.
# SPARK_HOME assumes Spark was unpacked as /content/spark.
os.environ.update(
    {
        "JAVA_HOME": "/usr/lib/jvm/java-8-openjdk-amd64",
        "SPARK_HOME": "/content/spark",
    }
)

In [4]:
import findspark

# Master URL handed to the SparkSession builder further down.
spark_url = "local"

# Make the Spark installation importable as pyspark
# (presumably located through SPARK_HOME — confirm against findspark docs).
findspark.init()

In [5]:
from pyspark.sql import SparkSession

# Create the session (or reuse an existing one) against `spark_url`,
# registered under the application name 'Spark ML'.
spark = (
    SparkSession.builder
    .master(spark_url)
    .appName("Spark ML")
    .getOrCreate()
)

In [6]:
!pip install mlflow --quiet
!pip install pyngrok --quiet
!pip install fastapi --quiet
!pip install uvicorn --quiet
!pip install pickle5 --quiet
!pip install pydantic --quiet
!pip install requests --quiet
!pip install pypi-json --quiet
!pip install pyngrok --quiet
!pip install nest-asyncio --quiet

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.7/17.7 MB[0m [31m34.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m83.5/83.5 kB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m184.3/184.3 kB[0m [31m18.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.5/224.5 kB[0m [31m22.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m148.1/148.1 kB[0m [31m14.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.5/79.5 kB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.7/78.7 kB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 kB[0m [31m6.7 MB/s[0

In [7]:
from fastapi import FastAPI
from pydantic import BaseModel
import pickle
import json
import uvicorn
from pyngrok import ngrok
from fastapi.middleware.cors import CORSMiddleware
import nest_asyncio
import mlflow
import pandas as pd
from pyspark.sql.functions import col

# Load MLflow Model (upload mlruns.zip)

In [8]:
import zipfile

# Location of the uploaded archive and the folder its contents are unpacked into.
zip_path = "/content/mlruns.zip"
destination_folder = "/content/mlruns"

# Unpack every member of the archive into the destination folder.
with zipfile.ZipFile(zip_path) as archive:
    archive.extractall(path=destination_folder)

# Start App

In [15]:
# FastAPI application object; the CORS middleware and the /predict route
# defined in the following cells are registered on it.
app = FastAPI()

In [16]:
# CORS policy: any origin, any HTTP method and any header are accepted.
origins = ["*"]

cors_options = {
    "allow_origins": origins,
    "allow_credentials": True,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}

# NOTE(review): wildcard origins together with allow_credentials=True is very
# permissive — confirm this is intended for the deployed API.
app.add_middleware(CORSMiddleware, **cors_options)

In [17]:
from typing import List
# Request body schema for the /predict endpoint.
class model_input(BaseModel):
    # Latitude of the problem location.
    latitude : float
    # Longitude of the problem location.
    longitude : float
    # Length of the comment describing the problem.
    comment_len : int
    # Problem types; the endpoint builds one model input row per entry.
    ptype: List[str]

In [18]:
# MLflow run URI of the Spark model to serve; change it to deploy another model.
logged_model = "runs:/d0c3014811dc43608ab1fd9c42a9969c/random_forest_model"

# Load the Spark ML model once so that every request reuses it.
loaded_model = mlflow.spark.load_model(model_uri=logged_model)
@app.post("/predict")
def predict(input_parameters : model_input):
    """Return the mean model prediction over the submitted problem types.

    One input row is built per entry of ``input_parameters.ptype``; every row
    shares the request's latitude, longitude and comment length. The rows are
    scored with ``loaded_model`` and the mean of the ``prediction`` column is
    returned.

    Args:
        input_parameters: Validated request body (see ``model_input``).

    Returns:
        The mean of the ``prediction`` column across all generated rows.

    Raises:
        HTTPException: 422 when ``ptype`` is empty. Spark cannot infer a
            schema from an empty list of rows, so without this check the
            request would fail with an internal server error.
    """
    # Imported here because the notebook's import cell does not provide it.
    from fastapi import HTTPException

    problem_types = input_parameters.ptype
    if not problem_types:
        raise HTTPException(
            status_code=422,
            detail="ptype must contain at least one problem type",
        )

    # Read the validated fields directly; serialising the model to JSON and
    # parsing it back is unnecessary.
    rows = [
        {
            'latitude': input_parameters.latitude,
            'longitude': input_parameters.longitude,
            'comment_len': input_parameters.comment_len,
            'type_exploded': ptype
        }
        for ptype in problem_types
    ]

    # Create DataFrame from the list of dictionaries
    df = spark.createDataFrame(rows)

    pred = loaded_model.transform(df)
    mean_prediction = pred.selectExpr("mean(prediction)").collect()[0][0]
    return mean_prediction

2023/05/18 08:27:00 INFO mlflow.spark: 'runs:/d0c3014811dc43608ab1fd9c42a9969c/random_forest_model' resolved as 'file:///content/mlruns/942725219065911568/d0c3014811dc43608ab1fd9c42a9969c/artifacts/random_forest_model'
2023/05/18 08:27:00 INFO mlflow.spark: URI 'runs:/d0c3014811dc43608ab1fd9c42a9969c/random_forest_model/sparkml' does not point to the current DFS.
2023/05/18 08:27:00 INFO mlflow.spark: File 'runs:/d0c3014811dc43608ab1fd9c42a9969c/random_forest_model/sparkml' not found on DFS. Will attempt to upload the file.


In [19]:
# Open a public ngrok tunnel to the local port the API server listens on.
ngrok_tunnel = ngrok.connect(8000)
print('Public URL:', ngrok_tunnel.public_url)

# Patch asyncio so uvicorn can run its event loop inside the notebook kernel,
# which presumably already has a running loop.
nest_asyncio.apply()
try:
    # Blocks until the server is stopped (e.g. by interrupting the cell).
    uvicorn.run(app, port=8000)
finally:
    # Close the tunnel once the server stops; otherwise each re-run of this
    # cell leaves another tunnel open.
    ngrok.disconnect(ngrok_tunnel.public_url)



Public URL: https://e0bc-35-245-156-138.ngrok.io


INFO:     Started server process [173]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit)


INFO:     184.22.208.16:0 - "POST /predict HTTP/1.1" 200 OK


INFO:     Shutting down
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.
INFO:     Finished server process [173]


Send a POST request to `/predict` on the ngrok public URL,
with a JSON body containing the following fields:

1.   latitude : float #latitude of problem location
2.   longitude : float #longitude of problem location
3.   comment_len : int #length of comment to describe problem
4.   ptype : list of string #problem type

The endpoint returns the predicted number of days for the problem to be resolved, averaged over the given problem types.




```
#Example of json
{
    "latitude": 100.53,
    "longitude": 13.72,
    "comment_len": 46,
    "ptype": ["สะพาน","ถนน"]
}
```

