In [0]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn import metrics
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import RMSprop,Adam

import pickle

In [0]:
# Install the mlflow package through a shell command; no version is pinned.
# NOTE(review): `!pip` runs in a subshell and may not target the kernel's
# environment. `%pip` is the usual alternative, but confirm first: on some
# platforms it resets notebook state, which would discard the imports above.
!pip install mlflow

In [0]:
import mlflow

# **Predictions**

In [0]:
# Read the CSV of new records to score into a DataFrame.
new_data = pd.read_csv(
    filepath_or_buffer="/dbfs/FileStore/shared_uploads/agusbaffo@gmail.com/new_data.csv",
)

In [0]:
# Preview the first five rows of the raw input.
new_data.head(n=5)

Unnamed: 0.1,Unnamed: 0,id,Gender,Customer Type,Age,Type of Travel,Class,Flight Distance,Inflight wifi service,Departure/Arrival time convenient,Ease of Online booking,Gate location,Food and drink,Online boarding,Seat comfort,Inflight entertainment,On-board service,Leg room service,Baggage handling,Checkin service,Inflight service,Cleanliness,Departure Delay in Minutes,Arrival Delay in Minutes
0,0,19556,Female,Loyal Customer,52,Business travel,Eco,160,5,4,3,4,3,4,3,5,5,5,5,2,5,5,50,44.0
1,1,90035,Female,Loyal Customer,36,Business travel,Business,2863,1,1,3,1,5,4,5,4,4,4,4,3,4,5,0,0.0
2,2,12360,Male,disloyal Customer,20,Business travel,Eco,192,2,0,2,4,2,2,2,2,4,1,3,2,2,2,0,0.0


In [0]:
def preprocess(X_, preprocess_path="/dbfs/FileStore/shared_uploads/agusbaffo@gmail.com/preprocess.pickle"):
  """Apply the saved scaling, PCA and categorical encoding to a raw feature frame.

  Parameters
  ----------
  X_ : pandas.DataFrame
      Raw records. Must contain every column in ``numerical_cols`` below plus
      'Gender', 'Unnamed: 0', 'id', 'Customer Type', 'Type of Travel' and
      'Class'. The input frame is not modified.
  preprocess_path : str, optional
      Path of a pickled dict with the keys "sc", "pca1" and "pca2"; each value
      must expose a ``transform`` method. Defaults to the location that was
      previously hard-coded, so existing callers are unaffected.

  Returns
  -------
  pandas.DataFrame
      A new frame in which rows containing any NaN are removed, the numerical
      columns are replaced by ``sc.transform`` output, 'PCA1' / 'PCA2' are
      added, the source columns of both PCAs and 'Gender', 'Unnamed: 0', 'id'
      are dropped, and the three categorical columns are integer-encoded.

  Raises
  ------
  OSError
      If ``preprocess_path`` cannot be opened.
  KeyError
      If a required column or pickle key is missing.
  """
  # NOTE(security): pickle.load can execute arbitrary code while loading.
  # Only point preprocess_path at artifacts produced by a trusted pipeline.
  # The context manager guarantees the file handle is closed after loading.
  with open(preprocess_path, "rb") as pickle_file:
    preprocess_dict = pickle.load(pickle_file)
  sc = preprocess_dict["sc"]
  pca1 = preprocess_dict["pca1"]
  pca2 = preprocess_dict["pca2"]

  numerical_cols = ['Age',
    'Flight Distance',
    'Inflight wifi service',
    'Departure/Arrival time convenient',
    'Ease of Online booking',
    'Gate location',
    'Food and drink',
    'Online boarding',
    'Seat comfort',
    'Inflight entertainment',
    'On-board service',
    'Leg room service',
    'Baggage handling',
    'Checkin service',
    'Inflight service',
    'Cleanliness',
    'Departure Delay in Minutes',
    'Arrival Delay in Minutes']

  # Groups of columns that each PCA object collapses into one new column.
  pca1_cols = ['Inflight wifi service', 'Ease of Online booking']
  pca2_cols = ['Cleanliness', 'Inflight entertainment', 'Seat comfort', 'Food and drink']

  # Work on an independent copy without incomplete rows. NaNs in ANY column
  # (including ones dropped later) remove the row, as before.
  X = X_.dropna(axis=0).copy()

  # Scaling the data
  X[numerical_cols] = sc.transform(X[numerical_cols])

  # Using PCA to reduce the dimensions of highly correlated features;
  # the PCA inputs are the already-scaled columns.
  X['PCA1'] = pca1.transform(X[pca1_cols])
  X['PCA2'] = pca2.transform(X[pca2_cols])

  # Drop the PCA source columns and the columns that are not used downstream.
  X = X.drop(columns=pca2_cols + pca1_cols + ['Gender', 'Unnamed: 0', 'id'])

  # Integer-encode the categorical columns. Values missing from a mapping
  # become NaN (pandas Series.map behaviour).
  X['Customer Type'] = X['Customer Type'].map({'disloyal Customer': 0, 'Loyal Customer' :1})
  X['Type of Travel'] = X['Type of Travel'].map({'Personal Travel': 0, 'Business travel' :1})
  X['Class'] = X['Class'].map({'Eco': 0, 'Eco Plus' :1, 'Business': 2})

  return X

In [0]:
# Run the preprocessing function on the raw records, then preview the result.
prepro_new_data = preprocess(X_=new_data)
prepro_new_data.head(n=5)

Unnamed: 0,Customer Type,Age,Type of Travel,Class,Flight Distance,Departure/Arrival time convenient,Gate location,Online boarding,On-board service,Leg room service,Baggage handling,Checkin service,Inflight service,Departure Delay in Minutes,Arrival Delay in Minutes,PCA1,PCA2
0,1,0.835009,1,0,-1.03212,0.616249,0.800627,0.555423,1.255467,1.253304,1.158561,-1.030767,1.156211,0.924849,0.744766,-1.331779,-1.060303
1,1,-0.223679,1,2,1.678219,-1.350673,-1.547312,0.555423,0.479237,0.493081,0.311853,-0.240497,0.30558,-0.386917,-0.392229,0.798285,-2.159918
2,0,-1.282366,1,0,-1.000033,-2.006314,0.800627,-0.926688,0.479237,-1.78759,-0.534854,-1.030767,-1.395683,-0.386917,-0.392229,0.771233,1.99678


## Load model and predict new data

In [0]:
# MLflow run URI of the model used to produce `rf_pred`.
logged_model = 'runs:/8be3597813b74db29363112cd439b91e/model'

# Fetch the logged artifact through MLflow's generic pyfunc loader.
loaded_model = mlflow.pyfunc.load_model(model_uri=logged_model)

# Score the preprocessed frame and display the predictions.
rf_pred = loaded_model.predict(pd.DataFrame(data=prepro_new_data))
rf_pred

In [0]:
# MLflow run URI of the model used to produce `ann_pred`.
logged_model = 'runs:/a73181d61519474bb19cd569b6359bb1/model'

# Fetch the logged artifact through MLflow's generic pyfunc loader.
loaded_model = mlflow.pyfunc.load_model(model_uri=logged_model)

# Score the preprocessed frame; outputs strictly above 0.5 become 1, the rest 0.
ann_pred = np.array(
    loaded_model.predict(pd.DataFrame(data=prepro_new_data)) > 0.5,
    dtype=int,
)
ann_pred

## Registering models

In [0]:
# Name under which both runs are registered.
# NOTE(review): both calls use the same registered-model name, and `ret` only
# keeps the result of the second call — confirm this is intended.
model_name = "aps_model"
ret = mlflow.register_model(  # RF model
    model_uri='runs:/8be3597813b74db29363112cd439b91e/model',
    name=model_name,
)
ret = mlflow.register_model(  # ANN model
    model_uri='runs:/a73181d61519474bb19cd569b6359bb1/model',
    name=model_name,
)