In [None]:
import pandas as pd
import yaml

# Read the raw training CSV produced by the data-ingestion stage and preview
# its first rows.
raw_train_csv = "./../artifacts/data_ingestion/raw/Train.csv"
df = pd.read_csv(raw_train_csv)
df.head()


Unnamed: 0,Item_Identifier,Item_Weight,Item_Fat_Content,Item_Visibility,Item_Type,Item_MRP,Outlet_Identifier,Outlet_Establishment_Year,Outlet_Size,Outlet_Location_Type,Outlet_Type,Item_Outlet_Sales
0,FDA15,9.3,Low Fat,0.016047,Dairy,249.8092,OUT049,1999,Medium,Tier 1,Supermarket Type1,3735.138
1,DRC01,5.92,Regular,0.019278,Soft Drinks,48.2692,OUT018,2009,Medium,Tier 3,Supermarket Type2,443.4228
2,FDN15,17.5,Low Fat,0.01676,Meat,141.618,OUT049,1999,Medium,Tier 1,Supermarket Type1,2097.27
3,FDX07,19.2,Regular,0.0,Fruits and Vegetables,182.095,OUT010,1998,,Tier 3,Grocery Store,732.38
4,NCD19,8.93,Low Fat,0.0,Household,53.8614,OUT013,1987,High,Tier 3,Supermarket Type1,994.7052


In [None]:
# Load the expected column names and dtypes from the schema config.
# The file is opened in a `with` block so the handle is closed as soon as the
# YAML has been parsed instead of lingering until garbage collection.
with open("./../configs/schema.yaml") as schema_file:
    schema = yaml.safe_load(schema_file)
schema


{'columns': ['Item_Identifier',
  'Item_Weight',
  'Item_Fat_Content',
  'Item_Visibility',
  'Item_Type',
  'Item_MRP',
  'Outlet_Identifier',
  'Outlet_Establishment_Year',
  'Outlet_Size',
  'Outlet_Location_Type',
  'Outlet_Type',
  'Item_Outlet_Sales'],
 'columns_datatype': {'Item_Identifier': 'object',
  'Item_Weight': 'float64',
  'Item_Fat_Content': 'object',
  'Item_Visibility': 'float64',
  'Item_Type': 'object',
  'Item_MRP': 'float64',
  'Outlet_Identifier': 'object',
  'Outlet_Establishment_Year': 'int64',
  'Outlet_Size': 'object',
  'Outlet_Location_Type': 'object',
  'Outlet_Type': 'object',
  'Item_Outlet_Sales': 'float64'}}

In [None]:
# List every column name the schema expects, one per line.
expected_columns = schema["columns"]
for column_name in expected_columns:
    print(column_name)


Item_Identifier
Item_Weight
Item_Fat_Content
Item_Visibility
Item_Type
Item_MRP
Outlet_Identifier
Outlet_Establishment_Year
Outlet_Size
Outlet_Location_Type
Outlet_Type
Item_Outlet_Sales


In [None]:
# Cross-check the dataset's columns against the schema in both directions.
expected_columns = schema["columns"]

# Pass 1: echo every dataset column (with its Python type) and flag any that
# the schema does not list.
for column_name in df.columns:
    print(column_name, type(column_name))
    if column_name in expected_columns:
        continue
    print(f"[ {column_name} ] is extra column present in dataset")

# Pass 2: flag schema columns that the dataset is missing.
actual_columns = df.columns
for column_name in expected_columns:
    if column_name in actual_columns:
        continue
    print(f"[ {column_name} ] column is not present in dataset")


Item_Identifier <class 'str'>
Item_Weight <class 'str'>
Item_Fat_Content <class 'str'>
Item_Visibility <class 'str'>
Item_Type <class 'str'>
Item_MRP <class 'str'>
Outlet_Identifier <class 'str'>
Outlet_Establishment_Year <class 'str'>
Outlet_Size <class 'str'>
Outlet_Location_Type <class 'str'>
Outlet_Type <class 'str'>
Item_Outlet_Sales <class 'str'>


In [None]:
# Display the column -> expected dtype mapping declared in the schema.
expected_dtypes = schema["columns_datatype"]
expected_dtypes


{'Item_Identifier': 'object',
 'Item_Weight': 'float64',
 'Item_Fat_Content': 'object',
 'Item_Visibility': 'float64',
 'Item_Type': 'object',
 'Item_MRP': 'float64',
 'Outlet_Identifier': 'object',
 'Outlet_Establishment_Year': 'int64',
 'Outlet_Size': 'object',
 'Outlet_Location_Type': 'object',
 'Outlet_Type': 'object',
 'Item_Outlet_Sales': 'float64'}

In [None]:
# Compare each dataset column's actual dtype with the dtype the schema declares
# and report every mismatch.
expected_dtypes = schema["columns_datatype"]
for col in df.columns:
    if col not in expected_dtypes:
        # A column absent from the schema has no declared dtype to compare
        # against; report it instead of failing with a KeyError.
        print(f"[ {col} ] has no datatype declared in schema")
        continue
    if df[col].dtype != expected_dtypes[col]:
        # The required dtype comes from the "columns_datatype" mapping;
        # schema["columns"] is a plain list and cannot be indexed by name.
        print(f"{col} is of type [ {df[col].dtype} ], required type [ {expected_dtypes[col]} ]")


# MLflow

- Start the tracking server: `mlflow server --default-artifact-root ./artifacts --host 0.0.0.0 --port 5000`
- [Deploy a model](https://mlflow.org/docs/latest/models.html#deploy-mlflow-models): `mlflow models serve -m model_uri -h host -p port --no-conda`. The served model exposes these endpoints:
  - /ping
  - /health
  - /invocations
  - /version


In [2]:
import requests


In [3]:
import pandas as pd
import pickle


In [6]:
# Load the fitted preprocessing object saved by the data-preprocessing stage.
# NOTE(review): pickle.load can execute arbitrary code during unpickling, so
# only load artifacts that this project's own pipeline produced.
# The `with` block guarantees the file handle is closed after loading.
with open("./../artifacts/data_preprocessing/preprocessed.pkl", "rb") as preprocessor_file:
    pre_obj = pickle.load(preprocessor_file)
pre_obj


In [7]:
# Check the server's health endpoint before sending prediction requests.
# requests has no default timeout, so an explicit one stops this cell from
# hanging indefinitely when the server is not responding.
requests.get("http://localhost:5000/health", timeout=5)


<Response [200]>

In [8]:
# Build a small inference sample: the first five rows of the raw training CSV
# with the last column removed (positional slice, so it relies on column order).
raw_train_csv = "./../artifacts/data_ingestion/raw/Train.csv"
df = pd.read_csv(raw_train_csv).head().iloc[:, :-1]
df


Unnamed: 0,Item_Identifier,Item_Weight,Item_Fat_Content,Item_Visibility,Item_Type,Item_MRP,Outlet_Identifier,Outlet_Establishment_Year,Outlet_Size,Outlet_Location_Type,Outlet_Type
0,FDA15,9.3,Low Fat,0.016047,Dairy,249.8092,OUT049,1999,Medium,Tier 1,Supermarket Type1
1,DRC01,5.92,Regular,0.019278,Soft Drinks,48.2692,OUT018,2009,Medium,Tier 3,Supermarket Type2
2,FDN15,17.5,Low Fat,0.01676,Meat,141.618,OUT049,1999,Medium,Tier 1,Supermarket Type1
3,FDX07,19.2,Regular,0.0,Fruits and Vegetables,182.095,OUT010,1998,,Tier 3,Grocery Store
4,NCD19,8.93,Low Fat,0.0,Household,53.8614,OUT013,1987,High,Tier 3,Supermarket Type1


In [20]:
# Run the sample rows through the loaded preprocessing object, then show the
# shape of the whole result and of a single transformed row.
sample_values = df.values
data = pre_obj.transform(sample_values)
data.shape, data[0].shape


((5, 40), (40,))

In [23]:
# Inspect the first transformed row.
first_row = data[0]
first_row


array([-8.41871687e-01, -9.70732174e-01,  1.74745381e+00,  1.39540761e-01,
        1.56000000e+02,  1.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        1.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        1.00000000e+00,  1.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  1.00000000e+00,  0.00000000e+00,  0.00000000e+00])

In [26]:
# Send the transformed rows to the server's /invocations endpoint and print
# the HTTP status followed by the decoded JSON response.
# requests has no default timeout, so an explicit one keeps the cell from
# blocking forever if the server stalls.
res = requests.post(
    "http://localhost:5000/invocations",
    json={"instances": data.tolist()},
    timeout=30,
)
print(res.status_code, res.reason)
print(res.json())


200 OK
{'predictions': [4577.21337890625, 678.0516357421875, 2250.5615234375, 616.0460205078125, 800.0388793945312]}
