In [1]:
import requests

vehicle_data = {
    'Cylinders': [4, 6, 8],
    'Displacement': [155.0, 160.0, 165.5],
    'Horsepower': [93.0, 130.0, 98.0],
    'Weight': [2500.0, 3150.0, 2600.0],
    'Acceleration': [15.0, 14.0, 16.0],
    'Model Year': [81, 80, 78],
    'Origin': [3, 2, 1]
}

url = 'https://fuel-usage-prediction.herokuapp.com/predict'
r = requests.post(url, json = vehicle_data)

r.text.strip()

'{"y_pred":[33.54333333333334,17.64333333333333,21.293333333333333]}'

In [9]:
import pandas as pd

from sklearn.preprocessing import OneHotEncoder
from sklearn.impute import SimpleImputer

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer

# Tansforms the numerical values in the Origin column to Strings
def map_origin_col(input_df: pd.DataFrame) -> pd.DataFrame:
    """_summary_

    Args:
        input_df (pd.DataFrame): _description_

    Returns:
        pd.DataFrame: _description_
    """    
    
    mapped_df = input_df.copy()
    mapped_df["Origin"] = mapped_df["Origin"].map({1: "India", 2: "USA", 3: "Germany"})
    return mapped_df


def num_preproc_pipeline() -> Pipeline:
    """_summary_

    Returns:
        Pipeline: _description_
    """    
    
    imputer = SimpleImputer(strategy="median")
    scaler = StandardScaler()
    
    num_pipeline = Pipeline([("imputer", imputer), 
                             ("scaler", scaler)], 
                            verbose=True)
    
    return num_pipeline

def cat_preproc_pipeline() -> Pipeline:
    """_summary_

    Returns:
        Pipeline: _description_
    """            
    ohe = OneHotEncoder()
    
    cat_pipeline = Pipeline([("one_hot_encoder", ohe)], 
                            verbose=True)
    
    return cat_pipeline

def full_preproc_ct(X_input: pd.DataFrame) -> tuple[pd.DataFrame, ColumnTransformer]:
    """_summary_

    Args:
        X_input (pd.DataFrame): _description_

    Returns:
        tuple[pd.DataFrame, ColumnTransformer]: _description_
    """
    
    num_pipeline = num_preproc_pipeline()
    cat_pipeline = cat_preproc_pipeline()
    
    num_attributes = X_input.select_dtypes(include=["float", "int64"]).columns
    cat_attributes = X_input.select_dtypes(include=["object"]).columns
    print(cat_attributes)
    full_pipeline = ColumnTransformer(
        [("cat", cat_pipeline, cat_attributes), 
         ("num", num_pipeline, num_attributes)],
        verbose=True,
    )

    preprocessed_data = full_pipeline.fit_transform(X_input)

    return preprocessed_data, full_pipeline


In [3]:
input_data = {
  "cylinders": 4,
  "displacement": 155.0,
  "horsepower": 93.0,
  "weight": 2500.0,
  "acceleration": 15.0,
  "model_year": 81,
  "Origin": 1
}

df = pd.DataFrame([input_data])
df.head()

Unnamed: 0,cylinders,displacement,horsepower,weight,acceleration,model_year,Origin
0,4,155.0,93.0,2500.0,15.0,81,1


In [10]:
test = full_preproc_ct(df)

Index(['cylinders', 'displacement', 'horsepower', 'weight', 'acceleration',
       'model_year', 'Origin'],
      dtype='object') Index([], dtype='object')
[Pipeline] ........... (step 1 of 2) Processing imputer, total=   0.0s
[Pipeline] ............ (step 2 of 2) Processing scaler, total=   0.0s
[ColumnTransformer] ........... (1 of 1) Processing num, total=   0.0s


In [5]:
test

(array([[0., 0., 0., 0., 0., 0., 0.]]),
 ColumnTransformer(transformers=[('cat',
                                  Pipeline(steps=[('one_hot_encoder',
                                                   OneHotEncoder())],
                                           verbose=True),
                                  Index([], dtype='object')),
                                 ('num',
                                  Pipeline(steps=[('imputer',
                                                   SimpleImputer(strategy='median')),
                                                  ('scaler', StandardScaler())],
                                           verbose=True),
                                  Index(['cylinders', 'displacement', 'horsepower', 'weight', 'acceleration',
        'model_year', 'Origin'],
       dtype='object'))],
                   verbose=True))