In [25]:
import pickle
import pandas as pd
import random

In [26]:
def load_model(path: str = '../models/xgboost_best_model.pickle'):
    '''Load the pickled best model from disk.

    Args:
        path: Location of the pickle file. Defaults to the location that was
            previously hard-coded, so existing `load_model()` calls behave
            exactly as before.

    Returns:
        Whatever object was stored in the pickle file.

    Raises:
        FileNotFoundError: If `path` does not exist.
    '''

    # SECURITY: unpickling can execute arbitrary code. Only load files that
    # were produced by a trusted source (e.g. this project's training step).
    with open(path, 'rb') as f:
        model = pickle.load(f)

    return model

# Load the model once at cell level so that later cells can reuse `model`.
model = load_model()

In [27]:
# Column name -> dtype string. This mapping is passed to `load_csv`, which
# forwards it as the `dtype` argument of `pd.read_csv`. Columns are declared
# either as 'category' or as 'float'.
schema = {'warranty': 'category',
 'sub_status': 'category',
 'condition': 'category',
 'deal_ids': 'float',
 'base_price': 'float',
 'seller_id': 'category',
 'listing_type_id': 'category',
 'price': 'float',
 'buying_mode': 'category',
 'parent_item_id': 'category',
 'category_id': 'category',
 'official_store_id': 'category',
 'accepts_mercadopago': 'category',
 'original_price': 'float',
 'currency_id': 'category',
 'automatic_relist': 'category',
 'status': 'category',
 'initial_quantity': 'float',
 'sold_quantity': 'float',
 'available_quantity': 'float',
 'seller_address.longitude': 'float',
 'seller_address.id': 'category',
 'seller_address.address_line': 'category',
 'seller_address.latitude': 'float',
 'seller_address.search_location.neighborhood.id': 'category',
 'seller_address.search_location.state.id': 'category',
 'seller_address.search_location.city.id': 'category',
 'seller_address.zip_code': 'category',
 'seller_address.city.id': 'category',
 'seller_address.state.id': 'category',
 'shipping.local_pick_up': 'category',
 'shipping.tags': 'category',
 'shipping.mode': 'category',
 'shipping.free_methods': 'category',
 'seller_contact.phone2': 'category',
 'seller_contact.webpage': 'category',
 'seller_contact.email': 'category',
 'seller_contact.contact': 'category',
 'seller_contact.area_code': 'category',
 'seller_contact.other_info': 'category',
 'seller_contact.phone': 'category',
 'location.open_hours': 'category',
 'location.neighborhood.id': 'category',
 'location.longitude': 'float',
 'location.address_line': 'category',
 'location.latitude': 'float',
 'location.city.id': 'category',
 'location.state.id': 'category',
 'non_mercado_pago_payment_methods.MLAWC': 'category',
 'non_mercado_pago_payment_methods.MLACD': 'category',
 'non_mercado_pago_payment_methods.MLAVS': 'category',
 'non_mercado_pago_payment_methods.MLAMO': 'category',
 'non_mercado_pago_payment_methods.MLADC': 'category',
 'non_mercado_pago_payment_methods.MLAMP': 'category',
 'non_mercado_pago_payment_methods.MLAMC': 'category',
 'non_mercado_pago_payment_methods.MLABC': 'category',
 'non_mercado_pago_payment_methods.MLAOT': 'category',
 'non_mercado_pago_payment_methods.MLAAM': 'category',
 'non_mercado_pago_payment_methods.MLAWT': 'category',
 'non_mercado_pago_payment_methods.MLAVE': 'category',
 'non_mercado_pago_payment_methods.MLATB': 'category',
 'non_mercado_pago_payment_methods.MLAMS': 'category',
 'tags.good_quality_thumbnail': 'category',
 'tags.dragged_visits': 'category',
 'tags.free_relist': 'category',
 'tags.dragged_bids_and_visits': 'category',
 'tags.poor_quality_thumbnail': 'category',
 'pictures.large': 'float',
 'pictures.medium': 'float',
 'pictures.small': 'float',
 'title.new': 'category',
 'video': 'category',
 'seller_address.product_count': 'float'}

In [28]:
def load_csv(schema: dict,
             path: str = '../data/clean/output.csv',
             rows_to_drop=(16647, 83078, 92230, 82275, 88864),
             cols_to_drop=('deal_ids', 'tags.poor_quality_thumbnail',
                           'original_price', 'shipping.tags',
                           'seller_contact.other_info', 'seller_contact.phone',
                           'location.open_hours', 'location.address_line'),
             ) -> pd.DataFrame:
    '''Load the clean data and remove the rows and columns that are not used.

    Args:
        schema: Mapping of column name to dtype, forwarded to `pd.read_csv`
            as its `dtype` argument.
        path: CSV file to read. Defaults to the previously hard-coded path.
        rows_to_drop: Index labels of rows to remove. The defaults are the
            five rows the original code dropped (noted there as having 70+
            missing values).
        cols_to_drop: Column names to remove. The defaults are the columns the
            original code dropped.

    Returns:
        A new DataFrame without the given rows and columns. The index is not
        reset, so the remaining rows keep their original labels.

    Raises:
        KeyError: If any requested row label or column name is missing.
    '''

    df = pd.read_csv(path, dtype=schema)

    # Convert to lists: a tuple passed to `drop` could be read as one label.
    # `dict.fromkeys` removes duplicate column names while keeping their order.
    row_labels = list(rows_to_drop)
    column_names = list(dict.fromkeys(cols_to_drop))

    # A single `drop` call avoids copying the frame once per removed row.
    df_cleaned = df.drop(index=row_labels, columns=column_names)

    return df_cleaned

# Read the cleaned data, using the dtypes declared in `schema`.
df = load_csv(schema)

In [30]:
def select_random_index(df: pd.DataFrame):
    '''Pick one entry at random from the DataFrame's index.

    The returned value is an element of `df.index` (an index label), chosen
    with `random.choice`.
    '''
    index_labels = df.index
    chosen_label = random.choice(index_labels)
    return chosen_label

# Draw one index label from `df` (an element of `df.index`, not a row position).
random_index = select_random_index(df)

In [39]:
# `random_index` is a label taken from `df.index`, so the row must be selected
# with the label-based `.loc`. `load_csv` drops rows without resetting the
# index, so labels and positions no longer match: the positional `.iloc` would
# return a different row, or raise IndexError for labels >= len(df).
# 'condition' is removed before predicting (presumably it is the target the
# model predicts -- confirm against the training code).
data_for_prediction = df.drop(columns=['condition']).loc[[random_index]]
data_for_prediction

In [44]:
# Make a prediction for the single row held in `data_for_prediction`.
# The first element of the result is displayed; presumably `predict` returns
# one value per input row -- confirm against the model type.
prediction = model.predict(data_for_prediction)
prediction[0]

1