<a href="https://colab.research.google.com/github/ChiefGupta/Project1/blob/main/Project1_Demonstrator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix, hstack
import lightgbm as lgb
!pip install joblib
import joblib

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [4]:
model = lgb.Booster(model_file='/content/drive/My Drive/trained_lightgbm_model.txt')

In [9]:
cv = joblib.load('/content/drive/My Drive/database/cv.pkl')
tv = joblib.load('/content/drive/My Drive/database/tv.pkl')
lb = joblib.load('/content/drive/My Drive/database/lb.pkl')

In [28]:
def handle_missing_inplace(dataset):
    # Convert 'category_name' to categorical type if it's not already
    if not pd.api.types.is_categorical_dtype(dataset['category_name']):
        dataset['category_name'] = dataset['category_name'].astype('category')

    # Check if 'missing' category exists before adding
    if 'missing' not in dataset['category_name'].cat.categories:
        dataset['category_name'] = dataset['category_name'].cat.add_categories('missing')
    dataset['category_name'].fillna(value='missing', inplace=True)
    dataset['brand_name'].fillna(value='missing', inplace=True)
    dataset['item_description'].replace('No description yet', 'missing', inplace=True)
    dataset['item_description'].fillna(value='missing', inplace=True)

def to_categorical(dataset):
    dataset['category_name'] = dataset['category_name'].astype('category')
    dataset['brand_name'] = dataset['brand_name'].astype('category')
    dataset['item_condition_id'] = dataset['item_condition_id'].astype('category')

In [38]:
manual_input = pd.DataFrame({
    'name': ['Vintage Designer Bag'],  # Example: ['Vintage Designer Bag']
    'item_condition_id': [3],  # Example: [3]
    'category_name': ['Women/Bags/Handbags'],  # Example: ['Women/Bags/Handbags']
    'brand_name': ['Gucci'],  # Example: ['Gucci']
    'shipping': [1],  # Example: [1] if shipping paid by seller
    'item_description': ['A luxury handbag in great condition']  # Example: ['A luxury handbag in great condition.']
})

manual_input.head()

Unnamed: 0,name,item_condition_id,category_name,brand_name,shipping,item_description
0,Vintage Designer Bag,3,Women/Bags/Handbags,Gucci,1,A luxury handbag in great condition


In [None]:
# Apply preprocessing to the input data
handle_missing_inplace(manual_input)
to_categorical(manual_input)

# Vectorize the features using the loaded vectorizers
# Limit the vocabulary to the features the model was trained on
X_name_input = cv.transform(manual_input['name'])
X_category_input = cv.transform(manual_input['category_name'])
X_description_input = tv.transform(manual_input['item_description'])
X_brand_input = lb.transform(manual_input['brand_name'])
X_dummies_input = csr_matrix(pd.get_dummies(manual_input[['item_condition_id', 'shipping']], sparse=True).values)


# Create a sparse matrix for the input data
sparse_input = hstack((X_dummies_input, X_description_input, X_brand_input, X_category_input, X_name_input)).tocsr()

In [40]:
# Predict the price using the loaded model
predicted_price = model.predict(sparse_input, num_iteration=model.best_iteration, predict_disable_shape_check=True)

# If the model was trained on log-transformed prices, apply exponential transformation to get the actual price
predicted_price = np.expm1(predicted_price)[0]

In [41]:
print(f"The recommended price for the product is: {predicted_price:.2f}")

The recommended price for the product is: 14.59
