In [1]:
# Importing all the dependencies
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn import metrics
from xgboost import XGBRegressor

# Data Collection and analysis
dataset = pd.read_csv(r"C:\Users\Nishant\Desktop\ML\Big Mart Sales\Data.csv")

# Handling Missing Values
dataset['Item_Weight'].fillna(dataset['Item_Weight'].mean(), inplace=True)
mode_of_outlet_size = dataset.pivot_table(values='Outlet_Size', columns='Outlet_Type', aggfunc=lambda x: x.mode()[0])
missing_values = dataset['Outlet_Size'].isnull()
dataset.loc[missing_values, 'Outlet_Size'] = dataset.loc[missing_values, 'Outlet_Type'].apply(lambda x: mode_of_outlet_size[x])

# Data Preprocessing
dataset.replace({'Item_Fat_Content': {'low fat': 'Low Fat', 'LF': 'Low Fat', 'reg': 'Regular'}}, inplace=True)
encoder = LabelEncoder()
for column in ['Item_Identifier', 'Item_Fat_Content', 'Item_Type', 'Outlet_Identifier', 
               'Outlet_Size', 'Outlet_Location_Type', 'Outlet_Type']:
    dataset[column] = encoder.fit_transform(dataset[column].astype(str))

# Splitting features and target
X = dataset.drop(columns='Item_Outlet_Sales', axis=1)
Y = dataset['Item_Outlet_Sales']
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=2)

# Machine Learning Model Training
regressor = XGBRegressor()
regressor.fit(X_train, Y_train)

# Predictive System
def predictive_system(input_data):
    """
    Predicts Item Outlet Sales based on the input data.
    
    Parameters:
    input_data (dict): Dictionary containing all input feature values.
    
    Returns:
    float: Predicted sales value.
    """
    # Convert the input dictionary to a DataFrame
    input_df = pd.DataFrame([input_data])
    
    # Apply label encoding to categorical features
    for column in ['Item_Identifier', 'Item_Fat_Content', 'Item_Type', 'Outlet_Identifier', 
                   'Outlet_Size', 'Outlet_Location_Type', 'Outlet_Type']:
        input_df[column] = encoder.fit_transform(input_df[column].astype(str))
    
    # Ensure column order matches training set
    input_df = input_df[X.columns]
    
    # Predict using the trained model
    prediction = regressor.predict(input_df)
    return prediction[0]

# Example usage of the predictive system
input_example = {
    'Item_Identifier': 'FDA15',
    'Item_Weight': 9.3,
    'Item_Fat_Content': 'Low Fat',
    'Item_Visibility': 0.016047,
    'Item_Type': 'Dairy',
    'Item_MRP': 249.8092,
    'Outlet_Identifier': 'OUT049',
    'Outlet_Establishment_Year': 1999,
    'Outlet_Size': 'Medium',
    'Outlet_Location_Type': 'Tier 1',
    'Outlet_Type': 'Supermarket Type1'
}

# Print the predicted sales
predicted_sales = predictive_system(input_example)
print(predicted_sales)


2438.3586
