<a href="https://colab.research.google.com/github/SHEETALDHARASHAN/E-learning-platform/blob/main/sfc.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd

# Read the three source tables (sales history, store metadata, daily
# transaction counts) from CSV files in the working directory.
train_df, stores_df, transactions_df = (
    pd.read_csv(csv_name)
    for csv_name in ('train.csv', 'stores.csv', 'transactions.csv')
)

# Preview each table; one print call with a newline separator emits the
# same text as three consecutive prints.
print(train_df.head(), stores_df.head(), transactions_df.head(), sep='\n')

# Join store attributes onto every sales row, then attach the transaction
# count for the same (date, store) pair. Both joins use pandas' default
# inner join, so rows without a match on either side are dropped.
merged_df = (
    train_df
    .merge(stores_df, on='store_nbr')
    .merge(transactions_df, on=['date', 'store_nbr'])
    # Parse the date once, then derive the calendar parts from the parsed
    # column (assign evaluates keyword arguments in order).
    .assign(
        date=lambda frame: pd.to_datetime(frame['date']),
        month=lambda frame: frame['date'].dt.month,
        day=lambda frame: frame['date'].dt.day,
        year=lambda frame: frame['date'].dt.year,
    )
)

# Preview the merged, feature-enriched table
print(merged_df.head())
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error

# Response variable: the 'sales' column of the merged table.
target = merged_df['sales']

# Predictor matrix: store id, calendar parts, promotion value and
# transaction count, with the categorical 'family' column expanded into
# one-hot indicator columns.
features = pd.get_dummies(
    merged_df[['store_nbr', 'family', 'month', 'day', 'year', 'onpromotion', 'transactions']],
    columns=['family'],
)

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    random_state=42,
    test_size=0.2,
)

# Fit a 100-tree random forest (fit returns the estimator itself).
model = RandomForestRegressor(random_state=42, n_estimators=100).fit(X_train, y_train)

# Score the held-out rows
predictions = model.predict(X_test)

# Report the mean absolute error on the held-out rows
mae = mean_absolute_error(y_true=y_test, y_pred=predictions)
print(f'Mean Absolute Error: {mae}')


   id        date  store_nbr      family  sales  onpromotion
0   0  2013-01-01        1.0  AUTOMOTIVE    0.0          0.0
1   1  2013-01-01        1.0   BABY CARE    0.0          0.0
2   2  2013-01-01        1.0      BEAUTY    0.0          0.0
3   3  2013-01-01        1.0   BEVERAGES    0.0          0.0
4   4  2013-01-01        1.0       BOOKS    0.0          0.0
   store_nbr           city                           state type  cluster
0          1          Quito                       Pichincha    D       13
1          2          Quito                       Pichincha    D       13
2          3          Quito                       Pichincha    D        8
3          4          Quito                       Pichincha    D        9
4          5  Santo Domingo  Santo Domingo de los Tsachilas    D        4
         date  store_nbr  transactions
0  2013-01-01         25           770
1  2013-01-02          1          2111
2  2013-01-02          2          2358
3  2013-01-02          3          

In [None]:
def recommend_products(store_nbr, month, day, year, onpromotion, transactions,
                       estimator=None, feature_columns=None, product_families=None):
    """Rank product families by predicted sales for one store/day scenario.

    One candidate row is built per product family, all sharing the scalar
    inputs. The family is one-hot encoded, the columns are aligned to the
    layout used for training, and the estimator predicts sales per row.

    Args:
        store_nbr: Value for the 'store_nbr' feature.
        month: Value for the 'month' feature.
        day: Value for the 'day' feature.
        year: Value for the 'year' feature.
        onpromotion: Value for the 'onpromotion' feature.
        transactions: Value for the 'transactions' feature.
        estimator: Fitted object exposing ``predict``. Defaults to the
            notebook-level ``model``.
        feature_columns: Column labels, in order, that the estimator
            expects. Defaults to the notebook-level ``features.columns``.
        product_families: Family labels to score. Defaults to the unique
            values of the notebook-level ``merged_df['family']``.

    Returns:
        pandas.DataFrame with columns 'family', 'predicted_sales' and
        'percentage' (each family's share of the summed predictions, in
        percent), sorted by 'predicted_sales' in descending order. The
        frame is empty when there are no families to score.
    """
    # Fall back to the notebook globals so existing positional calls keep
    # working; passing the objects explicitly avoids hidden-state coupling.
    if estimator is None:
        estimator = model
    if feature_columns is None:
        feature_columns = features.columns
    if product_families is None:
        product_families = merged_df['family'].unique()

    feature_columns = list(feature_columns)
    product_families = list(product_families)

    result_columns = ['family', 'predicted_sales', 'percentage']
    if not product_families:
        # Nothing to score: return an empty, correctly-shaped result
        # instead of failing while building the candidate rows.
        return pd.DataFrame(columns=result_columns)

    # One row per family; the scalar inputs are broadcast to every row.
    all_families_df = pd.DataFrame({
        'store_nbr': store_nbr,
        'month': month,
        'day': day,
        'year': year,
        'onpromotion': onpromotion,
        'transactions': transactions,
        'family': product_families,
    })

    # One-hot encode the family column
    all_families_df = pd.get_dummies(all_families_df, columns=['family'])

    # Align to the training layout in a single step: columns missing from
    # the input are added as 0, unexpected columns are dropped, and the
    # order matches training. Appending missing columns at the end would
    # leave the order wrong, which estimators fitted on a DataFrame may
    # reject or silently mis-score.
    all_families_df = all_families_df.reindex(columns=feature_columns, fill_value=0)

    # Predict sales for each product family
    predicted_sales = estimator.predict(all_families_df)

    results_df = pd.DataFrame({
        'family': product_families,
        'predicted_sales': predicted_sales
    })

    total_predicted_sales = results_df['predicted_sales'].sum()

    # Share of the total per family; a zero total would otherwise yield
    # NaN percentages, so report 0.0 in that case.
    if total_predicted_sales:
        results_df['percentage'] = (results_df['predicted_sales'] / total_predicted_sales) * 100
    else:
        results_df['percentage'] = 0.0

    # Highest predicted sales first
    return results_df.sort_values(by='predicted_sales', ascending=False)

# Demo scenario: store 1 on 2024-12-01, with onpromotion set to 0 and an
# assumed 1000 transactions.
store_nbr, month, day, year = 1, 12, 1, 2024
onpromotion, transactions = 0, 1000

# Score every product family for the scenario and show the top rows.
recommended_products = recommend_products(
    store_nbr=store_nbr,
    month=month,
    day=day,
    year=year,
    onpromotion=onpromotion,
    transactions=transactions,
)
print(recommended_products.head())

          family  predicted_sales  percentage
12     GROCERY I       2329.65529   44.242286
7       CLEANING        669.61000   12.716507
3      BEVERAGES        604.82000   11.486085
8          DAIRY        326.93000    6.208700
5   BREAD/BAKERY        231.47767    4.395973
