In [1]:
import warnings
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle

from sklearn.linear_model import LinearRegression, Ridge
from sklearn.metrics import mean_absolute_error
from category_encoders import OneHotEncoder
from sklearn.pipeline import  Pipeline, make_pipeline
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split

warnings.filterwarnings("ignore")

In [2]:
# Load the raw price data and discard the columns that are not kept for modelling.
df = pd.read_csv("product seasonal sales.csv").drop(
    columns=["Cities", "Vitamin_1", "Vitamin_2", "Mineral"]
)

# Keep a reproducible random half of the rows; the other half is thrown away.
df_sample, _ = train_test_split(df, test_size=0.5, random_state=42)

# The output path lives in one variable so the log message below always names
# the file that was actually written (the old message named a different file).
sample_path = "product_and_sales_dataset.csv"
df_sample.to_csv(sample_path, index=False)

print(f"Selected {len(df_sample)} rows out of {len(df)} total rows and saved to '{sample_path}'")
df_sample

Selected 434000 rows out of 868000 total rows and saved to 'sampled_project_dataset.csv'


Unnamed: 0,Countries,Seasons,Healthy Foods,Unit of Measurement,Measurement Type,Price(USD)
801046,Switzerland,Spring,Carrots,500,Grams,2.13
554286,Belgium,Spring,Papaya,1,Piece,2.56
273396,Botswana,Dry Season,Bell Peppers,1,Piece,1.14
840758,United Kingdom,Fall,Garlic,50,Grams Per Bulk,1.77
464925,Tanzania,Dry Season,Broccoli,250,Grams,3.44
...,...,...,...,...,...,...
259178,Azerbaijan,Winter,Honey,250,Grams,6.04
365838,Lebanon,Winter,Seaweed,50,Grams,5.75
131932,Mauritania,Summer,Sweet Potatoes,1,Bulk,1.18
671155,Lithuania,Spring,Hemp Seeds,100,Grams,8.98


In [265]:
# Column the model learns to predict.
target = "Price(USD)"

# Feature matrix = every column except the target; label vector = the target column.
X = df_sample.drop(columns=target)
y = df_sample[target]

In [266]:
# Naive baseline: predict the mean price for every row. The resulting MAE is
# the score a fitted model has to beat to be useful.
# Note: the mean is taken over all sampled rows in `y`, not a training subset.
y_mean = y.mean()
y_pred_baseline = [y_mean] * len(y)
mae_baseline = mean_absolute_error(y, y_pred_baseline)

# Labels describe the actual target (price); the previous "P2 readings" text
# did not match this dataset.
print("mean price:", round(y_mean, 2))
print("mae baseline price:", round(mae_baseline, 2))

mean P2 readings: 4.44
mae baseline readings: 2.46


In [267]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
split_parts = train_test_split(X, y, test_size=0.2, random_state=40)
X_train, X_test, y_train, y_test = split_parts

# Report shapes in train-then-test order as a quick sanity check.
for part in (X_train, y_train, X_test, y_test):
    print(part.shape)

(347200, 5)
(347200,)
(86800, 5)
(86800,)


In [268]:
# Manual walk-through of the two preprocessing steps on the training features:
# one-hot encode first, then impute on the encoded matrix.
# NOTE(review): ohe, xt, imputer and xt_train are not referenced again in the
# visible notebook; the pipeline cell builds and fits its own encoder and
# imputer. Confirm whether this cell is still needed.
ohe = OneHotEncoder()
ohe.fit(X_train)
xt = ohe.transform(X_train)

imputer = SimpleImputer()
imputer.fit(xt)
xt_train = imputer.transform(xt)

In [269]:
# Encoder -> imputer -> ridge regression, assembled with explicit step names.
# The names match the ones make_pipeline would generate, so lookups such as
# model.named_steps["ridge"] keep working.
model = Pipeline(
    steps=[
        ("onehotencoder", OneHotEncoder(use_cat_names=True)),
        ("simpleimputer", SimpleImputer()),
        ("ridge", Ridge()),
    ]
)
model.fit(X_train, y_train)
model

Pipeline(steps=[('onehotencoder',
                 OneHotEncoder(cols=['Countries', 'Seasons', 'Healthy Foods',
                                     'Measurement Type'],
                               use_cat_names=True)),
                ('simpleimputer', SimpleImputer()), ('ridge', Ridge())])

In [None]:
# Mean absolute error of the fitted pipeline on the training and held-out rows.
mae_train = mean_absolute_error(y_train, model.predict(X_train))
mae_test = mean_absolute_error(y_test, model.predict(X_test))

# Use the round() builtin (as the baseline cell does) instead of the .round()
# method: the builtin works for both NumPy scalars and plain Python floats,
# whereas a plain float has no .round attribute.
print("MAE training:", round(mae_train, 2))
print("MAE test:", round(mae_test, 2))

MAE training: 0.85
MAE test: 0.85


In [None]:
# Pull the fitted regressor out of the pipeline once, then read its parameters.
ridge_step = model.named_steps["ridge"]
intercept = ridge_step.intercept_
coefficient = ridge_step.coef_

print("intercept:", intercept.round(2))
# Only the first five coefficients are shown.
print("coefficient:", coefficient[:5])

In [None]:
# Serialize the whole fitted pipeline (preprocessing + regressor) to disk.
# The path is kept in one variable so the confirmation message reports the file
# that was really written (it previously named a different file).
model_path = 'production_and_sales_model.pkl'
with open(model_path, 'wb') as file:
    pickle.dump(model, file)

print(f"Model saved successfully as '{model_path}'!")

In [None]:
# Read the pickled pipeline back from disk.
# NOTE: unpickling can execute arbitrary code, so only load pickle files that
# come from a trusted source.
with open('production_and_sales_model.pkl', 'rb') as pickled_model:
    loaded_model = pickle.load(pickled_model)

In [None]:
# Round-trip check: score the held-out features with the unpickled pipeline
# and print the resulting predictions.
loaded_predictions = loaded_model.predict(X_test)
print("Loaded model predictions:", loaded_predictions)

In [None]:
# One hand-built observation with the same five feature columns the pipeline
# was trained on.
input_data = {
    "Countries": ["Nigeria"],
    "Seasons": ["Dry Season"],
    "Healthy Foods": ["Garlic"],
    # Quantity, expressed in the unit named by "Measurement Type" below.
    "Unit of Measurement": [50],
    # Must match the training category spelling exactly ("Grams Per Bulk").
    # A differently-cased value is a different category for the encoder;
    # presumably it is treated as unseen and silently skews the prediction
    # (verify against the encoder's handle_unknown setting).
    "Measurement Type": ["Grams Per Bulk"],
}

# Wrap the observation in a one-row DataFrame so it has the column names the
# pipeline expects.
input_df = pd.DataFrame(input_data)

# Score with the pipeline that was reloaded from disk.
predicted_price = loaded_model.predict(input_df)

print(f"Predicted Price : ${round(predicted_price[0], 2)}")

In [6]:
  import urllib.request
import json
import os
import ssl
import urllib.error  # imported explicitly because HTTPError is caught below


def allowSelfSignedHttps(allowed):
    """Optionally turn off HTTPS certificate verification for this process.

    When ``allowed`` is truthy, the ``PYTHONHTTPSVERIFY`` environment variable
    is unset or empty, and the ``ssl`` module exposes
    ``_create_unverified_context``, the default HTTPS context factory is
    replaced with the unverified one. Otherwise nothing changes.
    """
    # bypass the server certificate verification on client side
    if allowed and not os.environ.get('PYTHONHTTPSVERIFY', '') and getattr(ssl, '_create_unverified_context', None):
        ssl._create_default_https_context = ssl._create_unverified_context


# NOTE(review): this disables certificate verification for every HTTPS call
# made through the default context in this kernel, including the request below
# that carries the bearer token. It is only needed for a self-signed scoring
# service; confirm whether this endpoint requires it.
allowSelfSignedHttps(True)  # this line is needed if you use self-signed certificate in your scoring service.

# Request data goes here: nine rows, one per entry in "index", with values in
# the same order as "columns".
data = {
  "input_data": {
    "columns": [
      "Countries",
      "Seasons",
      "Healthy Foods",
      "Unit of Measurement",
      "Measurement Type"
    ],
    "index": [0, 1, 2, 3, 4, 5, 6, 7, 8],
    "data": [
      ["Switzerland", "Spring", "Carrots", 500, "Grams"],
      ["Belgium", "Spring", "Papaya", 1, "Piece"],
      ["Botswana", "Dry Season", "Bell Peppers", 1, "Piece"],
      ["United Kingdom", "Fall", "Garlic", 50, "Grams Per Bulk"],
      ["Tanzania", "Dry Season", "Broccoli", 250, "Grams"],
      ["Moldova", "Winter", "Goji Berries", 100, "Grams"],
      ["Russia", "Spring", "Dark Chocolate", 50, "Grams"],
      ["United States", "Summer", "Matcha", 30, "Grams"],
      ["Germany", "Fall", "Raspberries", 125, "Grams"]
    ]
  }
}

body = str.encode(json.dumps(data))

url = 'https://optimizing-production-and-jfimz.eastus2.inference.ml.azure.com/score'

# The endpoint credential (primary/secondary key, AMLToken, or Microsoft Entra
# ID token) is read from the environment so it never lives in the notebook.
# A key that was previously committed in this cell should be treated as
# exposed and rotated.
api_key = os.environ.get('AZUREML_ENDPOINT_KEY', '')
if not api_key:
    raise Exception("A key should be provided to invoke the endpoint")

headers = {'Content-Type':'application/json', 'Authorization':('Bearer '+ api_key)}

req = urllib.request.Request(url, body, headers)

try:
    # The context manager closes the HTTP response once the body has been read.
    with urllib.request.urlopen(req) as response:
        result = response.read()
    print(result)
except urllib.error.HTTPError as error:
    print("The request failed with status code: " + str(error.code))

    # Print the headers - they include the request ID and the timestamp, which are useful for debugging the failure
    print(error.info())
    print(error.read().decode("utf8", 'ignore'))


b'[1.7023543850409846, 1.9004708870182823, 2.050600341285131, 1.2038314784906712, 2.803756317851423, 12.534168463661793, 4.016228192131021, 14.548059384097506, 3.9836170625551715]'


In [None]:
# NOTE(review): `st` is not imported or defined anywhere in the visible
# notebook, so this cell raises NameError on a fresh kernel. The call looks
# like Streamlit's `st.text_input` — presumably copied from a companion app;
# confirm, then either move it there or add the missing import.
measurement_type = st.text_input('Measurement Type (e.g., grams, piece)', 'Grams')