In [1]:
from flask import Flask, jsonify, request
import io
import pickle 
import csv
import pandas as pd

In [2]:
model = pickle.load(open("pricing_model.pkl", "rb"))


In [3]:
app = Flask(__name__)


In [4]:

@app.route("/",methods=["GET"])
def index():
    return "index";

@app.route('/predictions/prices', methods=['POST'])
def predict():
    
    csv_string = request.get_data().decode("utf-8") # Get input data from the request
    input_data = pd.read_csv(io.StringIO(csv_string));
    y = input_data.price
    input_data.info()
    #print(input_data)
    # Perform preprocessing on the input data if required
    input_data.dropna(axis=0, subset=['price'], inplace=True)
    input_data.drop(['price'], axis=1, inplace=True)

    input_data['date'] = pd.to_datetime(input_data['date'])
    
    low_cardinality_cols = [cname for cname in input_data.columns if input_data[cname].nunique() < 10 and 
                        input_data[cname].dtype == "object"]
    
    numeric_cols = [cname for cname in input_data.columns if input_data[cname].dtype in ['int64', 'float64']]

    # Keep selected columns only
    my_cols = low_cardinality_cols + numeric_cols
    input_data1 = input_data[my_cols].copy()

    # One-hot encode the data (to shorten the code, we use pandas)

    input_data1 = pd.get_dummies(input_data1)

    # Make predictions using the loaded model
    predictions = model.predict(input_data1)
    predictions_df = pd.DataFrame({'item_id' : input_data['item_id'],'Actual': y, 'Predicted': predictions})
    aggregated_predictions = predictions_df.groupby(input_data1['item_id']).mean()
    aggregated_predictions['item_id'] = aggregated_predictions['item_id'].astype(int)
    j = aggregated_predictions.to_json(orient='records')

    print(j)
    #return jsonify([])
    # Return the predictions as a response
    return j


In [None]:
if __name__ == '__main__':
    app.run(host='0.0.0.0',port=5000)

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


 * Running on all addresses.
 * Running on http://10.128.235.104:5000/ (Press CTRL+C to quit)
127.0.0.1 - - [26/Jun/2023 13:10:02] "POST /predictions/prices HTTP/1.1" 200 -


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21920 entries, 0 to 21919
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   date        21920 non-null  object 
 1   item_id     21920 non-null  int64  
 2   price       21920 non-null  float64
 3   item_count  21920 non-null  float64
dtypes: float64(2), int64(1), object(1)
memory usage: 685.1+ KB
[{"item_id":1,"Actual":6.71,"Predicted":6.7129998207},{"item_id":2,"Actual":15.09,"Predicted":15.0905265808},{"item_id":3,"Actual":29.22,"Predicted":29.211473465},{"item_id":4,"Actual":26.42,"Predicted":26.420633316},{"item_id":5,"Actual":6.07,"Predicted":6.0716757774},{"item_id":6,"Actual":5.61,"Predicted":5.616941452},{"item_id":7,"Actual":8.1,"Predicted":8.1026763916},{"item_id":8,"Actual":24.98,"Predicted":24.9700336456},{"item_id":9,"Actual":3.91,"Predicted":3.9155955315},{"item_id":10,"Actual":21.13,"Predicted":21.1237411499},{"item_id":11,"Actual":19.48,"Predicted":19.