In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import ipywidgets as widgets
import plotly.graph_objects as go
from datetime import datetime, timedelta
from IPython.display import *
from ipywidgets import interact, Dropdown, FloatSlider

In [2]:
# Rendering the dataset in a dataframe
df = pd.read_csv('../tests/Final_Output/Processed_Grain_2023-05-07.csv')
df.head()

Unnamed: 0,contract_type,volatility,bid,offer,market_to_market,first,last,high,low,deals,contracts,value,upload_date,market_name,market_code,market_type
0,01 Mar 23 BEAN,9668.0,9664.0,9677.0,9668.0,9640.0,9686.4,9688.0,9640.0,5.0,10,9670720,2023-02-06,Afgri,silo_5,Silos
1,02 May 23 BEAN,9668.0,9669.0,9690.0,9690.0,9670.0,9689.6,9692.0,9670.0,3.0,20,19369450,2023-02-06,Afgri,silo_5,Silos
2,03 Jul 23 BEAN,9668.0,9650.0,9683.0,9650.0,9619.0,9619.0,0.0,0.0,0.0,0,0,2023-02-06,Afgri,silo_5,Silos
3,01 Sep 23 BEAN,9668.0,8959.0,9002.0,8963.0,8963.0,8963.0,8963.0,8963.0,1.0,10,8963000,2023-02-06,Afgri,silo_5,Silos
4,01 Nov 23 BEAN,9668.0,8762.0,8822.0,8775.0,8775.0,8775.0,0.0,0.0,0.0,0,0,2023-02-06,Afgri,silo_5,Silos


In [3]:
# Taking a peek into the metadata of the dataset
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 741660 entries, 0 to 741659
Data columns (total 16 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   contract_type     741660 non-null  object 
 1   volatility        741660 non-null  float64
 2   bid               741660 non-null  float64
 3   offer             741660 non-null  float64
 4   market_to_market  741660 non-null  float64
 5   first             741660 non-null  float64
 6   last              741660 non-null  float64
 7   high              741660 non-null  float64
 8   low               741660 non-null  float64
 9   deals             741660 non-null  float64
 10  contracts         741660 non-null  int64  
 11  value             741660 non-null  int64  
 12  upload_date       741660 non-null  object 
 13  market_name       741660 non-null  object 
 14  market_code       741660 non-null  object 
 15  market_type       741660 non-null  object 
dtypes: float64(9), int64

In [4]:
df.isnull().sum()

contract_type       0
volatility          0
bid                 0
offer               0
market_to_market    0
first               0
last                0
high                0
low                 0
deals               0
contracts           0
value               0
upload_date         0
market_name         0
market_code         0
market_type         0
dtype: int64

### This block of code was intended to eliminate columns which contain 'Nans'. Not necessary for the streamlit

for i, row in df.iterrows():
    if df.loc[i, :].isnull().sum() > 11:
        df.drop([i], inplace=True)
        
df

### This block was intended to slice out applicable columns rendered in object data type for the purpose of converting to numeric - Not necessary for streamlit

object_cols = df.loc[ : , df.dtypes == 'O']

Extracting column names with list comprehension
object_type_columns = [col for col in object_cols.columns]

object_type_columns_1 = object_type_columns[1:-1]
object_type_columns_1


### This block was intended to convert column object data types - Not necessary for streamlit

list_columns = list(df.columns)
numeric_columns = list_columns[1:-4]

for col in numeric_columns:
    if df[col].dtype == 'O':
        df[col] = df[col].str.replace(',','').astype(float)

df.info()

In [5]:
'''
Split the information under the 'contract_type' column by the spaces for the purpose of extrating the contract 
date information and grain type information
'''

df[['Day', 'Month', 'Year', "Grain_Type"]] = df['contract_type'].str.split(' ', expand=True)
df

Unnamed: 0,contract_type,volatility,bid,offer,market_to_market,first,last,high,low,deals,contracts,value,upload_date,market_name,market_code,market_type,Day,Month,Year,Grain_Type
0,01 Mar 23 BEAN,9668.00,9664.0,9677.0,9668.00,9640.0,9686.4,9688.0,9640.0,5.0,10,9670720,2023-02-06,Afgri,silo_5,Silos,01,Mar,23,BEAN
1,02 May 23 BEAN,9668.00,9669.0,9690.0,9690.00,9670.0,9689.6,9692.0,9670.0,3.0,20,19369450,2023-02-06,Afgri,silo_5,Silos,02,May,23,BEAN
2,03 Jul 23 BEAN,9668.00,9650.0,9683.0,9650.00,9619.0,9619.0,0.0,0.0,0.0,0,0,2023-02-06,Afgri,silo_5,Silos,03,Jul,23,BEAN
3,01 Sep 23 BEAN,9668.00,8959.0,9002.0,8963.00,8963.0,8963.0,8963.0,8963.0,1.0,10,8963000,2023-02-06,Afgri,silo_5,Silos,01,Sep,23,BEAN
4,01 Nov 23 BEAN,9668.00,8762.0,8822.0,8775.00,8775.0,8775.0,0.0,0.0,0.0,0,0,2023-02-06,Afgri,silo_5,Silos,01,Nov,23,BEAN
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
741655,23 Apr 24 YMAZ,3560.00,0.0,0.0,3849.00,3849.0,3849.0,0.0,0.0,0.0,0,0,2023-05-05,White River Mills,MILL24,Millers,23,Apr,24,YMAZ
741656,24 May 24 YMAZ,3560.00,3691.0,0.0,4136.00,4136.0,4136.0,0.0,0.0,0.0,0,0,2023-05-05,White River Mills,MILL24,Millers,24,May,24,YMAZ
741657,24 Jul 24 YMAZ,3560.00,3781.0,3800.0,3784.00,3784.0,3800.0,3800.0,3784.0,2.0,2,758400,2023-05-05,White River Mills,MILL24,Millers,24,Jul,24,YMAZ
741658,20 Dec 24 YMAZ,3560.00,0.0,0.0,3941.00,3941.0,3941.0,0.0,0.0,0.0,0,0,2023-05-05,White River Mills,MILL24,Millers,20,Dec,24,YMAZ


In [6]:
# The ensuing 'Month' column from the previous code is transformed from month code numbers into month names
df['Month_Code'] = df['Month'].replace(['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'], ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12' ])
df

Unnamed: 0,contract_type,volatility,bid,offer,market_to_market,first,last,high,low,deals,...,value,upload_date,market_name,market_code,market_type,Day,Month,Year,Grain_Type,Month_Code
0,01 Mar 23 BEAN,9668.00,9664.0,9677.0,9668.00,9640.0,9686.4,9688.0,9640.0,5.0,...,9670720,2023-02-06,Afgri,silo_5,Silos,01,Mar,23,BEAN,3
1,02 May 23 BEAN,9668.00,9669.0,9690.0,9690.00,9670.0,9689.6,9692.0,9670.0,3.0,...,19369450,2023-02-06,Afgri,silo_5,Silos,02,May,23,BEAN,5
2,03 Jul 23 BEAN,9668.00,9650.0,9683.0,9650.00,9619.0,9619.0,0.0,0.0,0.0,...,0,2023-02-06,Afgri,silo_5,Silos,03,Jul,23,BEAN,7
3,01 Sep 23 BEAN,9668.00,8959.0,9002.0,8963.00,8963.0,8963.0,8963.0,8963.0,1.0,...,8963000,2023-02-06,Afgri,silo_5,Silos,01,Sep,23,BEAN,9
4,01 Nov 23 BEAN,9668.00,8762.0,8822.0,8775.00,8775.0,8775.0,0.0,0.0,0.0,...,0,2023-02-06,Afgri,silo_5,Silos,01,Nov,23,BEAN,11
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
741655,23 Apr 24 YMAZ,3560.00,0.0,0.0,3849.00,3849.0,3849.0,0.0,0.0,0.0,...,0,2023-05-05,White River Mills,MILL24,Millers,23,Apr,24,YMAZ,4
741656,24 May 24 YMAZ,3560.00,3691.0,0.0,4136.00,4136.0,4136.0,0.0,0.0,0.0,...,0,2023-05-05,White River Mills,MILL24,Millers,24,May,24,YMAZ,5
741657,24 Jul 24 YMAZ,3560.00,3781.0,3800.0,3784.00,3784.0,3800.0,3800.0,3784.0,2.0,...,758400,2023-05-05,White River Mills,MILL24,Millers,24,Jul,24,YMAZ,7
741658,20 Dec 24 YMAZ,3560.00,0.0,0.0,3941.00,3941.0,3941.0,0.0,0.0,0.0,...,0,2023-05-05,White River Mills,MILL24,Millers,20,Dec,24,YMAZ,12


In [7]:
# The short format of year is enhanced into a fuller format
df['Year_Full_Format'] = '20' + df['Year']
df

Unnamed: 0,contract_type,volatility,bid,offer,market_to_market,first,last,high,low,deals,...,upload_date,market_name,market_code,market_type,Day,Month,Year,Grain_Type,Month_Code,Year_Full_Format
0,01 Mar 23 BEAN,9668.00,9664.0,9677.0,9668.00,9640.0,9686.4,9688.0,9640.0,5.0,...,2023-02-06,Afgri,silo_5,Silos,01,Mar,23,BEAN,3,2023
1,02 May 23 BEAN,9668.00,9669.0,9690.0,9690.00,9670.0,9689.6,9692.0,9670.0,3.0,...,2023-02-06,Afgri,silo_5,Silos,02,May,23,BEAN,5,2023
2,03 Jul 23 BEAN,9668.00,9650.0,9683.0,9650.00,9619.0,9619.0,0.0,0.0,0.0,...,2023-02-06,Afgri,silo_5,Silos,03,Jul,23,BEAN,7,2023
3,01 Sep 23 BEAN,9668.00,8959.0,9002.0,8963.00,8963.0,8963.0,8963.0,8963.0,1.0,...,2023-02-06,Afgri,silo_5,Silos,01,Sep,23,BEAN,9,2023
4,01 Nov 23 BEAN,9668.00,8762.0,8822.0,8775.00,8775.0,8775.0,0.0,0.0,0.0,...,2023-02-06,Afgri,silo_5,Silos,01,Nov,23,BEAN,11,2023
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
741655,23 Apr 24 YMAZ,3560.00,0.0,0.0,3849.00,3849.0,3849.0,0.0,0.0,0.0,...,2023-05-05,White River Mills,MILL24,Millers,23,Apr,24,YMAZ,4,2024
741656,24 May 24 YMAZ,3560.00,3691.0,0.0,4136.00,4136.0,4136.0,0.0,0.0,0.0,...,2023-05-05,White River Mills,MILL24,Millers,24,May,24,YMAZ,5,2024
741657,24 Jul 24 YMAZ,3560.00,3781.0,3800.0,3784.00,3784.0,3800.0,3800.0,3784.0,2.0,...,2023-05-05,White River Mills,MILL24,Millers,24,Jul,24,YMAZ,7,2024
741658,20 Dec 24 YMAZ,3560.00,0.0,0.0,3941.00,3941.0,3941.0,0.0,0.0,0.0,...,2023-05-05,White River Mills,MILL24,Millers,20,Dec,24,YMAZ,12,2024


In [8]:
# The Contract Month, Day and Year are fused together with '/' to give the contract date
df['Contract_Date'] = df['Month_Code']+'/'+df['Day']+'/'+ df['Year_Full_Format']
df

Unnamed: 0,contract_type,volatility,bid,offer,market_to_market,first,last,high,low,deals,...,market_name,market_code,market_type,Day,Month,Year,Grain_Type,Month_Code,Year_Full_Format,Contract_Date
0,01 Mar 23 BEAN,9668.00,9664.0,9677.0,9668.00,9640.0,9686.4,9688.0,9640.0,5.0,...,Afgri,silo_5,Silos,01,Mar,23,BEAN,3,2023,3/01/2023
1,02 May 23 BEAN,9668.00,9669.0,9690.0,9690.00,9670.0,9689.6,9692.0,9670.0,3.0,...,Afgri,silo_5,Silos,02,May,23,BEAN,5,2023,5/02/2023
2,03 Jul 23 BEAN,9668.00,9650.0,9683.0,9650.00,9619.0,9619.0,0.0,0.0,0.0,...,Afgri,silo_5,Silos,03,Jul,23,BEAN,7,2023,7/03/2023
3,01 Sep 23 BEAN,9668.00,8959.0,9002.0,8963.00,8963.0,8963.0,8963.0,8963.0,1.0,...,Afgri,silo_5,Silos,01,Sep,23,BEAN,9,2023,9/01/2023
4,01 Nov 23 BEAN,9668.00,8762.0,8822.0,8775.00,8775.0,8775.0,0.0,0.0,0.0,...,Afgri,silo_5,Silos,01,Nov,23,BEAN,11,2023,11/01/2023
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
741655,23 Apr 24 YMAZ,3560.00,0.0,0.0,3849.00,3849.0,3849.0,0.0,0.0,0.0,...,White River Mills,MILL24,Millers,23,Apr,24,YMAZ,4,2024,4/23/2024
741656,24 May 24 YMAZ,3560.00,3691.0,0.0,4136.00,4136.0,4136.0,0.0,0.0,0.0,...,White River Mills,MILL24,Millers,24,May,24,YMAZ,5,2024,5/24/2024
741657,24 Jul 24 YMAZ,3560.00,3781.0,3800.0,3784.00,3784.0,3800.0,3800.0,3784.0,2.0,...,White River Mills,MILL24,Millers,24,Jul,24,YMAZ,7,2024,7/24/2024
741658,20 Dec 24 YMAZ,3560.00,0.0,0.0,3941.00,3941.0,3941.0,0.0,0.0,0.0,...,White River Mills,MILL24,Millers,20,Dec,24,YMAZ,12,2024,12/20/2024


In [9]:
# The Contract Date and upload date is now rendered a proper datetime format for coding purpose
df['Contract_Date'] = pd.to_datetime(df.Contract_Date)
df['upload_date'] = pd.to_datetime(df.upload_date)
df

Unnamed: 0,contract_type,volatility,bid,offer,market_to_market,first,last,high,low,deals,...,market_name,market_code,market_type,Day,Month,Year,Grain_Type,Month_Code,Year_Full_Format,Contract_Date
0,01 Mar 23 BEAN,9668.00,9664.0,9677.0,9668.00,9640.0,9686.4,9688.0,9640.0,5.0,...,Afgri,silo_5,Silos,01,Mar,23,BEAN,3,2023,2023-03-01
1,02 May 23 BEAN,9668.00,9669.0,9690.0,9690.00,9670.0,9689.6,9692.0,9670.0,3.0,...,Afgri,silo_5,Silos,02,May,23,BEAN,5,2023,2023-05-02
2,03 Jul 23 BEAN,9668.00,9650.0,9683.0,9650.00,9619.0,9619.0,0.0,0.0,0.0,...,Afgri,silo_5,Silos,03,Jul,23,BEAN,7,2023,2023-07-03
3,01 Sep 23 BEAN,9668.00,8959.0,9002.0,8963.00,8963.0,8963.0,8963.0,8963.0,1.0,...,Afgri,silo_5,Silos,01,Sep,23,BEAN,9,2023,2023-09-01
4,01 Nov 23 BEAN,9668.00,8762.0,8822.0,8775.00,8775.0,8775.0,0.0,0.0,0.0,...,Afgri,silo_5,Silos,01,Nov,23,BEAN,11,2023,2023-11-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
741655,23 Apr 24 YMAZ,3560.00,0.0,0.0,3849.00,3849.0,3849.0,0.0,0.0,0.0,...,White River Mills,MILL24,Millers,23,Apr,24,YMAZ,4,2024,2024-04-23
741656,24 May 24 YMAZ,3560.00,3691.0,0.0,4136.00,4136.0,4136.0,0.0,0.0,0.0,...,White River Mills,MILL24,Millers,24,May,24,YMAZ,5,2024,2024-05-24
741657,24 Jul 24 YMAZ,3560.00,3781.0,3800.0,3784.00,3784.0,3800.0,3800.0,3784.0,2.0,...,White River Mills,MILL24,Millers,24,Jul,24,YMAZ,7,2024,2024-07-24
741658,20 Dec 24 YMAZ,3560.00,0.0,0.0,3941.00,3941.0,3941.0,0.0,0.0,0.0,...,White River Mills,MILL24,Millers,20,Dec,24,YMAZ,12,2024,2024-12-20


In [94]:
# Combining the 'market_name' and 'market_code' columns to generate 'Market' Column
df['Market'] = df['market_name']+" "+df['market_code']

C = len(df['Market'].unique())
D = len(df['Grain_Type'].unique())
print (f"Total No. of Grain Types in Grain Dataset = {D}")
print (f"Total No. of Markets in Grain Dataset = {C}")

Total No. of Grain Types in Grain Dataset = 40
Total No. of Markets in Grain Dataset = 282


In [10]:
# Counting the number of records per grain type
Transaction_Volume = df.groupby(['Grain_Type']).size().rename('Count').reset_index()
Transaction_Volume

Unnamed: 0,Grain_Type,Count
0,BEAN,40890
1,BRNT,11844
2,COPP,11844
3,CORN,35814
4,DSEL,41736
5,GOLD,11844
6,KANS,21714
7,MEAL,20022
8,OILS,14100
9,PALL,11844


In [11]:
# Plotting the count of records of transactions per grain type
px.bar(Transaction_Volume, x = 'Grain_Type', y = 'Count', title = 'Count of Contract Transactions Per Grain')

In [91]:
# Generating an interactive pie chart to peek through the distribution of market activities for each grain
list_Grain = list(df['Grain_Type'].unique())
list_Contract = list(df['contract_type'].unique())

# Required function for the interactive charts
def grain_pie(Grain):
    A= df.loc[df['Grain_Type'] == Grain, ('contract_type')]
    B = len(A)
    C = len(df['Market'].unique())
 
    fig = px.pie(A, values=A.value_counts().values, names=A.value_counts().index) 
    fig.update_traces(hoverinfo='label+percent+name', textinfo='value')
    
    fig.update_layout(
    title_text= f"Trading Activities for {Grain} Across All {C} Markets; Total No. of {Grain} Tranasactions = {B}"
    )
    fig.show()
    
widgets.interact(grain_pie, Grain = list_Grain, Contract = list_Contract )

interactive(children=(Dropdown(description='Grain', options=('BEAN', 'BRNT', 'COPP', 'CORN', 'DSEL', 'GOLD', '…

<function __main__.grain_pie(Grain)>

In [13]:
# Let us see the final metadata after
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 741660 entries, 0 to 741659
Data columns (total 24 columns):
 #   Column            Non-Null Count   Dtype         
---  ------            --------------   -----         
 0   contract_type     741660 non-null  object        
 1   volatility        741660 non-null  float64       
 2   bid               741660 non-null  float64       
 3   offer             741660 non-null  float64       
 4   market_to_market  741660 non-null  float64       
 5   first             741660 non-null  float64       
 6   last              741660 non-null  float64       
 7   high              741660 non-null  float64       
 8   low               741660 non-null  float64       
 9   deals             741660 non-null  float64       
 10  contracts         741660 non-null  int64         
 11  value             741660 non-null  int64         
 12  upload_date       741660 non-null  datetime64[ns]
 13  market_name       741660 non-null  object        
 14  mark

In [14]:

# Listing of filter information
list1 = list(df.Market.unique())
list2 = list(df.Grain_Type.unique())
list3 = list(df.upload_date.unique())

# Function for interactive chart
def filteringplotly(Market, Grain_Type, Upload_Date):
    A = df.loc[df['Market'] == Market, ('last', 'bid', 'Grain_Type','upload_date', 'Contract_Date')]
    B = A.loc[A['Grain_Type'] == Grain_Type, ('last', 'bid', 'upload_date', 'Contract_Date')]
    C = B.loc[B['upload_date']== Upload_Date , ('last', 'bid', 'Contract_Date')]
    fig = px.bar(C, x='Contract_Date', y="last", text_auto = True, title="Price (Last) of Grains Versus Contract Dates")
    fig.show()

#widget function for chart rendering
widgets.interact(filteringplotly, Market = list1, Grain_Type = list2, Upload_Date = list3)

interactive(children=(Dropdown(description='Market', options=('Afgri  silo_5 ', 'Afgri  silo_6 ', 'Afgri  silo…

<function __main__.filteringplotly(Market, Grain_Type, Upload_Date)>

In [95]:

date_range = pd.to_datetime(datetime.now() - timedelta(days=365))

df_New = df.loc[df['upload_date'] > date_range, ('Market','last', 'bid', 'Grain_Type','upload_date', 'Contract_Date', 'contract_type', 'deals')]
list1 = list(df_New.Market.unique())
list2 = list(df_New.Grain_Type.unique())
list3 = sorted(list(df_New.upload_date.unique()), reverse = True)


def filteringplotly(Market, Grain_Type, Upload_Date):
    A = df_New.loc[df_New['Market'] == Market, ('last', 'bid', 'Grain_Type','upload_date', 'Contract_Date')]
    B = A.loc[A['Grain_Type'] == Grain_Type, ('last', 'bid', 'upload_date', 'Contract_Date')]
    date_range_from = pd.to_datetime(datetime.now() - timedelta(days=365))
    D = B.loc[B['Contract_Date'] > date_range_from, ('last', 'bid', 'upload_date', 'Contract_Date')]
    C = D.loc[D['upload_date']== Upload_Date , ('last', 'bid', 'Contract_Date')]

    fig = go.Figure(data =[
                    go.Bar(name = 'Last', x=C['Contract_Date'], y =C['last']),
                    go.Bar(name = 'Bid', x=C['Contract_Date'], y = C['bid'])
                    ])
    fig.update_layout(barmode = 'group', title=f"{Grain_Type} Price (Last/Bid Price) Versus Contract Dates")
    # Add range slider
    
    fig.update_layout(xaxis=dict(rangeslider=dict(visible=True), type="date"))

    fig.show()


widgets.interact(filteringplotly, Market = list1, Grain_Type = list2, Upload_Date = list3)

interactive(children=(Dropdown(description='Market', options=('Afgri  silo_5 ', 'Afgri  silo_6 ', 'Afgri  silo…

<function __main__.filteringplotly(Market, Grain_Type, Upload_Date)>

In [16]:
def filteringplotly(Market, Grain_Type):
    A = df_New.loc[df_New['Market'] == Market, ('last', 'bid', 'Grain_Type','upload_date', 'Contract_Date')]
    B = A.loc[A['Grain_Type'] == Grain_Type, ('last', 'bid', 'upload_date', 'Contract_Date')]
    date_range_from = pd.to_datetime(datetime.now())
    D = B.loc[B['Contract_Date'] > date_range_from, ('last', 'bid', 'upload_date', 'Contract_Date')]
    C = D.groupby('Contract_Date')['last', 'bid'].mean().reset_index()

    fig = go.Figure(data =[
                    go.Bar(name = 'Last Price (Avg)', x=C['Contract_Date'], y =C['last']),
                    go.Bar(name = 'Bid Price (Avg)', x=C['Contract_Date'], y = C['bid'])
                    ])
    fig.update_layout(barmode = 'group', title=f"Average Price (Last/Bid Price) over the Past Year for Unsettled {Grain_Type} Contracts")
    # Add range slider
    
    fig.update_layout(xaxis=dict(rangeslider=dict(visible=True), type="date"))

    fig.show()


widgets.interact(filteringplotly, Market = list1, Grain_Type = list2, Upload_Date = list3)

interactive(children=(Dropdown(description='Market', options=('Afgri  silo_5 ', 'Afgri  silo_6 ', 'Afgri  silo…

<function __main__.filteringplotly(Market, Grain_Type)>

In [17]:

def filteringplotly(Market, Grain_Type):
    A = df_New.loc[df_New['Market'] == Market, ('last', 'bid','upload_date','contract_type', 'Grain_Type')]
    B = A.loc[A['Grain_Type'] == Grain_Type, ('last', 'bid', 'upload_date','contract_type')]
    Max_Price = B['last'].loc[B['last'].idxmax()]
    Min_Price = B['last'].loc[B['last'].idxmin()]

    Max_Date = '{:%B %d, %Y}'.format(B['upload_date'].loc[B['upload_date'].idxmax()])
    Min_Date = '{:%B %d, %Y}'.format(B['upload_date'].loc[B['upload_date'].idxmin()])
    
    fig = px.line(B, x='upload_date', y='last', color='contract_type')
    
    fig.update_layout(barmode = 'group', 
    title= f"Last Price of {Grain_Type} Contracts (Period = {Min_Date} To {Max_Date}; Price Range = {Min_Price} - {Max_Price})")

    
    # Add range slider
    fig.update_layout(
        xaxis=dict(
            rangeselector=dict(
                buttons=list([
                    dict(count=1,
                         label="1m",
                         step="month",
                         stepmode="backward"),
                    dict(count=6,
                         label="6m",
                         step="month",
                         stepmode="backward"),
                    dict(count=1,
                         label="YTD",
                         step="year",
                         stepmode="todate"),
                    dict(count=1,
                         label="1y",
                         step="year",
                         stepmode="backward"),
                    dict(step="all")
                ])
            ),
            rangeslider=dict(
                visible=True
            ),
            type="date"
        )
    )

    fig.show()

widgets.interact(filteringplotly, Market = list1, Grain_Type = list2)

interactive(children=(Dropdown(description='Market', options=('Afgri  silo_5 ', 'Afgri  silo_6 ', 'Afgri  silo…

<function __main__.filteringplotly(Market, Grain_Type)>

In [18]:

def filteringplotly(Market, Grain_Type):
    A = df_New.loc[df_New['Market'] == Market, ('last', 'bid','upload_date','contract_type', 'Grain_Type')]
    B = A.loc[A['Grain_Type'] == Grain_Type, ('last', 'bid', 'upload_date','contract_type')]
    Max_Price = B['last'].loc[B['last'].idxmax()]
    Min_Price = B['last'].loc[B['last'].idxmin()]

    Max_Date = '{:%B %d, %Y}'.format(B['upload_date'].loc[B['upload_date'].idxmax()])
    Min_Date = '{:%B %d, %Y}'.format(B['upload_date'].loc[B['upload_date'].idxmin()])
    
    fig = px.line(B, x='upload_date', y='bid', color='contract_type')
    
    fig.update_layout(barmode = 'group', 
    title= f"Bid Price of {Grain_Type} Contracts (Period = {Min_Date} To {Max_Date}; Price Range = {Min_Price} - {Max_Price})")

    
    # Add range slider
    fig.update_layout(
        xaxis=dict(
            rangeselector=dict(
                buttons=list([
                    dict(count=1,
                         label="1m",
                         step="month",
                         stepmode="backward"),
                    dict(count=6,
                         label="6m",
                         step="month",
                         stepmode="backward"),
                    dict(count=1,
                         label="YTD",
                         step="year",
                         stepmode="todate"),
                    dict(count=1,
                         label="1y",
                         step="year",
                         stepmode="backward"),
                    dict(step="all")
                ])
            ),
            rangeslider=dict(
                visible=True
            ),
            type="date"
        )
    )

    fig.show()

widgets.interact(filteringplotly, Market = list1, Grain_Type = list2)

interactive(children=(Dropdown(description='Market', options=('Afgri  silo_5 ', 'Afgri  silo_6 ', 'Afgri  silo…

<function __main__.filteringplotly(Market, Grain_Type)>

In [19]:

def filteringplotly(Market, Grain_Type):
    A = df_New.loc[df_New['Market'] == Market, ('last', 'bid','upload_date','contract_type', 'Grain_Type')]
    B = A.loc[A['Grain_Type'] == Grain_Type, ('last', 'bid', 'upload_date','contract_type')]
    C = B.groupby('upload_date')['last', 'bid'].mean().reset_index()

    Max_Date = '{:%B %d, %Y}'.format(B['upload_date'].loc[B['upload_date'].idxmax()])
    Min_Date = '{:%B %d, %Y}'.format(B['upload_date'].loc[B['upload_date'].idxmin()])
    
    fig = go.Figure(data =[
                    go.Scatter(name = 'Last Price', x=C['upload_date'], y =C['last']),
                    go.Scatter(name = 'Bid Price', x=C['upload_date'], y = C['bid'])
                    ])
    
    fig.update_layout(barmode = 'group', 
    title= f"Average Price (Last/Bid) of {Grain_Type} Contracts Versus Upload Date (Period = {Min_Date} To {Max_Date})")
    
    # Add range slider
    fig.update_layout(
        xaxis=dict(
            rangeselector=dict(
                buttons=list([
                    dict(count=1,
                         label="1m",
                         step="month",
                         stepmode="backward"),
                    dict(count=6,
                         label="6m",
                         step="month",
                         stepmode="backward"),
                    dict(count=1,
                         label="YTD",
                         step="year",
                         stepmode="todate"),
                    dict(count=1,
                         label="1y",
                         step="year",
                         stepmode="backward"),
                    dict(step="all")
                ])
            ),
            rangeslider=dict(
                visible=True
            ),
            type="date"
        )
    )

    fig.show()

widgets.interact(filteringplotly, Market = list1, Grain_Type = list2)

interactive(children=(Dropdown(description='Market', options=('Afgri  silo_5 ', 'Afgri  silo_6 ', 'Afgri  silo…

<function __main__.filteringplotly(Market, Grain_Type)>

In [20]:
list4 = list(df_New.contract_type.unique())


def filteringplotly(Market, Contract_Type):
    A = df_New.loc[df_New['Market'] == Market, ('last', 'bid','upload_date','contract_type')]
    B = A.loc[A['contract_type'] == Contract_Type, ('last', 'bid', 'upload_date')]

    fig = go.Figure(data =[
                    go.Scatter(name = 'Last Price', x=B['upload_date'], y =B['last']),
                    go.Scatter(name = 'Bid Price', x=B['upload_date'], y = B['bid'])
                    ])
    
    fig.update_layout(barmode = 'group', title= f"Last/Bid Price of  '{Contract_Type} / {Market}'  Contract vs Upload Date")

    
    # Add range slider
    fig.update_layout(
        xaxis=dict(
            rangeselector=dict(
                buttons=list([
                    dict(count=1,
                         label="1m",
                         step="month",
                         stepmode="backward"),
                    dict(count=6,
                         label="6m",
                         step="month",
                         stepmode="backward"),
                    dict(count=1,
                         label="YTD",
                         step="year",
                         stepmode="todate"),
                    dict(count=1,
                         label="1y",
                         step="year",
                         stepmode="backward"),
                    dict(step="all")
                ])
            ),
            rangeslider=dict(
                visible=True
            ),
            type="date"
        )
    )

    fig.show()

widgets.interact(filteringplotly, Market = list1, Contract_Type = list4)

interactive(children=(Dropdown(description='Market', options=('Afgri  silo_5 ', 'Afgri  silo_6 ', 'Afgri  silo…

<function __main__.filteringplotly(Market, Contract_Type)>

In [21]:
# Combining last/bid prices with the deal volume
def filteringplotly(Market, Contract_Type):
    A = df_New.loc[df_New['Market'] == Market, ('last', 'bid','upload_date','contract_type', 'deals')]
    B = A.loc[A['contract_type'] == Contract_Type, ('last', 'bid', 'upload_date', 'deals')]

    fig = go.Figure(data =[
                    go.Bar(name = 'Last Price', x=B['upload_date'], y =B['last'], yaxis="y"),
                    go.Bar(name = 'Bid Price', x=B['upload_date'], y = B['bid'], yaxis="y"),
                    go.Scatter(name = 'Deals count', x=B['upload_date'], y = B['deals'], yaxis="y2")
                    ])
    fig.update_layout( barmode = 'group', 
        title= f"Last/Bid Price overlayed with Deal Volume for '{Contract_Type} / {Market}' Contract vs Upload Date",
    # create 1st y axis
    yaxis=dict(
    title="Last/Bid Price",
    titlefont=dict(color="#1f77b4"),
    tickfont=dict(color="#1f77b4")),
                      
    # create 2nd y axis
    yaxis2=dict(
        title="Deals Count",
        anchor="x", overlaying="y", side="right"))
    
    fig.show()

widgets.interact(filteringplotly, Market = list1, Contract_Type = list4)

interactive(children=(Dropdown(description='Market', options=('Afgri  silo_5 ', 'Afgri  silo_6 ', 'Afgri  silo…

<function __main__.filteringplotly(Market, Contract_Type)>

In [22]:



def MarketSegment(Upload_Date, Contract_Type ):
    Grouped = df.groupby(['upload_date','contract_type','Market'])['deals'].sum().rename('Deals').reset_index()

    Segment1 = Grouped.loc[Grouped['upload_date'] == Upload_Date, ('Market', 'Deals','contract_type')]
    Segment = Segment1.loc[Segment1['contract_type'] == Contract_Type , ('Market', 'Deals')]
    Total = Segment['Deals'].sum()
    fig = px.pie(values=Segment['Deals'], names= Segment['Market'], 
                 title= f'Market Segementation by Number of Deals; (Total Number of Deals for "{Contract_Type}" = {Total})')
    fig.show()

widgets.interact(MarketSegment,Upload_Date = list3, Contract_Type = list4 )

interactive(children=(Dropdown(description='Upload_Date', options=(numpy.datetime64('2023-05-05T00:00:00.00000…

<function __main__.MarketSegment(Upload_Date, Contract_Type)>

### Suggested further improvement in the above charts:
    Once a contract date is equal to the current date, it should vanish from the chart.


for i, row in df1.iterrows():
    if df1.loc[i, :].isnull().sum() > 5:
        df1.drop([i], inplace=True)
    if df1.loc[[i], ['Units']] == np.nan:
        df1.sales[d.sales==24] = 100
        
df1

df = pd.DataFrame(
    np.array([[1, 2, 3], [4, 5, np.nan], [7, 8, 9], [3, 2, np.nan], [5, 6, np.nan]]), 
    columns=['a', 'b', 'c']
)
df


df['c'] = df.apply(
    lambda row: row['a']*row['b'] if np.isnan(row['c']) else row['c'],
    axis=1
)
df


In [23]:
# Rendering the Sheep subset of the livestock dataset in a dataframe
df2_1_raw = pd.read_csv('../tests/Final_Output/Livestock_Sheep_2023-05-09.csv')
list2_1 = [0]
df2_1 = df2_1_raw[df2_1_raw.avg_purch.isin(list2_1) == False].reset_index(drop = True)
df2_1


Unnamed: 0,class,units,avg_mass,avg_purch,avg_selling,selling_min,selling_max,date_from,date_to
0,A0,23,13.07,75.98,83.90,81.21,87.00,2022-05-30,2022-06-05
1,A1,338,15.60,110.22,107.52,105.74,107.90,2022-05-30,2022-06-05
2,A2,5782,19.97,103.88,106.26,103.99,106.83,2022-05-30,2022-06-05
3,A3,846,21.98,103.21,102.47,102.19,103.25,2022-05-30,2022-06-05
4,A4,247,21.49,94.50,92.43,91.81,92.58,2022-05-30,2022-06-05
...,...,...,...,...,...,...,...,...,...
863,AB3,2,21.40,72.19,74.49,74.00,75.00,2022-02-07,2022-02-13
864,B2,26,24.28,71.47,75.26,73.66,79.04,2022-02-07,2022-02-13
865,B3,1,23.50,76.00,78.00,78.00,78.00,2022-02-07,2022-02-13
866,C2,338,22.89,71.70,75.00,71.99,77.97,2022-02-07,2022-02-13


In [24]:
df2_1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 868 entries, 0 to 867
Data columns (total 9 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   class        868 non-null    object 
 1   units        868 non-null    int64  
 2   avg_mass     868 non-null    float64
 3   avg_purch    868 non-null    float64
 4   avg_selling  868 non-null    float64
 5   selling_min  868 non-null    float64
 6   selling_max  868 non-null    float64
 7   date_from    868 non-null    object 
 8   date_to      868 non-null    object 
dtypes: float64(5), int64(1), object(3)
memory usage: 61.2+ KB


In [25]:
df2_1.isnull().sum()

class          0
units          0
avg_mass       0
avg_purch      0
avg_selling    0
selling_min    0
selling_max    0
date_from      0
date_to        0
dtype: int64

In [26]:
df2_1['date_from'] = pd.to_datetime(df2_1.date_from)
df2_1['date_to'] = pd.to_datetime(df2_1.date_to)
df2_1.rename(columns = {'class':'Sheep_Class'}, inplace = True)
df2_1

Unnamed: 0,Sheep_Class,units,avg_mass,avg_purch,avg_selling,selling_min,selling_max,date_from,date_to
0,A0,23,13.07,75.98,83.90,81.21,87.00,2022-05-30,2022-06-05
1,A1,338,15.60,110.22,107.52,105.74,107.90,2022-05-30,2022-06-05
2,A2,5782,19.97,103.88,106.26,103.99,106.83,2022-05-30,2022-06-05
3,A3,846,21.98,103.21,102.47,102.19,103.25,2022-05-30,2022-06-05
4,A4,247,21.49,94.50,92.43,91.81,92.58,2022-05-30,2022-06-05
...,...,...,...,...,...,...,...,...,...
863,AB3,2,21.40,72.19,74.49,74.00,75.00,2022-02-07,2022-02-13
864,B2,26,24.28,71.47,75.26,73.66,79.04,2022-02-07,2022-02-13
865,B3,1,23.50,76.00,78.00,78.00,78.00,2022-02-07,2022-02-13
866,C2,338,22.89,71.70,75.00,71.99,77.97,2022-02-07,2022-02-13


In [27]:

for i, row in df2_1.iterrows():
    if df2_1.loc[i, :].isnull().sum() == 6:
        df2_1.drop([i], inplace=True)
        
df2_1.isnull().sum()

Sheep_Class    0
units          0
avg_mass       0
avg_purch      0
avg_selling    0
selling_min    0
selling_max    0
date_from      0
date_to        0
dtype: int64

In [28]:
df2_1

Unnamed: 0,Sheep_Class,units,avg_mass,avg_purch,avg_selling,selling_min,selling_max,date_from,date_to
0,A0,23,13.07,75.98,83.90,81.21,87.00,2022-05-30,2022-06-05
1,A1,338,15.60,110.22,107.52,105.74,107.90,2022-05-30,2022-06-05
2,A2,5782,19.97,103.88,106.26,103.99,106.83,2022-05-30,2022-06-05
3,A3,846,21.98,103.21,102.47,102.19,103.25,2022-05-30,2022-06-05
4,A4,247,21.49,94.50,92.43,91.81,92.58,2022-05-30,2022-06-05
...,...,...,...,...,...,...,...,...,...
863,AB3,2,21.40,72.19,74.49,74.00,75.00,2022-02-07,2022-02-13
864,B2,26,24.28,71.47,75.26,73.66,79.04,2022-02-07,2022-02-13
865,B3,1,23.50,76.00,78.00,78.00,78.00,2022-02-07,2022-02-13
866,C2,338,22.89,71.70,75.00,71.99,77.97,2022-02-07,2022-02-13


In [29]:
# Visuals for the Sheep Subset of the Livestock Dataset

df2_1 = df2_1.sort_values(by=['date_to']).reset_index(drop = True)

def sheepplotly1(): 
    
    
    fig = px.line(df2_1, x='date_to', y='avg_purch', color='Sheep_Class')
    
    fig.update_layout(barmode = 'group', 
    title= "Plot Avg Purchase Price versus Dates")

    
    # Add range slider
    fig.update_layout(
        xaxis=dict(
            rangeselector=dict(
                buttons=list([
                    dict(count=1,
                         label="1m",
                         step="month",
                         stepmode="backward"),
                    dict(count=6,
                         label="6m",
                         step="month",
                         stepmode="backward"),
                    dict(count=1,
                         label="YTD",
                         step="year",
                         stepmode="todate"),
                    dict(count=1,
                         label="1y",
                         step="year",
                         stepmode="backward"),
                    dict(step="all")
                ])
            ),
            rangeslider=dict(
                visible=True
            ),
            type="date"
        )
    )

    fig.show()

widgets.interact(sheepplotly1)

interactive(children=(Output(),), _dom_classes=('widget-interact',))

<function __main__.sheepplotly1()>

In [30]:
def sheep_pie():
    A = df2_1['Sheep_Class']
    B = len(A)
    fig = px.pie(A, values=A.value_counts().values, names=A.value_counts().index)
    fig.update_traces(hoverinfo='label+percent+name', textinfo='value')
    
    fig.update_layout(
    title_text= f"No. of Transactions per Sheep Class; Total No. of Records = {B}",
    # Add annotations in the center of the donut pies.
    annotations=[dict(text='Sheep Class', x=0.5, y=0.5, font_size=13, showarrow=False)])
    fig.show()
    
widgets.interact(sheep_pie)

interactive(children=(Output(),), _dom_classes=('widget-interact',))

<function __main__.sheep_pie()>

In [31]:
# Visuals for the Sheep Subset of the Livestock Dataset for pairwise comparison

class_list = list(df2_1.Sheep_Class.unique())


def sheepplotly2(Class1 = None, Class2 = None): 
    options = [Class1, Class2]

    A = df2_1[df2_1['Sheep_Class'].isin(options)]
    
    fig = px.line(A, x='date_to', y='avg_purch', color='Sheep_Class')
    
    fig.update_layout(barmode = 'group', 
    title= "Comparison Between Sheep Class Pairs")

    
    # Add range slider
    fig.update_layout(
        xaxis=dict(
            rangeselector=dict(
                buttons=list([
                    dict(count=1,
                         label="1m",
                         step="month",
                         stepmode="backward"),
                    dict(count=6,
                         label="6m",
                         step="month",
                         stepmode="backward"),
                    dict(count=1,
                         label="YTD",
                         step="year",
                         stepmode="todate"),
                    dict(count=1,
                         label="1y",
                         step="year",
                         stepmode="backward"),
                    dict(step="all")
                ])
            ),
            rangeslider=dict(
                visible=True
            ),
            type="date"
        )
    )

    fig.show()

widgets.interact(sheepplotly2, Class1 = class_list, Class2 = class_list)

interactive(children=(Dropdown(description='Class1', options=('A0', 'C3', 'A2', 'A3', 'A4', 'A5', 'A6', 'AB2',…

<function __main__.sheepplotly2(Class1=None, Class2=None)>

In [32]:
# Rendering the Cattle subset of the livestock dataset in a dataframe
df2_2_raw = pd.read_csv('../tests/Final_Output/Livestock_Cattle_2023-05-09.csv')
list2_2 = [0]
df2_2 = df2_2_raw[df2_2_raw.avg_purch.isin(list2_2) == False].reset_index(drop = True)
df2_2

Unnamed: 0,class,units,avg_mass,avg_purch,avg_selling,selling_min,selling_max,date_from,date_to
0,A2,6977,270.88,61.34,63.44,62.27,64.01,2022-05-30,2022-06-05
1,A3,531,276.90,57.46,62.22,60.83,63.44,2022-05-30,2022-06-05
2,AB2,591,262.46,58.20,61.35,60.34,62.40,2022-05-30,2022-06-05
3,AB3,77,261.91,55.68,61.96,61.56,62.45,2022-05-30,2022-06-05
4,B2,251,261.97,53.96,57.09,52.69,61.54,2022-05-30,2022-06-05
...,...,...,...,...,...,...,...,...,...
531,AB3,72,280.44,54.72,58.38,56.50,59.36,2022-02-07,2022-02-13
532,B2,188,253.51,53.02,54.98,54.10,55.49,2022-02-07,2022-02-13
533,B3,23,288.85,51.58,56.19,54.73,56.99,2022-02-07,2022-02-13
534,C2,336,256.79,47.06,46.20,48.32,52.92,2022-02-07,2022-02-13


In [33]:
df2_2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 536 entries, 0 to 535
Data columns (total 9 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   class        536 non-null    object 
 1   units        536 non-null    int64  
 2   avg_mass     536 non-null    float64
 3   avg_purch    536 non-null    float64
 4   avg_selling  536 non-null    float64
 5   selling_min  536 non-null    float64
 6   selling_max  536 non-null    float64
 7   date_from    536 non-null    object 
 8   date_to      536 non-null    object 
dtypes: float64(5), int64(1), object(3)
memory usage: 37.8+ KB


In [34]:
df2_2.isnull().sum()

class          0
units          0
avg_mass       0
avg_purch      0
avg_selling    0
selling_min    0
selling_max    0
date_from      0
date_to        0
dtype: int64

In [35]:
df2_2['date_from'] = pd.to_datetime(df2_2.date_from)
df2_2['date_to'] = pd.to_datetime(df2_2.date_to)
df2_2.rename(columns = {'class':'Cattle_Class'}, inplace = True)
df2_2.head(20)

Unnamed: 0,Cattle_Class,units,avg_mass,avg_purch,avg_selling,selling_min,selling_max,date_from,date_to
0,A2,6977,270.88,61.34,63.44,62.27,64.01,2022-05-30,2022-06-05
1,A3,531,276.9,57.46,62.22,60.83,63.44,2022-05-30,2022-06-05
2,AB2,591,262.46,58.2,61.35,60.34,62.4,2022-05-30,2022-06-05
3,AB3,77,261.91,55.68,61.96,61.56,62.45,2022-05-30,2022-06-05
4,B2,251,261.97,53.96,57.09,52.69,61.54,2022-05-30,2022-06-05
5,B3,43,260.32,51.91,55.59,52.67,58.75,2022-05-30,2022-06-05
6,C2,563,273.33,47.93,51.64,50.04,52.77,2022-05-30,2022-06-05
7,C3,146,285.3,46.89,51.7,50.15,53.39,2022-05-30,2022-06-05
8,A2,6302,282.75,59.67,60.25,58.67,60.79,2022-03-07,2022-03-13
9,A3,378,300.8,59.42,61.33,58.78,63.41,2022-03-07,2022-03-13


In [36]:
df2_2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 536 entries, 0 to 535
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   Cattle_Class  536 non-null    object        
 1   units         536 non-null    int64         
 2   avg_mass      536 non-null    float64       
 3   avg_purch     536 non-null    float64       
 4   avg_selling   536 non-null    float64       
 5   selling_min   536 non-null    float64       
 6   selling_max   536 non-null    float64       
 7   date_from     536 non-null    datetime64[ns]
 8   date_to       536 non-null    datetime64[ns]
dtypes: datetime64[ns](2), float64(5), int64(1), object(1)
memory usage: 37.8+ KB


In [37]:
# Visuals for the Cattle Subset of the Livestock Dataset

df2_2 = df2_2.sort_values(by=['date_to']).reset_index(drop = True)

def cattleplotly1(): 
    
    
    fig = px.line(df2_2, x='date_to', y='avg_purch', color='Cattle_Class')
    
    fig.update_layout(barmode = 'group', 
    title= "Plot Avg Purchase Price versus Dates")

    
    # Add range slider
    fig.update_layout(
        xaxis=dict(
            rangeselector=dict(
                buttons=list([
                    dict(count=1,
                         label="1m",
                         step="month",
                         stepmode="backward"),
                    dict(count=6,
                         label="6m",
                         step="month",
                         stepmode="backward"),
                    dict(count=1,
                         label="YTD",
                         step="year",
                         stepmode="todate"),
                    dict(count=1,
                         label="1y",
                         step="year",
                         stepmode="backward"),
                    dict(step="all")
                ])
            ),
            rangeslider=dict(
                visible=True
            ),
            type="date"
        )
    )

    fig.show()

widgets.interact(cattleplotly1)

interactive(children=(Output(),), _dom_classes=('widget-interact',))

<function __main__.cattleplotly1()>

In [38]:
# Visuals for the Cattle Subset of the Livestock Dataset for pairwise comparison

class_list = list(df2_2.Cattle_Class.unique())


def cattleplotly2(Class1 = None, Class2 = None): 
    options = [Class1, Class2]

    A = df2_2[df2_2['Cattle_Class'].isin(options)]
    
    fig = px.line(A, x='date_to', y='avg_purch', color='Cattle_Class')
    
    fig.update_layout(barmode = 'group', 
    title= "Comparison Between Cattle Class Pairs")

    
    # Add range slider
    fig.update_layout(
        xaxis=dict(
            rangeselector=dict(
                buttons=list([
                    dict(count=1,
                         label="1m",
                         step="month",
                         stepmode="backward"),
                    dict(count=6,
                         label="6m",
                         step="month",
                         stepmode="backward"),
                    dict(count=1,
                         label="YTD",
                         step="year",
                         stepmode="todate"),
                    dict(count=1,
                         label="1y",
                         step="year",
                         stepmode="backward"),
                    dict(step="all")
                ])
            ),
            rangeslider=dict(
                visible=True
            ),
            type="date"
        )
    )

    fig.show()

widgets.interact(cattleplotly2, Class1 = class_list, Class2 = class_list)

interactive(children=(Dropdown(description='Class1', options=('A2', 'C3', 'AB2', 'AB3', 'B2', 'B3', 'C2', 'A3'…

<function __main__.cattleplotly2(Class1=None, Class2=None)>

In [39]:
def cattle_pie():
    A = df2_2['Cattle_Class']
    B = len(A)
    fig = px.pie(A, values=A.value_counts().values, names=A.value_counts().index)
    fig.update_traces(hoverinfo='label+percent+name', textinfo='value')
    
    fig.update_layout(
    title_text= f"No. of Transactions per Cattle Class; Total No. of Transactions = {B}",
    # Add annotations in the center of the donut pies.
    annotations=[dict(text='Cattle Class', x=0.5, y=0.5, font_size=13, showarrow=False)])
    fig.show()
    
widgets.interact(cattle_pie)

interactive(children=(Output(),), _dom_classes=('widget-interact',))

<function __main__.cattle_pie()>

In [40]:
# Rendering the Pigs subset of the livestock dataset in a dataframe
df2_3_raw = pd.read_csv('../tests/Final_Output/Livestock_Pork_2023-05-09.csv')
list2_3 = [0]
df2_3 = df2_3_raw[df2_3_raw.avg_purch.isin(list2_3) == False].reset_index(drop = True)
df2_3

Unnamed: 0,class,units,avg_mass,avg_purch,purch_min,purch_max,date_from,date_to,weight
0,P,1064,50.46,28.00,25.37,29.69,2022-05-30,2022-06-05,20-55.99kg
1,O,73,48.34,27.46,25.60,30.60,2022-05-30,2022-06-05,20-55.99kg
2,R,13,51.14,26.36,25.50,29.24,2022-05-30,2022-06-05,20-55.99kg
3,C,4,51.78,23.29,19.75,28.00,2022-05-30,2022-06-05,20-55.99kg
4,S,923,22.75,22.75,22.75,0.00,2022-05-30,2022-06-05,20-55.99kg
...,...,...,...,...,...,...,...,...,...
1391,O,6525,87.89,24.18,23.69,25.20,2022-02-07,2022-02-13,80-99.99kg
1392,R,1141,89.80,23.96,22.78,24.40,2022-02-07,2022-02-13,80-99.99kg
1393,C,70,91.36,21.26,20.02,21.86,2022-02-07,2022-02-13,80-99.99kg
1394,U,4,88.95,20.00,18.50,22.00,2022-02-07,2022-02-13,80-99.99kg


In [41]:
df2_3.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1396 entries, 0 to 1395
Data columns (total 9 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   class      1396 non-null   object 
 1   units      1396 non-null   int64  
 2   avg_mass   1396 non-null   float64
 3   avg_purch  1396 non-null   float64
 4   purch_min  1396 non-null   float64
 5   purch_max  1396 non-null   float64
 6   date_from  1396 non-null   object 
 7   date_to    1396 non-null   object 
 8   weight     1396 non-null   object 
dtypes: float64(4), int64(1), object(4)
memory usage: 98.3+ KB


In [42]:
df2_3.isnull().sum()

class        0
units        0
avg_mass     0
avg_purch    0
purch_min    0
purch_max    0
date_from    0
date_to      0
weight       0
dtype: int64

In [43]:

for i, row in df2_3.iterrows():
    if df2_3.loc[i, :].isnull().sum() > 1:    
        df2_3.drop([i], inplace=True)



df2_3.isnull().sum()

class        0
units        0
avg_mass     0
avg_purch    0
purch_min    0
purch_max    0
date_from    0
date_to      0
weight       0
dtype: int64

In [44]:
df2_3

Unnamed: 0,class,units,avg_mass,avg_purch,purch_min,purch_max,date_from,date_to,weight
0,P,1064,50.46,28.00,25.37,29.69,2022-05-30,2022-06-05,20-55.99kg
1,O,73,48.34,27.46,25.60,30.60,2022-05-30,2022-06-05,20-55.99kg
2,R,13,51.14,26.36,25.50,29.24,2022-05-30,2022-06-05,20-55.99kg
3,C,4,51.78,23.29,19.75,28.00,2022-05-30,2022-06-05,20-55.99kg
4,S,923,22.75,22.75,22.75,0.00,2022-05-30,2022-06-05,20-55.99kg
...,...,...,...,...,...,...,...,...,...
1391,O,6525,87.89,24.18,23.69,25.20,2022-02-07,2022-02-13,80-99.99kg
1392,R,1141,89.80,23.96,22.78,24.40,2022-02-07,2022-02-13,80-99.99kg
1393,C,70,91.36,21.26,20.02,21.86,2022-02-07,2022-02-13,80-99.99kg
1394,U,4,88.95,20.00,18.50,22.00,2022-02-07,2022-02-13,80-99.99kg


In [45]:
df2_3['date_from'] = pd.to_datetime(df2_3.date_from)
df2_3['date_to'] = pd.to_datetime(df2_3.date_to)
df2_3.rename(columns = {'class':'Pig_Class'}, inplace = True)
df2_3.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1396 entries, 0 to 1395
Data columns (total 9 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   Pig_Class  1396 non-null   object        
 1   units      1396 non-null   int64         
 2   avg_mass   1396 non-null   float64       
 3   avg_purch  1396 non-null   float64       
 4   purch_min  1396 non-null   float64       
 5   purch_max  1396 non-null   float64       
 6   date_from  1396 non-null   datetime64[ns]
 7   date_to    1396 non-null   datetime64[ns]
 8   weight     1396 non-null   object        
dtypes: datetime64[ns](2), float64(4), int64(1), object(2)
memory usage: 98.3+ KB


In [46]:
df2_3

Unnamed: 0,Pig_Class,units,avg_mass,avg_purch,purch_min,purch_max,date_from,date_to,weight
0,P,1064,50.46,28.00,25.37,29.69,2022-05-30,2022-06-05,20-55.99kg
1,O,73,48.34,27.46,25.60,30.60,2022-05-30,2022-06-05,20-55.99kg
2,R,13,51.14,26.36,25.50,29.24,2022-05-30,2022-06-05,20-55.99kg
3,C,4,51.78,23.29,19.75,28.00,2022-05-30,2022-06-05,20-55.99kg
4,S,923,22.75,22.75,22.75,0.00,2022-05-30,2022-06-05,20-55.99kg
...,...,...,...,...,...,...,...,...,...
1391,O,6525,87.89,24.18,23.69,25.20,2022-02-07,2022-02-13,80-99.99kg
1392,R,1141,89.80,23.96,22.78,24.40,2022-02-07,2022-02-13,80-99.99kg
1393,C,70,91.36,21.26,20.02,21.86,2022-02-07,2022-02-13,80-99.99kg
1394,U,4,88.95,20.00,18.50,22.00,2022-02-07,2022-02-13,80-99.99kg


In [47]:
df2_3.loc[df2_3['weight'] == '20-55.99kg']

Unnamed: 0,Pig_Class,units,avg_mass,avg_purch,purch_min,purch_max,date_from,date_to,weight
0,P,1064,50.46,28.00,25.37,29.69,2022-05-30,2022-06-05,20-55.99kg
1,O,73,48.34,27.46,25.60,30.60,2022-05-30,2022-06-05,20-55.99kg
2,R,13,51.14,26.36,25.50,29.24,2022-05-30,2022-06-05,20-55.99kg
3,C,4,51.78,23.29,19.75,28.00,2022-05-30,2022-06-05,20-55.99kg
4,S,923,22.75,22.75,22.75,0.00,2022-05-30,2022-06-05,20-55.99kg
...,...,...,...,...,...,...,...,...,...
1357,S,1,50.63,20.00,20.00,20.00,2022-05-02,2022-05-08,20-55.99kg
1376,P,1417,51.01,25.39,22.47,27.04,2022-02-07,2022-02-13,20-55.99kg
1377,O,56,53.99,25.49,23.17,27.00,2022-02-07,2022-02-13,20-55.99kg
1378,R,4,41.77,23.16,18.50,24.50,2022-02-07,2022-02-13,20-55.99kg


In [48]:
index =  np.arange(0, len(df2_3), 1)
df2_3.index = index

In [49]:
df2_3

Unnamed: 0,Pig_Class,units,avg_mass,avg_purch,purch_min,purch_max,date_from,date_to,weight
0,P,1064,50.46,28.00,25.37,29.69,2022-05-30,2022-06-05,20-55.99kg
1,O,73,48.34,27.46,25.60,30.60,2022-05-30,2022-06-05,20-55.99kg
2,R,13,51.14,26.36,25.50,29.24,2022-05-30,2022-06-05,20-55.99kg
3,C,4,51.78,23.29,19.75,28.00,2022-05-30,2022-06-05,20-55.99kg
4,S,923,22.75,22.75,22.75,0.00,2022-05-30,2022-06-05,20-55.99kg
...,...,...,...,...,...,...,...,...,...
1391,O,6525,87.89,24.18,23.69,25.20,2022-02-07,2022-02-13,80-99.99kg
1392,R,1141,89.80,23.96,22.78,24.40,2022-02-07,2022-02-13,80-99.99kg
1393,C,70,91.36,21.26,20.02,21.86,2022-02-07,2022-02-13,80-99.99kg
1394,U,4,88.95,20.00,18.50,22.00,2022-02-07,2022-02-13,80-99.99kg


a = df2_3.loc[df2_3['weight'] == '20-55.99kg']
b =  a.loc[a['Pig_Class'] == 'P']
c = b.loc[b['date_to'] == '2022-05-22']
c

In [50]:
list_pig = list(df2_3['weight'].unique())

def pig_pie(Weight):
    A = df2_3.loc[df2_3['weight'] == Weight, ('Pig_Class')]
 
    fig = px.pie(A, values=A.value_counts().values, names=A.value_counts().index)
    fig.update_traces(hoverinfo='label+percent+name', textinfo='value')
    
    fig.update_layout(
    title_text= f"Trading Activities for {Weight} Across Pig Classes",
    )
    fig.show()
    
widgets.interact(pig_pie, Weight = list_pig)

interactive(children=(Dropdown(description='Weight', options=('20-55.99kg', '56-64..99kg', '65-79.99kg', '80-9…

<function __main__.pig_pie(Weight)>

In [51]:
# Visuals for the Sheep Subset of the Livestock Dataset for pairwise comparison

df2_3 = df2_3.sort_values(by=['date_to']).reset_index(drop = True)

class_list1 = list(df2_3.weight.unique())
class_list2 = list(df2_3.Pig_Class.unique())


def sheepplotly3(Weight_Class, Pig_Class): 
    
    A = df2_3.loc[df2_3['weight'] == Weight_Class, ('Pig_Class', 'avg_purch','date_to', 'avg_mass')]
    B = A.loc[A['Pig_Class'] == Pig_Class, ('avg_purch', 'date_to', 'avg_mass')]
    
    fig = go.Figure(data =[
                    go.Scatter(name = 'Average Purchase Price', x=B['date_to'], y =B['avg_purch'], yaxis="y")
                    ])
    
    fig.update_layout(barmode = 'group', 
    title= f"Avg Purch Price for Class {Pig_Class}, {Weight_Class} Weight Category")

    
    # Add range slider
    fig.update_layout(
        xaxis=dict(
            rangeselector=dict(
                buttons=list([
                    dict(count=1,
                         label="1m",
                         step="month",
                         stepmode="backward"),
                    dict(count=6,
                         label="6m",
                         step="month",
                         stepmode="backward"),
                    dict(count=1,
                         label="YTD",
                         step="year",
                         stepmode="todate"),
                    dict(count=1,
                         label="1y",
                         step="year",
                         stepmode="backward"),
                    dict(step="all")
                ])
            ),
            rangeslider=dict(
                visible=True
            ),
            type="date"
        )
    )

    
    fig.show()

widgets.interact(sheepplotly3, Weight_Class = class_list1, Pig_Class = class_list2)

interactive(children=(Dropdown(description='Weight_Class', options=('20-55.99kg', '80-99.99kg', '65-79.99kg', …

<function __main__.sheepplotly3(Weight_Class, Pig_Class)>

    
    fig.update_layout(barmode = 'group', 
    title= f"Avg Purch Price for Class {Pig_Class}, {Weight_Class} Weight Category")
    
        
    fig = go.Figure(data =[
                    go.Scatter(name = 'Average Purchase Price', x=B['date_to'], y =B['avg_purch'], yaxis="y")
                    ])

In [52]:
df2_3 = df2_3.sort_values(by=['date_to']).reset_index(drop = True)

class_list1 = list(df2_3.weight.unique())
class_list2 = list(df2_3.Pig_Class.unique())


def sheepplotly3(Weight_Class, Pig_Class): 
    
    A = df2_3.loc[df2_3['weight'] == Weight_Class, ('Pig_Class', 'avg_purch','date_to', 'avg_mass', 'units')]
    B = A.loc[A['Pig_Class'] == Pig_Class, ('avg_purch', 'date_to', 'avg_mass', 'units')]

    # initialize a Figure object and store it in
    # a variable fig
    fig = go.Figure()

    # add x and y values for the 1st scatter
    # plot and name the yaxis as yaxis1 values

    fig.add_trace(go.Bar(
        x=B['date_to'],
        y=B['units'],
        name="Units", marker=dict(color='#e377c2')
        
    ))
    
    # add x and y values for the 2nd scatter
    # plot and name the yaxis as yaxis2 values
    fig.add_trace(go.Scatter(
        x=B['date_to'],
        y=B['avg_mass'],
        name="Average Mass", marker=dict(color='#636EFA'),
        yaxis="y2"
    ))

    # add x and y values for the 3rd scatter
    # plot and name the yaxis as yaxis3 values
    fig.add_trace(go.Scatter(
        x=B['date_to'],
        y=B['avg_purch'],
        name="Average Purchase Price", marker=dict(color="#006400"),
        yaxis="y3"
    ))       
    

    # Create axis objects
    fig.update_layout(
        # split the x-axis to fraction of plots in
        # proportions
        xaxis=dict(
            domain=[0.2, 0.8]
        ),

       
        # pass the y-axis title, titlefont, color
        # and tickfont as a dictionary and store
        # it an variable yaxis
        yaxis=dict(
            title="Units",
            titlefont=dict(
                color='#e377c2'
            ),
            tickfont=dict(
                color='#e377c2'
            ),
            anchor="free",  # specifying x - axis has to be the fixed
            side="left",  # specifying the side the axis should be present
            position=0.1  # specifying the position of the axis
        ),

        # pass the y-axis 2 title, titlefont, color and
        # tickfont as a dictionary and store it an
        # variable yaxis 2
        yaxis2=dict(
            title="Average Mass",
            titlefont=dict(
                color='#636EFA'
            ),
            tickfont=dict(
                color='#636EFA'
            ),
            anchor="free",  # specifying x - axis has to be the fixed
            overlaying="y",  # specifying y - axis has to be separated
            side="left",  # specifying the side the axis should be present
            position=0.2  # specifying the position of the axis
        ),

        # pass the y-axis 3 title, titlefont, color and
        # tickfont as a dictionary and store it an
        # variable yaxis 3
        yaxis3=dict(
            title="Average Purchase Price",
            titlefont=dict(
                color="#006400"
            ),
            tickfont=dict(
                color="#006400"
            ),
            anchor="x",     # specifying x - axis has to be the fixed
            overlaying="y",  # specifyinfg y - axis has to be separated
            side="right"  # specifying the side the axis should be present
            )

    
         )
      
    
    # Update layout of the plot namely title_text, width
    # and place it in the center using title_x parameter
    # as shown
    fig.update_layout(
        title_text=f"Avg Purch Price for Class {Pig_Class}, {Weight_Class} Weight Category",
        width=1500,
        title_x=0.2)

    
    # Add range slider
    fig.update_layout(
        xaxis=dict(
            rangeselector=dict(
                buttons=list([
                    dict(count=1,
                         label="1m",
                         step="month",
                         stepmode="backward"),
                    dict(count=6,
                         label="6m",
                         step="month",
                         stepmode="backward"),
                    dict(count=1,
                         label="YTD",
                         step="year",
                         stepmode="todate"),
                    dict(count=1,
                         label="1y",
                         step="year",
                         stepmode="backward"),
                    dict(step="all")
                ])
            ),
            rangeslider=dict(
                visible=True
            ),
            type="date"
        )
    )

    
    fig.show()

widgets.interact(sheepplotly3, Weight_Class = class_list1, Pig_Class = class_list2)


interactive(children=(Dropdown(description='Weight_Class', options=('20-55.99kg', '65-79.99kg', '80-99.99kg', …

<function __main__.sheepplotly3(Weight_Class, Pig_Class)>

In [53]:
df3_init = pd.read_csv('../tests/Final_Output/processed_horticulture_2023-05-11.csv')
df3_init

Unnamed: 0,product,variety,class,size,package,unit(kg),closing_price,high_price,low_price,average_price,total_sales,sales_quantity,closing_stock,date,market_name,market_code,market_type
0,1. PECAN NUTS,NO VARIETY,NOT GRADED,NO SIZE,BAG,3.0,0.0,0.0,0.0,0.0,0.0,0,98,2023-05-04,Pietermaritzburg Fresh Produce Market,PIE,Fresh Produce Markets
1,2. PECAN NUTS,NO VARIETY,NOT GRADED,NO SIZE,BAG,3.0,0.0,0.0,0.0,0.0,0.0,0,98,2023-05-03,Pietermaritzburg Fresh Produce Market,PIE,Fresh Produce Markets
2,3. PECAN NUTS,NO VARIETY,NOT GRADED,NO SIZE,BAG,3.0,0.0,0.0,0.0,0.0,0.0,0,98,2023-05-02,Pietermaritzburg Fresh Produce Market,PIE,Fresh Produce Markets
3,4. PECAN NUTS,NO VARIETY,NOT GRADED,NO SIZE,PER KILO / POCKET,1.0,40.0,40.0,40.0,40.0,440.0,11,0,2023-05-02,Pietermaritzburg Fresh Produce Market,PIE,Fresh Produce Markets
4,5. PECAN NUTS,NO VARIETY,NOT GRADED,NO SIZE,BAG,3.0,0.0,0.0,0.0,0.0,0.0,0,98,2023-05-02,Pietermaritzburg Fresh Produce Market,PIE,Fresh Produce Markets
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
322360,57. CELERY,NO VARIETY,NOT GRADED,NO SIZE,BUNCH,0.0,0.0,0.0,0.0,0.0,0.0,0,2,2023-02-08,East London Fresh Produce Market,EAS,Fresh Produce Markets
322361,58. CELERY,NO VARIETY,NOT GRADED,NO SIZE,BUNCH,0.0,0.0,0.0,0.0,0.0,0.0,0,2,2023-02-07,East London Fresh Produce Market,EAS,Fresh Produce Markets
322362,59. CELERY,NO VARIETY,NOT GRADED,NO SIZE,BUNCH,0.0,0.0,0.0,0.0,0.0,0.0,0,2,2023-02-07,East London Fresh Produce Market,EAS,Fresh Produce Markets
322363,60. CELERY,NO VARIETY,NOT GRADED,NO SIZE,BUNCH,0.0,0.0,0.0,0.0,0.0,0.0,0,2,2023-02-06,East London Fresh Produce Market,EAS,Fresh Produce Markets


In [54]:
df3_init.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 322365 entries, 0 to 322364
Data columns (total 17 columns):
 #   Column          Non-Null Count   Dtype  
---  ------          --------------   -----  
 0   product         322365 non-null  object 
 1   variety         322365 non-null  object 
 2   class           322365 non-null  object 
 3   size            322365 non-null  object 
 4   package         322365 non-null  object 
 5   unit(kg)        321165 non-null  float64
 6   closing_price   322365 non-null  float64
 7   high_price      322365 non-null  float64
 8   low_price       322365 non-null  float64
 9   average_price   322365 non-null  float64
 10  total_sales     322365 non-null  float64
 11  sales_quantity  322365 non-null  int64  
 12  closing_stock   322365 non-null  int64  
 13  date            322365 non-null  object 
 14  market_name     322365 non-null  object 
 15  market_code     322365 non-null  object 
 16  market_type     322365 non-null  object 
dtypes: float64

In [55]:
list1 = [0]
df3 = df3_init[df3_init.closing_price.isin(list1) == False].reset_index(drop = True)
df3

Unnamed: 0,product,variety,class,size,package,unit(kg),closing_price,high_price,low_price,average_price,total_sales,sales_quantity,closing_stock,date,market_name,market_code,market_type
0,4. PECAN NUTS,NO VARIETY,NOT GRADED,NO SIZE,PER KILO / POCKET,1.0,40.0,40.0,40.0,40.0,440.0,11,0,2023-05-02,Pietermaritzburg Fresh Produce Market,PIE,Fresh Produce Markets
1,6. PECAN NUTS,NO VARIETY,NOT GRADED,NO SIZE,PER KILO / POCKET,1.0,40.0,40.0,40.0,40.0,440.0,11,0,2023-05-02,Pietermaritzburg Fresh Produce Market,PIE,Fresh Produce Markets
2,8. PECAN NUTS,NO VARIETY,NOT GRADED,NO SIZE,PER KILO / POCKET,1.0,45.0,45.0,45.0,45.0,180.0,4,11,2023-04-28,Pietermaritzburg Fresh Produce Market,PIE,Fresh Produce Markets
3,11. PECAN NUTS,NO VARIETY,NOT GRADED,NO SIZE,PER KILO / POCKET,1.0,50.0,50.0,50.0,50.0,50.0,1,59,2023-04-21,Pietermaritzburg Fresh Produce Market,PIE,Fresh Produce Markets
4,12. PECAN NUTS,NO VARIETY,NOT GRADED,NO SIZE,PER KILO / POCKET,1.0,50.0,50.0,50.0,50.0,50.0,1,59,2023-04-21,Pietermaritzburg Fresh Produce Market,PIE,Fresh Produce Markets
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
219526,47. CELERY,NO VARIETY,NOT GRADED,NO SIZE,BUNCH,0.0,10.0,10.0,10.0,10.0,20.0,2,0,2023-02-17,East London Fresh Produce Market,EAS,Fresh Produce Markets
219527,48. CELERY,NO VARIETY,NOT GRADED,NO SIZE,PUNNET,0.0,15.0,15.0,15.0,15.0,150.0,10,0,2023-02-16,East London Fresh Produce Market,EAS,Fresh Produce Markets
219528,49. CELERY,NO VARIETY,NOT GRADED,NO SIZE,BUNCH,0.0,12.0,12.0,12.0,12.0,60.0,5,2,2023-02-16,East London Fresh Produce Market,EAS,Fresh Produce Markets
219529,50. CELERY,NO VARIETY,NOT GRADED,NO SIZE,PUNNET,0.0,15.0,15.0,15.0,15.0,150.0,10,0,2023-02-16,East London Fresh Produce Market,EAS,Fresh Produce Markets


In [56]:
df3['date'] = pd.to_datetime(df3.date)

In [57]:
df3.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 219531 entries, 0 to 219530
Data columns (total 17 columns):
 #   Column          Non-Null Count   Dtype         
---  ------          --------------   -----         
 0   product         219531 non-null  object        
 1   variety         219531 non-null  object        
 2   class           219531 non-null  object        
 3   size            219531 non-null  object        
 4   package         219531 non-null  object        
 5   unit(kg)        218731 non-null  float64       
 6   closing_price   219531 non-null  float64       
 7   high_price      219531 non-null  float64       
 8   low_price       219531 non-null  float64       
 9   average_price   219531 non-null  float64       
 10  total_sales     219531 non-null  float64       
 11  sales_quantity  219531 non-null  int64         
 12  closing_stock   219531 non-null  int64         
 13  date            219531 non-null  datetime64[ns]
 14  market_name     219531 non-null  obj

In [58]:
df3[df3['closing_price'] == 0]

Unnamed: 0,product,variety,class,size,package,unit(kg),closing_price,high_price,low_price,average_price,total_sales,sales_quantity,closing_stock,date,market_name,market_code,market_type


In [59]:
df3[['series', 'product']] = df3['product'].str.split('.', expand=True)
df3

Unnamed: 0,product,variety,class,size,package,unit(kg),closing_price,high_price,low_price,average_price,total_sales,sales_quantity,closing_stock,date,market_name,market_code,market_type,series
0,PECAN NUTS,NO VARIETY,NOT GRADED,NO SIZE,PER KILO / POCKET,1.0,40.0,40.0,40.0,40.0,440.0,11,0,2023-05-02,Pietermaritzburg Fresh Produce Market,PIE,Fresh Produce Markets,4
1,PECAN NUTS,NO VARIETY,NOT GRADED,NO SIZE,PER KILO / POCKET,1.0,40.0,40.0,40.0,40.0,440.0,11,0,2023-05-02,Pietermaritzburg Fresh Produce Market,PIE,Fresh Produce Markets,6
2,PECAN NUTS,NO VARIETY,NOT GRADED,NO SIZE,PER KILO / POCKET,1.0,45.0,45.0,45.0,45.0,180.0,4,11,2023-04-28,Pietermaritzburg Fresh Produce Market,PIE,Fresh Produce Markets,8
3,PECAN NUTS,NO VARIETY,NOT GRADED,NO SIZE,PER KILO / POCKET,1.0,50.0,50.0,50.0,50.0,50.0,1,59,2023-04-21,Pietermaritzburg Fresh Produce Market,PIE,Fresh Produce Markets,11
4,PECAN NUTS,NO VARIETY,NOT GRADED,NO SIZE,PER KILO / POCKET,1.0,50.0,50.0,50.0,50.0,50.0,1,59,2023-04-21,Pietermaritzburg Fresh Produce Market,PIE,Fresh Produce Markets,12
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
219526,CELERY,NO VARIETY,NOT GRADED,NO SIZE,BUNCH,0.0,10.0,10.0,10.0,10.0,20.0,2,0,2023-02-17,East London Fresh Produce Market,EAS,Fresh Produce Markets,47
219527,CELERY,NO VARIETY,NOT GRADED,NO SIZE,PUNNET,0.0,15.0,15.0,15.0,15.0,150.0,10,0,2023-02-16,East London Fresh Produce Market,EAS,Fresh Produce Markets,48
219528,CELERY,NO VARIETY,NOT GRADED,NO SIZE,BUNCH,0.0,12.0,12.0,12.0,12.0,60.0,5,2,2023-02-16,East London Fresh Produce Market,EAS,Fresh Produce Markets,49
219529,CELERY,NO VARIETY,NOT GRADED,NO SIZE,PUNNET,0.0,15.0,15.0,15.0,15.0,150.0,10,0,2023-02-16,East London Fresh Produce Market,EAS,Fresh Produce Markets,50


In [60]:
df3['product'].str.replace(' ', '')
df3

Unnamed: 0,product,variety,class,size,package,unit(kg),closing_price,high_price,low_price,average_price,total_sales,sales_quantity,closing_stock,date,market_name,market_code,market_type,series
0,PECAN NUTS,NO VARIETY,NOT GRADED,NO SIZE,PER KILO / POCKET,1.0,40.0,40.0,40.0,40.0,440.0,11,0,2023-05-02,Pietermaritzburg Fresh Produce Market,PIE,Fresh Produce Markets,4
1,PECAN NUTS,NO VARIETY,NOT GRADED,NO SIZE,PER KILO / POCKET,1.0,40.0,40.0,40.0,40.0,440.0,11,0,2023-05-02,Pietermaritzburg Fresh Produce Market,PIE,Fresh Produce Markets,6
2,PECAN NUTS,NO VARIETY,NOT GRADED,NO SIZE,PER KILO / POCKET,1.0,45.0,45.0,45.0,45.0,180.0,4,11,2023-04-28,Pietermaritzburg Fresh Produce Market,PIE,Fresh Produce Markets,8
3,PECAN NUTS,NO VARIETY,NOT GRADED,NO SIZE,PER KILO / POCKET,1.0,50.0,50.0,50.0,50.0,50.0,1,59,2023-04-21,Pietermaritzburg Fresh Produce Market,PIE,Fresh Produce Markets,11
4,PECAN NUTS,NO VARIETY,NOT GRADED,NO SIZE,PER KILO / POCKET,1.0,50.0,50.0,50.0,50.0,50.0,1,59,2023-04-21,Pietermaritzburg Fresh Produce Market,PIE,Fresh Produce Markets,12
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
219526,CELERY,NO VARIETY,NOT GRADED,NO SIZE,BUNCH,0.0,10.0,10.0,10.0,10.0,20.0,2,0,2023-02-17,East London Fresh Produce Market,EAS,Fresh Produce Markets,47
219527,CELERY,NO VARIETY,NOT GRADED,NO SIZE,PUNNET,0.0,15.0,15.0,15.0,15.0,150.0,10,0,2023-02-16,East London Fresh Produce Market,EAS,Fresh Produce Markets,48
219528,CELERY,NO VARIETY,NOT GRADED,NO SIZE,BUNCH,0.0,12.0,12.0,12.0,12.0,60.0,5,2,2023-02-16,East London Fresh Produce Market,EAS,Fresh Produce Markets,49
219529,CELERY,NO VARIETY,NOT GRADED,NO SIZE,PUNNET,0.0,15.0,15.0,15.0,15.0,150.0,10,0,2023-02-16,East London Fresh Produce Market,EAS,Fresh Produce Markets,50


In [61]:
list1 = df3['product'].unique()
list1

array([' PECAN NUTS', ' BROCOLLI', ' AMADUMBE', ' BUTTERNUT SQUASHES',
       ' OTHER BERRIES', ' BABY MARROWS', ' GREEN BEANS', ' TOMATOES',
       ' GEM SQUASHES', ' GRANADILLAS', ' THYME', ' GREEN MEALIES',
       ' BANANAS', ' OTHER', ' CABBAGE', ' BUTTER LETTUCE', ' PEARS',
       ' APRICOTS', ' MANGOS', ' SOU-SOU (CHAYOTE)', ' DRIED BEANS',
       ' PRUNES', ' CHILLIES', ' NAARTJIES (MANDARINS)', ' MUSHROOMS',
       ' CHINESE GREENS', ' POTATOES - WASHED', ' DRIED FRUITS',
       ' RASPBERRY', ' MARROWS', ' AVOCADOS', ' ENGLISH CUCUMBERS',
       ' SPINACH', ' LETTUCE', ' COCKTAIL TOMATOES', ' RADISHES',
       ' METIBALJI', ' ONIONS', ' CAULIFLOWER', ' PLUMS', ' PINEAPPLES',
       ' CELERY', ' OCRA', ' PUMKINS', ' APPLES', ' CORIANDER', ' LIMES',
       ' SPEARMINT', ' EXOTIC CITRUS', ' GRAPEFRUIT', ' WATERMELONS',
       ' CALABASHES', ' PICKLE ONION', ' FLOWERS', ' SWEETCORN',
       ' KIWI FRUIT', ' PEACHES', ' BEETROOTS', ' BRINJALS/EGGS PLANT',
       ' PAPAYAS', ' PAWPAW

In [62]:
df3.market_name.unique()

array(['Pietermaritzburg Fresh Produce Market',
       'Nelspruit Fresh Produce Market', 'Tshwane Fresh Produce Market',
       'George Fresh Produce Market',
       'Bloemfontein (Mangaung) Fresh Produce Market',
       'Cape Town Fresh Produce Market', 'Durban Fresh Produce Market',
       'Kimberley (Sol Plaatje) Fresh Produce Market',
       'East London Fresh Produce Market',
       'Welkom (Matjhabeng) Fresh Produce Market',
       'Mthatha (Kei) Fresh Produce Market',
       'Johannesburg Fresh Produce Market',
       'Klerksdorp Fresh Produce Market',
       'Vereeniging Fresh Produce Market', 'Springs Fresh Produce Market',
       'Port Elizabeth Fresh Produce Market',
       'Mpumalanga Fresh Produce Market', 'Witbank Fresh Produce Market'],
      dtype=object)

In [63]:
list1 = df3['product'].unique()
list1


array([' PECAN NUTS', ' BROCOLLI', ' AMADUMBE', ' BUTTERNUT SQUASHES',
       ' OTHER BERRIES', ' BABY MARROWS', ' GREEN BEANS', ' TOMATOES',
       ' GEM SQUASHES', ' GRANADILLAS', ' THYME', ' GREEN MEALIES',
       ' BANANAS', ' OTHER', ' CABBAGE', ' BUTTER LETTUCE', ' PEARS',
       ' APRICOTS', ' MANGOS', ' SOU-SOU (CHAYOTE)', ' DRIED BEANS',
       ' PRUNES', ' CHILLIES', ' NAARTJIES (MANDARINS)', ' MUSHROOMS',
       ' CHINESE GREENS', ' POTATOES - WASHED', ' DRIED FRUITS',
       ' RASPBERRY', ' MARROWS', ' AVOCADOS', ' ENGLISH CUCUMBERS',
       ' SPINACH', ' LETTUCE', ' COCKTAIL TOMATOES', ' RADISHES',
       ' METIBALJI', ' ONIONS', ' CAULIFLOWER', ' PLUMS', ' PINEAPPLES',
       ' CELERY', ' OCRA', ' PUMKINS', ' APPLES', ' CORIANDER', ' LIMES',
       ' SPEARMINT', ' EXOTIC CITRUS', ' GRAPEFRUIT', ' WATERMELONS',
       ' CALABASHES', ' PICKLE ONION', ' FLOWERS', ' SWEETCORN',
       ' KIWI FRUIT', ' PEACHES', ' BEETROOTS', ' BRINJALS/EGGS PLANT',
       ' PAPAYAS', ' PAWPAW

In [64]:
df3['variety'].unique()

array(['NO VARIETY', 'OTHER GREEN BEANS', 'RED', 'JAM TOMATOES',
       'LONGLIFE', 'OTHER TOMATOES', 'FINGERS (SINGLE)', 'HANDS',
       'LOWER GRADE BANANAS', 'FORELLE', "PACKHAM'S TRIUMPH",
       'OTHER PEARS', 'ROSEMARIE', 'BEURRE CLAIRGEAU',
       'BARTLETT / BON CHRETIEN', 'ABATE FETEL', 'OTHER', 'KENT',
       'OTHER VARIETIES', 'KEITT', 'LONG GREEN', 'GREEN', 'HERMES',
       'MONDIAL', 'NICOLA', 'UP-TO-DATE', 'SERENADE', 'HERTHA',
       'VANDERPLANK', 'HASS', 'OSMERIAS', 'PINKERTON', 'FUERTE', 'MAHA',
       'OTHER LETTUCE', 'MIX', 'GREEN CURLY', 'BROWN',
       'OTHER (PLEASE USE 1)', 'WHITE', 'ANGELINO', 'HARRY PICKSTONE',
       'ELDORADO', 'RED BEAUT', 'LAETITIA', 'METHLEY', 'SANTA ROSA',
       'SONGOLD', 'RUBY NEL', 'SOUVENIR', 'CELEBRATION', 'GAVIOTA',
       'LADY RED', 'SAPPHIRE', 'QUEEN VICTORIA', 'CROWN PRINCE',
       'OTHER PUMPKINS', 'BRAEBURN', 'CRIPPS PINK', 'FUJI', 'GALA',
       'GOLDEN DELICIOUS', 'GRANNY SMITH', 'OTHER APPLES', 'ROYAL BEAUT',
       'ROY

In [65]:
df3.rename(columns = {'class':'Veg_Class'}, inplace = True)

In [66]:
df3['Veg_Class'].unique()

array(['NOT GRADED', 'CLASS 1', 'CLASS 2', 'CLASS 3', 'CLASS 4'],
      dtype=object)

In [67]:
df3['package'].unique()

array(['PER KILO / POCKET', 'BAG', 'PUNNET', 'BANANA BOX', 'HALF POCKET',
       'SUGAR POCKET', 'STANDARD POCKET', '3 QUARTER PORCKET (7kg)',
       'STANDARD TRAY', 'BOX', 'TOMATO CASE', 'PUNNET PACK (Pre Pack)',
       'DOUBBLE LAYER TRAY', 'CRATE', 'MULTI LAYER TRAYER',
       'DOMPLE JUMBLE', 'METRIC BOX', 'QUARTER POCKET', 'CARTON',
       'HALF CARTON', 'EACH', 'ECONOMIC PACK', 'HALF TRAY',
       'PLASTIC CONTAINER', 'OTHER CONTAINERS', 'BUNCH', 'POCKET / BUNCH',
       'DECON', 'EXPORT BOX', 'BULK PACK (BOX)', 'BULK PACK', 'KILOGRAM',
       'HALF DECON', 'PER 100 LB (45kg)', 'LOTS', 'PINEAPPLE CASE',
       'DOUBBLE POCKET', 'PER LITRE (1000ml)', 'PER LITRE (500ml)',
       'BUSHEL BOX'], dtype=object)

In [68]:
df3_uniques_set = df3[['product', 'variety', 'Veg_Class', 'size', 'package', 'market_name', 'market_code', 'market_type' ]]
df3_uniques_set.nunique()

product        126
variety        267
Veg_Class        5
size            12
package         40
market_name     18
market_code     18
market_type      1
dtype: int64

In [69]:
A = df3.loc[df3['product'] == ' CHILLIES']
A

Unnamed: 0,product,variety,Veg_Class,size,package,unit(kg),closing_price,high_price,low_price,average_price,total_sales,sales_quantity,closing_stock,date,market_name,market_code,market_type,series
2559,CHILLIES,GREEN,NOT GRADED,NO SIZE,SUGAR POCKET,17.0,200.83,300.0,20.0,200.83,2410.0,12,55,2023-05-04,Pietermaritzburg Fresh Produce Market,PIE,Fresh Produce Markets,2
2560,CHILLIES,GREEN,NOT GRADED,NO SIZE,BANANA BOX,10.0,150.00,150.0,150.0,150.00,300.0,2,9,2023-05-04,Pietermaritzburg Fresh Produce Market,PIE,Fresh Produce Markets,3
2561,CHILLIES,GREEN,NOT GRADED,NO SIZE,QUARTER POCKET,3.0,40.00,40.0,40.0,40.00,80.0,2,8,2023-05-04,Pietermaritzburg Fresh Produce Market,PIE,Fresh Produce Markets,5
2562,CHILLIES,GREEN,NOT GRADED,NO SIZE,STANDARD POCKET,7.0,20.00,20.0,20.0,20.00,100.0,5,6,2023-05-03,Pietermaritzburg Fresh Produce Market,PIE,Fresh Produce Markets,7
2563,CHILLIES,GREEN,NOT GRADED,NO SIZE,SUGAR POCKET,17.0,174.00,250.0,60.0,174.00,1740.0,10,36,2023-05-03,Pietermaritzburg Fresh Produce Market,PIE,Fresh Produce Markets,8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
219110,CHILLIES,GREEN,NOT GRADED,NO SIZE,BOX,3.0,68.80,70.0,60.0,68.80,1720.0,25,15,2023-02-06,East London Fresh Produce Market,EAS,Fresh Produce Markets,136
219111,CHILLIES,GREEN,NOT GRADED,NO SIZE,PUNNET,0.0,5.62,15.0,5.0,5.62,90.0,16,4,2023-02-06,East London Fresh Produce Market,EAS,Fresh Produce Markets,137
219112,CHILLIES,GREEN,NOT GRADED,NO SIZE,BOX,3.0,63.21,75.0,60.0,63.21,885.0,14,40,2023-02-02,East London Fresh Produce Market,EAS,Fresh Produce Markets,138
219113,CHILLIES,GREEN,NOT GRADED,NO SIZE,PUNNET,0.0,5.00,5.0,5.0,5.00,50.0,10,22,2023-02-02,East London Fresh Produce Market,EAS,Fresh Produce Markets,139


In [70]:
A['package'].unique()

array(['SUGAR POCKET', 'BANANA BOX', 'QUARTER POCKET', 'STANDARD POCKET',
       'HALF POCKET', 'PER KILO / POCKET', 'OTHER CONTAINERS', 'BAG',
       'PUNNET PACK (Pre Pack)', 'TOMATO CASE', 'BULK PACK (BOX)', 'BOX',
       'PUNNET', 'DOMPLE JUMBLE', 'ECONOMIC PACK', 'STANDARD TRAY',
       'HALF TRAY', 'CARTON', 'DOUBBLE LAYER TRAY'], dtype=object)

In [71]:
df3['size'].unique()

array(['NO SIZE', 'MEDIUM', 'LARGE', 'SMALL', 'SMALL MEDIUM',
       'MEDIUM PLUS', 'EXTRA LARGE', 'UNDER SIZE', 'XX LARGE',
       'NEW (BABY)', 'XXX LARGE', 'JUMBO'], dtype=object)

In [72]:
df3.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 219531 entries, 0 to 219530
Data columns (total 18 columns):
 #   Column          Non-Null Count   Dtype         
---  ------          --------------   -----         
 0   product         219531 non-null  object        
 1   variety         219531 non-null  object        
 2   Veg_Class       219531 non-null  object        
 3   size            219531 non-null  object        
 4   package         219531 non-null  object        
 5   unit(kg)        218731 non-null  float64       
 6   closing_price   219531 non-null  float64       
 7   high_price      219531 non-null  float64       
 8   low_price       219531 non-null  float64       
 9   average_price   219531 non-null  float64       
 10  total_sales     219531 non-null  float64       
 11  sales_quantity  219531 non-null  int64         
 12  closing_stock   219531 non-null  int64         
 13  date            219531 non-null  datetime64[ns]
 14  market_name     219531 non-null  obj

In [73]:
class_list1 = list(df3['market_name'].unique())
MarketW = Dropdown(options = class_list1)

A = df3.loc[df3['market_name'] == MarketW.value, ('product', 'variety','Veg_Class', 'size', 'package', 'closing_price', 'date')]
class_list2 = list(A['product'].unique())
ProductW = Dropdown(options = class_list2)

A = df3.loc[df3['market_name'] == MarketW.value, ('product', 'variety','Veg_Class', 'size', 'package', 'closing_price', 'date')]
B = A.loc[A['product'] == ProductW.value, ('variety','Veg_Class', 'size', 'package', 'closing_price', 'date')]
class_list3 = list(B['variety'].unique())
VarietyW =  Dropdown(options = class_list3)

A = df3.loc[df3['market_name'] == MarketW.value, ('product', 'variety','Veg_Class', 'size', 'package', 'closing_price', 'date')]
B = A.loc[A['product'] == ProductW.value, ('variety','Veg_Class', 'size', 'package', 'closing_price', 'date')]    
C = B.loc[B['variety'] == VarietyW.value, ('Veg_Class', 'size', 'package', 'closing_price', 'date')]
class_list4 = list(C['Veg_Class'].unique())
VegClassW =  Dropdown(options = class_list4)

A = df3.loc[df3['market_name'] == MarketW.value, ('product', 'variety','Veg_Class', 'size', 'package', 'closing_price', 'date')]
B = A.loc[A['product'] == ProductW.value, ('variety','Veg_Class', 'size', 'package', 'closing_price', 'date')]    
C = B.loc[B['variety'] == VarietyW.value, ('Veg_Class', 'size', 'package', 'closing_price', 'date')]
D = C.loc[C['Veg_Class'] == VegClassW.value,  ('size', 'package', 'closing_price', 'date')]
class_list5 = list(D['size'].unique())
SizeW = Dropdown(options = class_list5)

A = df3.loc[df3['market_name'] == MarketW.value, ('product', 'variety','Veg_Class', 'size', 'package', 'closing_price', 'date')]
B = A.loc[A['product'] == ProductW.value, ('variety','Veg_Class', 'size', 'package', 'closing_price', 'date')]    
C = B.loc[B['variety'] == VarietyW.value, ('Veg_Class', 'size', 'package', 'closing_price', 'date')]
D = C.loc[C['Veg_Class'] == VegClassW.value,  ('size', 'package', 'closing_price', 'date')]
E = D.loc[D['size'] == SizeW.value,  ('package', 'closing_price', 'date')]
class_list6 = list(E['package'].unique())
PackageW = Dropdown(options = class_list6)



@interact(Market = MarketW, Product = ProductW, Variety = VarietyW, VegClass = VegClassW, Size = SizeW, Package = PackageW)  

def Vegplotly(Market, Product, Variety, VegClass, Size, Package):
    A = df3.loc[df3['market_name'] == Market, ('product', 'variety','Veg_Class', 'size', 'package', 'closing_price', 'date', 'sales_quantity')]
    class_list2 = list(A['product'].unique())
    ProductW.options = class_list2 # Here is the trick, i.e. update cityW.options based on country, namely countryW.value.
    
    B = A.loc[A['product'] == Product, ('variety','Veg_Class', 'size', 'package', 'closing_price', 'date', 'sales_quantity')]
    class_list3 = list(B['variety'].unique())
    VarietyW.options = class_list3 # Dittoo
    
    C = B.loc[B['variety'] == Variety, ('Veg_Class', 'size', 'package', 'closing_price', 'date', 'sales_quantity')]
    class_list4 = list(C['Veg_Class'].unique())
    VegClassW.options = class_list4 # Dittoo
    
    D = C.loc[C['Veg_Class'] == VegClass,  ('size', 'package', 'closing_price', 'date', 'sales_quantity')]
    class_list5 = list(D['size'].unique())
    SizeW.options = class_list5 # Dittoo
    
    E = D.loc[D['size'] == Size,  ('package', 'closing_price', 'date', 'sales_quantity')]
    class_list6 = list(E['package'].unique())
    PackageW.options = class_list6 # Dittoo
    
    F = E.loc[E['package'] == Package,  ('closing_price', 'date', 'sales_quantity')]

    fig = px.line(F, x='date', y='closing_price')
    
    fig.update_layout(barmode = 'group', 
    title= "Closing Price")
 
    
    # Add range slider
    fig.update_layout(
        xaxis=dict(
            rangeselector=dict(
                buttons=list([
                    dict(count=1,
                         label="1m",
                         step="month",
                         stepmode="backward"),
                    dict(count=6,
                         label="6m",
                         step="month",
                         stepmode="backward"),
                    dict(count=1,
                         label="YTD",
                         step="year",
                         stepmode="todate"),
                    dict(count=1,
                         label="1y",
                         step="year",
                         stepmode="backward"),
                    dict(step="all")
                ])
            ),
            rangeslider=dict(
                visible=True
            ),
            type="date"
        )
    )

    
    fig.show()



interactive(children=(Dropdown(description='Market', options=('Pietermaritzburg Fresh Produce Market', 'Nelspr…

In [74]:
class_list1 = list(df3['market_name'].unique())
MarketW = Dropdown(options = class_list1)

A = df3.loc[df3['market_name'] == MarketW.value, ('product', 'variety','Veg_Class', 'size', 'package', 'closing_price', 'date')]
class_list2 = list(A['product'].unique())
ProductW = Dropdown(options = class_list2)

B = A.loc[A['product'] == ProductW.value, ('variety','Veg_Class', 'size', 'package', 'closing_price', 'date')]
class_list3 = list(B['variety'].unique())
VarietyW =  Dropdown(options = class_list3)
   
C = B.loc[B['variety'] == VarietyW.value, ('Veg_Class', 'size', 'package', 'closing_price', 'date')]
class_list4 = list(C['Veg_Class'].unique())
VegClassW =  Dropdown(options = class_list4)

D = C.loc[C['Veg_Class'] == VegClassW.value,  ('size', 'package', 'closing_price', 'date')]
class_list5 = list(D['size'].unique())
SizeW = Dropdown(options = class_list5)


E = D.loc[D['size'] == SizeW.value,  ('package', 'closing_price', 'date')]
class_list6 = list(E['package'].unique())
PackageW = Dropdown(options = class_list6)



@interact(Market = MarketW, Product = ProductW, Variety = VarietyW, VegClass = VegClassW, Size = SizeW, Package = PackageW)  

def Vegplotly(Market, Product, Variety, VegClass, Size, Package):
    A = df3.loc[df3['market_name'] == Market, ('product', 'variety','Veg_Class', 'size', 'package', 'closing_price', 'date', 'sales_quantity')]
    class_list2 = list(A['product'].unique())
    ProductW.options = class_list2 # Here is the trick, i.e. update cityW.options based on country, namely countryW.value.
    
    B = A.loc[A['product'] == Product, ('variety','Veg_Class', 'size', 'package', 'closing_price', 'date', 'sales_quantity')]
    class_list3 = list(B['variety'].unique())
    VarietyW.options = class_list3 # Dittoo
    
    C = B.loc[B['variety'] == Variety, ('Veg_Class', 'size', 'package', 'closing_price', 'date', 'sales_quantity')]
    class_list4 = list(C['Veg_Class'].unique())
    VegClassW.options = class_list4 # Dittoo
    
    D = C.loc[C['Veg_Class'] == VegClass,  ('size', 'package', 'closing_price', 'date', 'sales_quantity')]
    class_list5 = list(D['size'].unique())
    SizeW.options = class_list5 # Dittoo
    
    E = D.loc[D['size'] == Size,  ('package', 'closing_price', 'date', 'sales_quantity')]
    class_list6 = list(E['package'].unique())
    PackageW.options = class_list6 # Dittoo
    
    F = E.loc[E['package'] == Package,  ('closing_price', 'date', 'sales_quantity')]

    fig = px.line(F, x='date', y='closing_price')
    
    fig.update_layout(barmode = 'group', 
    title= "Closing Price")
 
    
    # Add range slider
    fig.update_layout(
        xaxis=dict(
            rangeselector=dict(
                buttons=list([
                    dict(count=1,
                         label="1m",
                         step="month",
                         stepmode="backward"),
                    dict(count=6,
                         label="6m",
                         step="month",
                         stepmode="backward"),
                    dict(count=1,
                         label="YTD",
                         step="year",
                         stepmode="todate"),
                    dict(count=1,
                         label="1y",
                         step="year",
                         stepmode="backward"),
                    dict(step="all")
                ])
            ),
            rangeslider=dict(
                visible=True
            ),
            type="date"
        )
    )

    
    fig.show()



interactive(children=(Dropdown(description='Market', options=('Pietermaritzburg Fresh Produce Market', 'Nelspr…

In [75]:

class_list2 = list(df3['product'].unique())

def product_pie(Products):
    A = df3.loc[df3['product'] == Products, ('market_name')]
 
    fig = px.pie(A, values=A.value_counts().values)
    fig.update_traces(hoverinfo='label+percent+name', textinfo='value', hole=.4)
    
    fig.update_layout(
    title_text= f"Trading Activities for{Products} Across Various Markets ",
    # Add annotations in the center of the donut pies.
    annotations=[dict(text=f'{Products}', x=0.5, y=0.5, font_size=13, showarrow=False)])
    fig.show()
    
widgets.interact(product_pie, Products = class_list2)

interactive(children=(Dropdown(description='Products', options=(' PECAN NUTS', ' BROCOLLI', ' AMADUMBE', ' BUT…

<function __main__.product_pie(Products)>

In [76]:
df3 = df3.sort_values(by=['date']).reset_index(drop = True)

# from ipywidgets import interact, Dropdown
class_list1 = list(df3['market_name'].unique())
MarketW = Dropdown(options = class_list1)
ProductW = Dropdown()
VarietyW =  Dropdown()
VegClassW =  Dropdown()
SizeW = Dropdown()
PackageW = Dropdown()

def ProductW_options(*args): # *args represent zero (case here) or more arguments.
    A = df3.loc[df3['market_name'] == MarketW.value, ('product', 'variety','Veg_Class', 'size', 'package', 'closing_price', 'date')]
    ProductW.options = list(A['product'].unique())
MarketW.observe(ProductW_options) # Here is the trick, i.e. update cityW.options based on countryW.value.

def VarietyW_options(*args): # *args represent zero (case here) or more arguments.
    A = df3.loc[df3['market_name'] == MarketW.value, ('product', 'variety','Veg_Class', 'size', 'package', 'closing_price', 'date')]
    B = A.loc[A['product'] == ProductW.value, ('variety','Veg_Class', 'size', 'package', 'closing_price', 'date')]
    VarietyW.options = list(B['variety'].unique())
ProductW.observe(VarietyW_options) # Here is the trick, i.e. update cityW.options based on countryW.value.


def VegClassW_options(*args): # *args represent zero (case here) or more arguments.
    A = df3.loc[df3['market_name'] == MarketW.value, ('product', 'variety','Veg_Class', 'size', 'package', 'closing_price', 'date')]
    B = A.loc[A['product'] == ProductW.value, ('variety','Veg_Class', 'size', 'package', 'closing_price', 'date')]    
    C = B.loc[B['variety'] == VarietyW.value, ('Veg_Class', 'size', 'package', 'closing_price', 'date')]
    VegClassW.options = list(C['Veg_Class'].unique())
VarietyW.observe(VegClassW_options) # Here is the trick, i.e. update cityW.options based on countryW.value.


def SizeW_options(*args): # *args represent zero (case here) or more arguments.
    A = df3.loc[df3['market_name'] == MarketW.value, ('product', 'variety','Veg_Class', 'size', 'package', 'closing_price', 'date')]
    B = A.loc[A['product'] == ProductW.value, ('variety','Veg_Class', 'size', 'package', 'closing_price', 'date')]    
    C = B.loc[B['variety'] == VarietyW.value, ('Veg_Class', 'size', 'package', 'closing_price', 'date')]
    D = C.loc[C['Veg_Class'] == VegClassW.value,  ('size', 'package', 'closing_price', 'date')]
    SizeW.options = list(D['size'].unique())
VegClassW.observe(SizeW_options) # Here is 

def PackageW_options(*args): # *args represent zero (case here) or more arguments.
    A = df3.loc[df3['market_name'] == MarketW.value, ('product', 'variety','Veg_Class', 'size', 'package', 'closing_price', 'date')]
    B = A.loc[A['product'] == ProductW.value, ('variety','Veg_Class', 'size', 'package', 'closing_price', 'date')]    
    C = B.loc[B['variety'] == VarietyW.value, ('Veg_Class', 'size', 'package', 'closing_price', 'date')]
    D = C.loc[C['Veg_Class'] == VegClassW.value,  ('size', 'package', 'closing_price', 'date')]
    E = D.loc[D['size'] == SizeW.value,  ('package', 'closing_price', 'date')]
    PackageW.options = list(E['package'].unique())
SizeW.observe(PackageW_options) # Here is 


@interact(Market = MarketW, Product = ProductW, Variety = VarietyW, VegClass = VegClassW, Size = SizeW, Package = PackageW)  

def filtering(Market, Product, Variety, VegClass, Size, Package):
    A = df3.loc[df3['market_name'] == Market, ('product', 'variety','Veg_Class', 'size', 'package', 'closing_price', 'date', 'sales_quantity')]
    B = A.loc[A['product'] == Product, ('variety','Veg_Class', 'size', 'package', 'closing_price', 'date', 'sales_quantity')]    
    C = B.loc[B['variety'] == Variety, ('Veg_Class', 'size', 'package', 'closing_price', 'date', 'sales_quantity')]
    D = C.loc[C['Veg_Class'] == VegClass,  ('size', 'package', 'closing_price', 'date', 'sales_quantity')]
    E = D.loc[D['size'] == Size,  ('package', 'closing_price', 'date', 'sales_quantity')]
    
    F = E.loc[E['package'] == Package,  ('closing_price', 'date', 'sales_quantity')]
    
  
    
    fig = go.Figure(data =[
                    go.Scatter(name = 'Closing Price', x=F['date'], y =F['closing_price'])
                    ])
    
    

    fig.update_layout(barmode = 'group', 
    title= "Closing Price")

    
    # Add range slider
    fig.update_layout(
        xaxis=dict(
            rangeselector=dict(
                buttons=list([
                    dict(count=1,
                         label="1m",
                         step="month",
                         stepmode="backward"),
                    dict(count=6,
                         label="6m",
                         step="month",
                         stepmode="backward"),
                    dict(count=1,
                         label="YTD",
                         step="year",
                         stepmode="todate"),
                    dict(count=1,
                         label="1y",
                         step="year",
                         stepmode="backward"),
                    dict(step="all")
                ])
            ),
            rangeslider=dict(
                visible=True
            ),
            type="date"
        )
    )

    
    fig.show()

interactive(children=(Dropdown(description='Market', options=('Durban Fresh Produce Market', 'Cape Town Fresh …

In [77]:


def filtering(Market, Product, Variety, VegClass, Size, Package):
    A = df3.loc[df3['market_name'] == Market, ('product', 'variety','Veg_Class', 'size', 'package', 'closing_price', 'date', 'sales_quantity')]
    B = A.loc[A['product'] == Product, ('variety','Veg_Class', 'size', 'package', 'closing_price', 'date', 'sales_quantity')]    
    C = B.loc[B['variety'] == Variety, ('Veg_Class', 'size', 'package', 'closing_price', 'date', 'sales_quantity')]
    D = C.loc[C['Veg_Class'] == VegClass,  ('size', 'package', 'closing_price', 'date', 'sales_quantity')]
    E = D.loc[D['size'] == Size,  ('package', 'closing_price', 'date', 'sales_quantity')]
    
    F = E.loc[E['package'] == Package,  ('closing_price', 'date', 'sales_quantity')]
    return F

filtering(MarketW, ProductW, VarietyW, VegClassW, SizeW, PackageW)

Unnamed: 0,closing_price,date,sales_quantity


In [78]:
A = df3.loc[df3['market_name'] == 'George Fresh Produce Market', ('product', 'variety','Veg_Class', 'size', 'package', 'closing_price', 'date','sales_quantity')]
B = A.loc[A['product'] == ' GRANADILLAS', ('variety','Veg_Class', 'size', 'package', 'closing_price', 'date','sales_quantity')]    
C = B.loc[B['variety'] == 'NO VARIETY', ('Veg_Class', 'size', 'package', 'closing_price', 'date','sales_quantity')]
D = C.loc[C['Veg_Class'] == 'NOT GRADED',  ('size', 'package', 'closing_price', 'date','sales_quantity')]
E = D.loc[D['size'] == 'NO SIZE',  ('size','package', 'closing_price', 'date','sales_quantity')]
E


Unnamed: 0,size,package,closing_price,date,sales_quantity
104196,NO SIZE,BOX,80.0,2023-03-13,1
112567,NO SIZE,BOX,90.0,2023-03-20,2
112568,NO SIZE,HALF POCKET,90.0,2023-03-20,2
125648,NO SIZE,BOX,70.0,2023-03-27,2
125741,NO SIZE,BOX,70.0,2023-03-27,2
125742,NO SIZE,HALF POCKET,70.0,2023-03-27,1
125744,NO SIZE,HALF POCKET,70.0,2023-03-27,1
167530,NO SIZE,BOX,50.0,2023-04-12,1


df3 = df3.sort_values(by=['date']).reset_index(drop = True)

class_list1 = list(df3['market_name'].unique())

widget1 = Dropdown(options = class_list1)
A = df3.loc[df3['market_name'] == widget1, ('product', 'Veg_Class', 'size', 'package')]
class_list2 = list(A['product'].unique())

widget2 = Dropdown(options = class_list2)
B = A.loc[A['product'] == widget2, ('Veg_Class', 'size', 'package' )]
class_list3 = list(B['Veg_Class'].unique())

widget3 = Dropdown(options = class_list3)
C = B.loc[B['Veg_Class'] == widget3, ('size', 'package')]
class_list4 = list(C['size'].unique())

widget4 = Dropdown(options = class_list4)
D = C.loc[C['size'] == widget4]
class_list5 = list(D['package'].unique())

widget1