<a href="https://colab.research.google.com/github/GhazaleZe/Python-Exercises/blob/main/Xarray_test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import matplotlib.pyplot as plt
import pandas as pd
from functools import partial, partialmethod
from numpy.random import default_rng
import numpy as np

In [2]:
# --- simulation parameters ---
potentialCusts = 200   # binomial n: potential customers per simulated draw
purchaseProb = 0.2     # binomial p: chance that a given customer buys
numSims = 100          # number of simulated demand draws

# --- economics, named once instead of being repeated as bare literals ---
salePrice = 3          # revenue per unit sold
unitCost = 1           # cost per unit ordered
fixedOrderQty = 42     # the single order quantity evaluated in this cell

rng = default_rng(seed = 111)  # fixed seed keeps the draws reproducible

# create data frame to store simulated demand
newsDF = pd.DataFrame({"simNum": range(1, numSims+1),  # sequence of 1 to numSims
                   "demand": rng.binomial(n = potentialCusts,
                                          p = purchaseProb,
                                          size = numSims)})

# Units sold are capped by the order quantity (element-wise minimum against a
# scalar); unmet demand is floored at zero (element-wise maximum).
# Column names are derived from the order quantity so they stay in sync with it.
newsDF[f"profit_q{fixedOrderQty}"] = (
    salePrice * np.minimum(newsDF.demand, fixedOrderQty) - unitCost * fixedOrderQty
)
newsDF[f"lostSales_q{fixedOrderQty}"] = np.maximum(0, newsDF.demand - fixedOrderQty)

# view the first 5 rows of newsDF
newsDF.head()

Unnamed: 0,simNum,demand,profit_q42,lostSales_q42
0,1,42,84,0
1,2,47,84,5
2,3,43,84,1
3,4,41,81,0
4,5,38,72,0


In [3]:
from numpy.random import default_rng
import numpy as np
import xarray as xr

rng = default_rng(seed = 111)  ## set random seed (same seed as the newsDF simulation)

## get demand values; reuse the parameters defined alongside newsDF instead of
## repeating 200 / 0.2 / 100 as literals that could silently drift apart
demand = rng.binomial(n = potentialCusts, p = purchaseProb, size = numSims)

## make data array (no dimension name or coordinates supplied yet)
xr.DataArray(data = demand)

In [4]:
## same values, but the single dimension is now named "draw"
xr.DataArray(demand, dims = ("draw",))

In [5]:
## name the dimension, label the draws 1..N, and name the array itself;
## the labels are sized from the data so they always match its length
xr.DataArray(data = demand,
             dims = "draw",
             coords = {"draw": np.arange(1, demand.size + 1)},
             name = "demand")

In [7]:
## explicit labeling of coordinates - must use name now to create dataset later
demandDA = xr.DataArray(data = demand,
                        dims = "draw",  # stated explicitly rather than inferred from the coords dict
                        coords = {"draw": np.arange(1, demand.size + 1)},  # labels 1..N, sized from the data
                        name = "demand")
demandDA

In [6]:
## creating a DataArray of order quantities - must use name now to create dataset later
## (values 25..50; each value is also used as its own coordinate label)
orderDA = xr.DataArray(data = np.arange(25,51),
                       dims = "orderQtyIndex",  # stated explicitly rather than inferred from the coords dict
                       coords = {"orderQtyIndex": np.arange(25,51)},
                       name = "orderQty")
orderDA

In [13]:
# combine the two named data arrays into a single dataset
newsvDS = xr.merge(
    [demandDA, orderDA]
)
newsvDS

In [14]:
# flatten to a pandas frame indexed by (draw, orderQtyIndex); show the first 50 rows
(
    newsvDS
    .to_dataframe()
    .head(50)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,demand,orderQty
draw,orderQtyIndex,Unnamed: 2_level_1,Unnamed: 3_level_1
1,25,42,25
1,26,42,26
1,27,42,27
1,28,42,28
1,29,42,29
1,30,42,30
1,31,42,31
1,32,42,32
1,33,42,33
1,34,42,34


In [19]:
# preview only - the result is displayed, not stored back into newsvDS
(
    newsvDS
    # units sold are capped by the order quantity
    .assign(soldNewspapers = lambda DS: np.minimum(DS["demand"], DS["orderQty"]))
    # revenue is 3 per unit sold
    .assign(revenue = lambda DS: 3 * DS["soldNewspapers"])
)



In [22]:
# Add the derived quantities. Each step is a callable, so it reads from the
# dataset flowing through the chain rather than from the outer variable.
newsvDS = (
    newsvDS
    .assign(soldNewspapers = lambda DS: np.minimum(DS["demand"], DS["orderQty"]))
    .assign(revenue = lambda DS: 3 * DS["soldNewspapers"])
    .assign(expense = lambda DS: 1 * DS["orderQty"])
    .assign(profit = lambda DS: DS["revenue"] - DS["expense"])
    .assign(lostSales = lambda DS: np.maximum(0, DS["demand"] - DS["orderQty"]))
)

# dataframe for printing: show five rows of DF, sampled with a fixed seed
newsvDS.to_dataframe().sample(5, random_state = 111)

Unnamed: 0_level_0,Unnamed: 1_level_0,demand,orderQty,soldNewspapers,revenue,expense,profit,lostSales
draw,orderQtyIndex,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
39,36,35,36,35,105,36,69,0
22,46,28,46,28,84,46,38,0
17,47,37,47,37,111,47,64,0
60,35,41,35,35,105,35,70,6
21,48,38,48,38,114,48,66,0


We can use direct item assignment, as in the next cell, instead of the chained `.assign` calls above when a chain of operations is not required.

In [21]:
# same derived variables as the assign chain, written as item assignments
# (statement order is kept: it sets the variable order on a fresh dataset)
newsvDS["soldNewspapers"] = np.minimum(newsvDS["demand"], newsvDS["orderQty"])
newsvDS["expense"] = newsvDS["orderQty"]
newsvDS["revenue"] = 3 * newsvDS["soldNewspapers"]
newsvDS["profit"] = newsvDS["revenue"] - newsvDS["expense"]
newsvDS["lostSales"] = np.maximum(0, newsvDS["demand"] - newsvDS["orderQty"])
newsvDS

In [23]:
# pick the single label 36 along orderQtyIndex; what comes back is a 1-d dataset
newsvDS.sel({"orderQtyIndex": 36})

In [24]:
# label-based slicing keeps every value inside the range, both ends included,
# provided the index labels are monotonically increasing
newsvDS.sel({"orderQtyIndex": slice(36, 38)})

In [25]:
# mask the dataset with the condition "some sales were lost"
newsvDS.where(newsvDS["lostSales"] > 0)

In [27]:
# restrict to entries with lost sales, then show five of them (fixed seed)
(
    newsvDS
    .where(newsvDS["lostSales"] > 0, drop = True)
    .to_dataframe()   # convert to pandas dataframe for printing
    .dropna()         # pandas method to remove NaN rows
    .sample(5, random_state = 111)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,demand,orderQty,soldNewspapers,revenue,expense,profit,lostSales
draw,orderQtyIndex,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
30,34,37.0,34.0,34.0,102.0,34.0,68.0,3.0
98,27,47.0,27.0,27.0,81.0,27.0,54.0,20.0
100,26,37.0,26.0,26.0,78.0,26.0,52.0,11.0
83,30,46.0,30.0,30.0,90.0,30.0,60.0,16.0
26,34,38.0,34.0,34.0,102.0,34.0,68.0,4.0


In [28]:
# remove the orderQtyIndex dimension; the result is displayed, not stored
newsvDS.drop_dims(["orderQtyIndex"])

In [29]:
# remove the revenue and expense variables; the result is displayed, not stored
newsvDS.drop_vars(
    ["revenue", "expense"]
)

In [30]:
# all (draw, orderQtyIndex) rows, ordered from lowest to highest profit
(
    newsvDS
    .to_dataframe()
    .sort_values(by = "profit")
)

Unnamed: 0_level_0,Unnamed: 1_level_0,demand,orderQty,soldNewspapers,revenue,expense,profit,lostSales
draw,orderQtyIndex,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
67,50,27,50,27,81,50,31,0
67,49,27,49,27,81,49,32,0
67,48,27,48,27,81,48,33,0
23,50,28,50,28,84,50,34,0
67,47,27,47,27,81,47,34,0
...,...,...,...,...,...,...,...,...
73,49,49,49,49,147,49,98,0
81,49,49,49,49,147,49,98,0
95,49,50,49,49,147,49,98,1
11,49,49,49,49,147,49,98,0


In [33]:
## average profit across the draw dimension: one summary value per remaining coordinate
newsvDS["profit"].mean(dim = "draw")

In [34]:
## create mean summary stats (averages over the draw dimension);
## neither new variable depends on the other, so one assign call is enough
newsvDS.assign(
    expProfit = newsvDS["profit"].mean(dim = "draw"),
    expLossSales = newsvDS["lostSales"].mean(dim = "draw"),
)

In [37]:
## find average profit by orderQty
## see documentation here: https://docs.xarray.dev/en/stable/generated/xarray.core.groupby.DatasetGroupBy.mean.html
## (here the grouped object is the profit data array selected from the dataset)
(
    newsvDS["profit"]
    .groupby("orderQtyIndex")
    .mean(...)        # Ellipsis: reduce over all dimensions within each group
    .to_dataframe()
)

Unnamed: 0_level_0,profit
orderQtyIndex,Unnamed: 1_level_1
25,50.0
26,52.0
27,54.0
28,55.97
29,57.88
30,59.7
31,61.49
32,63.22
33,64.92
34,66.56
