In [1]:
# imports:

import pandas as pd
from datetime import datetime
import time
import seaborn as sns
import datapane as dp
import altair as alt


## Question 1

You are provided with data with 10-mins, 60-mins and 1-day resolution (Merge.csv)
Please merge them into a pandas Dataframe with 2-hours resolution in between 7:00 – 17:00 only as index.
Please take the average of the 10-mins and 60-mins resolution prices during the 2-hours window and forward fill the 1-day resolution prices in the 2-hours window.

In [2]:

# load csv and ensure date time column datatype is datetime object
merge = pd.read_csv("../Data analyst coding challenge/Merge.csv")
merge["Datetime"]= pd.to_datetime(merge["Datetime"], infer_datetime_format=True)
merge.info() # inspect datatype change


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1570 entries, 0 to 1569
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   Resolution  1570 non-null   object        
 1   Datetime    1570 non-null   datetime64[ns]
 2   Price       1488 non-null   float64       
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 36.9+ KB


Price has 82 nulls and I need to fill them with 0's so that the groupby function can aggregate them as numeric values

In [3]:
merge = merge.fillna(0, axis=0)
merge.head(20)

Unnamed: 0,Resolution,Datetime,Price
0,10MIN,2021-11-01 07:00:00,70.0
1,10MIN,2021-11-01 07:10:00,0.0
2,10MIN,2021-11-01 07:20:00,71.5
3,10MIN,2021-11-01 07:30:00,71.375
4,10MIN,2021-11-01 07:40:00,0.0
5,10MIN,2021-11-01 07:50:00,71.325
6,10MIN,2021-11-01 08:00:00,70.9
7,10MIN,2021-11-01 08:10:00,70.65
8,10MIN,2021-11-01 08:20:00,70.65
9,10MIN,2021-11-01 08:30:00,69.975


I need to inspect the unique resolution values so that I know

In [4]:
set(merge["Resolution"].values)

{'10MIN', '1H', 'D'}

Change frequency to 2 hours and aggregate the rest of the prices that fall out of that range. the final dataframe will then forward fill the datetimes > 1 hour (aka 1Day in this df).

In [6]:
# change frequency to 2h and merge
two_hour = merge.groupby([pd.Grouper(key="Datetime", freq="2H", origin="07:00:00")]).mean().reset_index().set_index("Datetime")
# limit time range to 7:00 to 17:00 and forward fill the 1D resolution
final = two_hour.between_time("7:00", "17:00").ffill().reset_index()

In [7]:
final

Unnamed: 0,Datetime,Price
0,2021-11-01 07:00:00,60.792857
1,2021-11-01 09:00:00,71.494643
2,2021-11-01 11:00:00,68.635714
3,2021-11-01 13:00:00,63.314286
4,2021-11-01 15:00:00,65.630357
...,...,...
174,2021-11-30 07:00:00,62.650000
175,2021-11-30 09:00:00,98.826786
176,2021-11-30 11:00:00,96.619643
177,2021-11-30 13:00:00,96.621429


## Question 2

You are provided with a daily energy consumption data from 2016 to date (Consumption.csv).
Please create a Pandas DataFrame with to show the consumption of each year. The expected format is to have the year number as column name and mm-dd as index.
Please also create a seasonal plot showing 5-years (2016-2020) range (shaded) & average (dashed line), and year 2021 (line) & 2022 (line).
Please comment on your observation on the plot

Load data using csv parser:

In [8]:
consumption = pd.read_csv("../Data analyst coding challenge/Consumption.csv")
consumption.head() # inspect structure

Unnamed: 0,Date,Consumption
0,01/01/2016,276.291
1,02/01/2016,294.678
2,03/01/2016,306.3096
3,04/01/2016,370.0566
4,05/01/2016,367.3998


Only given a datetime column and a value column so a bit of preprocessing is needed to get expected format:

In [9]:
print("Attempting to convert date column into datetype object")
try:
    consumption["Date"]= pd.to_datetime(consumption["Date"], infer_datetime_format=True)
except:
    print("WARNING: there are some different date formats in the rows, so can't infer date format")

Attempting to convert date column into datetype object


Since there appears to be multiple formats of dates in this column, the function cant infer format unless it is told about these formats explicitly. I need to create a function that does just that. First I will inspect the data for missing values to ensure that it didn't cause any errors.

In [10]:
# Check for missing values across
print(f"There are: \n {consumption.isna().sum()} \n values missing values across the consumption dataframe rows")

There are: 
 Date           0
Consumption    0
dtype: int64 
 values missing values across the consumption dataframe rows


There are no across dupes it is important to check because aggregating the table later will not aggregate across dupes with the groupby method.

In [11]:
consumption.duplicated().sum() # there are zero across duplicates

0

In [12]:
# date duplicates:
consumption["Date"].duplicated().sum() 

18

In [13]:
# check why the duplicates occur with consumption value
consumption.loc[consumption["Date"].duplicated()]


Unnamed: 0,Date,Consumption
1766,2020111,273.132
1767,2020112,308.0268
1768,2020113,356.805
1769,2020114,387.2934
1770,2020115,392.7366
1771,2020116,380.538
1772,2020117,330.0102
1773,2020118,330.2856
1774,2020119,396.2682
1796,2020121,456.9696


In [14]:
consumption.loc[consumption["Date"]=="2020125"] # The values within the day 1 to 29 are quite low in differences how to handle the duplicates

Unnamed: 0,Date,Consumption
1485,2020125,406.9764
1800,2020125,401.6142


In [16]:
# consumption dupes:
consumption["Consumption"].duplicated().sum() # 177 days are the same consumption 
consumption.loc[consumption["Consumption"].duplicated()] # check the duplicate values
consumption.loc[consumption["Consumption"].astype("str").str.contains("245.43")] # validate why they are

Unnamed: 0,Date,Consumption
498,13/05/2017,245.43
1717,2020913,245.43
2453,19/09/2022,245.43


After inspecting the date duplicates I have found the two patterns of datestamp types:
* 2 types of date formats: dd/mm/yyyy and yyyymmdd
    * Dates with the dd/mm/yyyy format tend to usually be part of the 177 consumption duplicates
    * Dates with the yyyymmdd are all part of the 18 date duplicates and don't vary too greatly with 50-100 units of consumption variance


I will handle the incosistencies by creating a function that handles different type of date stamps and converts them into date time objects.

In [17]:
dates = consumption["Date"].tolist() # used to test function

def date_conversion(date_input, conversion_pattern=None):
    """
    A function that converts a string with so far only 2 defined formats into a datetime object.

    Args:
        date_input (str): a date string

    Returns:
        date_time_object : date object of converted string given the 2 types of formats.

    """
    if conversion_pattern == None:
        format1 = "%d/%m/%Y" # equivalent to dd/mm/yyyy
        format2 = "%Y%m%d" # equivalent to yyyymmdd

        try:
            date_obj = datetime.strptime(date_input, format1)
        except:
            date_obj = datetime.strptime(date_input, format2)

        
    else:
        date_obj = datetime.strptime(date_input, conversion_pattern)

    return date_obj

Now testing if function works on a list of dates:

In [18]:
converted_dates = [date_conversion(d) for d in dates]
converted_dates[0:20]

[datetime.datetime(2016, 1, 1, 0, 0),
 datetime.datetime(2016, 1, 2, 0, 0),
 datetime.datetime(2016, 1, 3, 0, 0),
 datetime.datetime(2016, 1, 4, 0, 0),
 datetime.datetime(2016, 1, 5, 0, 0),
 datetime.datetime(2016, 1, 6, 0, 0),
 datetime.datetime(2016, 1, 7, 0, 0),
 datetime.datetime(2016, 1, 8, 0, 0),
 datetime.datetime(2016, 1, 9, 0, 0),
 datetime.datetime(2016, 1, 10, 0, 0),
 datetime.datetime(2016, 1, 11, 0, 0),
 datetime.datetime(2016, 1, 12, 0, 0),
 datetime.datetime(2016, 1, 13, 0, 0),
 datetime.datetime(2016, 1, 14, 0, 0),
 datetime.datetime(2016, 1, 15, 0, 0),
 datetime.datetime(2016, 1, 16, 0, 0),
 datetime.datetime(2016, 1, 17, 0, 0),
 datetime.datetime(2016, 1, 18, 0, 0),
 datetime.datetime(2016, 1, 19, 0, 0),
 datetime.datetime(2016, 1, 20, 0, 0)]

In [19]:
# shows the unique years in all the converted dates proving it worked well
years = [d.year for d in converted_dates]

unique_years = list(set(years))
unique_years

[2016, 2017, 2018, 2019, 2020, 2021, 2022]

No errors so the conversion worked for both datetime patterns. Now to apply it to main dataframe.

In [20]:
consumption["Date"] = [date_conversion(i) for i in consumption["Date"]]
consumption["Year"] = [i.strftime("%Y") for i in consumption["Date"]]
consumption["Month"] = [i.strftime("%m") for i in consumption["Date"]]
consumption["index"] = [i.strftime("%m-%d") for i in consumption["Date"]]
consumption = consumption.set_index("index")

Previewing preprocess changes:

In [21]:
consumption

Unnamed: 0_level_0,Date,Consumption,Year,Month
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
01-01,2016-01-01,276.2910,2016,01
01-02,2016-01-02,294.6780,2016,01
01-03,2016-01-03,306.3096,2016,01
01-04,2016-01-04,370.0566,2016,01
01-05,2016-01-05,367.3998,2016,01
...,...,...,...,...
09-29,2022-09-29,258.0822,2022,09
09-30,2022-09-30,242.5950,2022,09
10-01,2022-10-01,188.0658,2022,10
10-02,2022-10-02,190.6092,2022,10


In [22]:
cleaned_consumption_df = consumption.drop("Date", axis=1)[["Year", "Consumption", "Month"]]
# Dealing with duplicates:
# There are 5 duplicates within cleaned_consumption
len(cleaned_consumption_df.loc[cleaned_consumption_df.duplicated()])

5

These 5 dupes will be handled later in aggregation

In [23]:
cleaned_consumption_df.loc[cleaned_consumption_df.duplicated()]

Unnamed: 0_level_0,Year,Consumption,Month
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
02-06,2017,424.7316,2
05-29,2017,269.7138,5
08-21,2017,257.337,8
08-16,2018,257.8068,8
08-18,2021,246.5478,8


Now to add the season column for graphing: I will map a dictionary to each month that falls within a season

In [24]:
def season_mapping(month):
    season_dict = { "Winter": ["12","01","02"],
                    "Spring": ["03", "04", "05"],
                    "Summer": ["06", "07", "08"],
                    "Autumn": ["09", "10", "11"]

    }
    for k,v in season_dict.items():
        if month in v:
            return k

In [26]:
# applying map to dataframe
cleaned_consumption_df["Season"]= [season_mapping(month) for month in cleaned_consumption_df["Month"]]
cleaned_consumption_df

Unnamed: 0_level_0,Year,Consumption,Month,Season
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
01-01,2016,276.2910,01,Winter
01-02,2016,294.6780,01,Winter
01-03,2016,306.3096,01,Winter
01-04,2016,370.0566,01,Winter
01-05,2016,367.3998,01,Winter
...,...,...,...,...
09-29,2022,258.0822,09,Autumn
09-30,2022,242.5950,09,Autumn
10-01,2022,188.0658,10,Autumn
10-02,2022,190.6092,10,Autumn


In [27]:
# aggregate by year and season
season = cleaned_consumption_df.groupby(["Year","Season"]).agg(Consumption=("Consumption", "mean")).reset_index()
season_filtered = season.loc[season["Year"].astype("int") <= 2020].copy()
season_filtered

Unnamed: 0,Year,Season,Consumption
0,2016,Autumn,335.411743
1,2016,Spring,307.234761
2,2016,Summer,257.039413
3,2016,Winter,374.604884
4,2017,Autumn,330.742048
5,2017,Spring,305.508757
6,2017,Summer,253.497776
7,2017,Winter,402.2298
8,2018,Autumn,320.067673
9,2018,Spring,304.233007


Season_filtered df is set up for graphing using the altair library since it provides interactive functions and works well with that data dashboard found here: [2-consumption.html](https://github.com/PaulWill92/time_series_assessment/blob/main/tasks/2-consumption.html). You can view this dashboard by downloading it and opening the file in your web browser for an interactive experience that lets you play with the chart and filter the data table with queries.

In [29]:
# Create chart:

line = alt.Chart(season_filtered).mark_line(point=True).encode(
    alt.X("Year:T"),
    alt.Y("Consumption:Q"),
    alt.Color("Season"),
    alt.OpacityValue(0.7),
    tooltip= [alt.Tooltip("Season"),
              alt.Tooltip("Year:T"),
              alt.Tooltip("Consumption")
             ]
).interactive().properties()

# view graph:
line

In [31]:
# add average line for 2021 and 2022
average_2021_2022 = season.loc[season["Year"].astype("int") > 2020].copy().groupby("Year").mean().reset_index().replace({"2021": "2021 Average", "2022":"2022 Average"})


avg_line = alt.Chart(average_2021_2022).mark_rule(color="black", strokeDash=[2,2]).encode(
    alt.Y("Consumption:Q"),
    alt.SizeValue(2),
    alt.Color("Year"),


)
line + avg_line

In [33]:
graph = line+avg_line # saving to variable to process on data dashboard 

In [34]:
# Create dashboard
app = dp.App( dp.Text("## Energy Consumption Analysis Across 5 Years"),
              dp.Plot(graph, caption="Units of Energy Consumption Per Year"),
              dp.DataTable(cleaned_consumption_df, caption="Consumption Dataset"),
              dp.Text("### Analysis: \nEnergy consumption seems to go up based on how cold the season is that means winter and autumn, are generally the colder of the seasons, thus more energy is consumed than the other seasons. The Years 2017 and 2018 most likely had the coldest winters as their consumption units peaked."))
app.save(path="./consumption.html", open=True)




Bokeh version 2.3.2 is not supported, these plots may not display correctly, please install version ~=2.4.2


App saved to ././consumption.html

### Q2 Analysis:

Energy consumption seems to go up based on how cold the season is that means winter and autumn, are generally the colder of the seasons, thus more energy is consumed than the other seasons. The Years 2017 and 2018 most likely had the coldest winters as their consumption units peaked.

## Question 3

A robot moves around a flat surface from position (0,0). It accepts instructions, and moves accordingly. For example:
“BEGIN”
LEFT 3
UP 5
RIGHT 4
DOWN 7
The first word indicates direction and the number shows steps. The robot will stop moving with instruction “STOP”.
Please write a function, which accepts instructions as a list. When first “STOP” instruction is given, it calculates the distance of Robot from the original position (0,0)

In [20]:
def robot(move):
    command_count = len(move)

    instructions = ["begin", "right", "left", "up", "down", "stop"]

    posUp = 0
    posDown = 0
    posLeft = 0
    posRight = 0
    
    
    for i in range(0, command_count):
        if (move[i].lower() not in instructions):
                print(f"The robot is confused {move[i]} is not a valid command! please re-enter commands that look like this: {instructions}")
                break
        else:

            if (move[i].lower() == "begin"):
                posUp = posDown = posLeft = posRight = 0
                print(f"The robot is starting from the begining position {posRight-posLeft,posUp-posDown}")
            elif (move[i].lower() == "right"):
                posRight +=1
                print(f"The robot moved right one step, current positon is: {posRight-posLeft,posUp-posDown}")
                time.sleep(.5)
            elif (move[i].lower() == "left"):
                posLeft +=1
                print(f"The robot moved left one step current positon is: {posRight-posLeft,posUp-posDown}")
                time.sleep(.5)
            elif (move[i].lower() == "up"):
                posUp +=1
                print(f"The robot moved up one step, current positon is: {posRight-posLeft,posUp-posDown}")
                time.sleep(.5)
            elif (move[i].lower() == "down"):
                posDown +=1
                print(f"The robot moved down one step, current positon is: {posRight-posLeft,posUp-posDown}")
                time.sleep(.5)
            elif (move[i].lower()== "stop"):
                print(f"The robot has stopped! current positon is: {posRight-posLeft,posUp-posDown}")
                break

In [22]:
# Test robot:

moves = ["Begin", "Left", "Left", "Left", "Up", "Up", "Up", "Up", "Up", "Right", "Right", "Right", "Right", "Down", "Down", "Down", "Down", "Down", "Down", "Down", "STOP" ]
robot(moves)

The robot is starting from the begining position (0, 0)
The robot moved left one step current positon is: (-1, 0)
The robot moved left one step current positon is: (-2, 0)
The robot moved left one step current positon is: (-3, 0)
The robot moved up one step, current positon is: (-3, 1)
The robot moved up one step, current positon is: (-3, 2)
The robot moved up one step, current positon is: (-3, 3)
The robot moved up one step, current positon is: (-3, 4)
The robot moved up one step, current positon is: (-3, 5)
The robot moved right one step, current positon is: (-2, 5)
The robot moved right one step, current positon is: (-1, 5)
The robot moved right one step, current positon is: (0, 5)
The robot moved right one step, current positon is: (1, 5)
The robot moved down one step, current positon is: (1, 4)
The robot moved down one step, current positon is: (1, 3)
The robot moved down one step, current positon is: (1, 2)
The robot moved down one step, current positon is: (1, 1)
The robot moved

# Question 4 

You are provided with sample trades data in a data file (trades.csv). It contains a collection of trade data during 5 working days with volume and price. You are asked to process the sample data into a data set for creating the candlestick plots, with open, high, low & close prices (OHLC) and total volume for each product and contract, over a time interval.
You are expected to write a function to create a dataframe contains OHLC and trading volume with (begin, end, product(s), freq) as input. It should be able take freq input, e.g., 15MIN, 1H, 1D.
Expected Result – A python model file detailing process with comments: Note:
• The product “Emission - Venue A” and “Emission - Venue B” are the same product trading in two different venues, please combine them when queried
• If product list has more than one contract, please generate OHLC and volume data for each contract
• Please limit output within trading hours, i.e., 7:00 – 17:00, except when freq >= 1D
• Please do not use OHLC resampler

Parse data with pandas

In [35]:
trades = pd.read_csv("../Data analyst coding challenge/Trades.csv")
trades.info() # inspect data types and missing

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 70095 entries, 0 to 70094
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   TradeDateTime  70095 non-null  object 
 1   Product        70095 non-null  object 
 2   Contract       70095 non-null  object 
 3   Price          70095 non-null  float64
 4   Quantity       70095 non-null  int64  
dtypes: float64(1), int64(1), object(3)
memory usage: 2.7+ MB


convert datetime to object

In [37]:
trades["TradeDateTime"]= pd.to_datetime(trades["TradeDateTime"], infer_datetime_format=True)
trades

Unnamed: 0,TradeDateTime,Product,Contract,Price,Quantity
0,2022-04-18 00:37:00,Emission - Venue A,DA,20.5000,60
1,2022-04-18 00:42:00,Emission - Venue A,DA,20.6500,30
2,2022-04-18 00:42:00,Emission - Venue A,DA,20.6500,30
3,2022-04-18 00:50:00,Emission - Venue A,DA,20.6500,500
4,2022-04-18 00:54:00,Emission - Venue A,DA,20.6500,100
...,...,...,...,...,...
70090,2022-04-21 21:13:00,Emission - Venue A,DA,18.6375,70
70091,2022-04-21 21:56:00,Emission - Venue A,DA,18.3500,100
70092,2022-04-21 22:05:00,Emission - Venue A,DA,18.3000,4
70093,2022-04-21 22:07:00,Emission - Venue A,DA,18.3000,77


Combine VenueA and Venue B

In [39]:
trades = trades.replace({"Emission - Venue B": "Emission", "Emission - Venue A": "Emission"})
trades["Product"].value_counts()

Energy      54552
Emission    15543
Name: Product, dtype: int64

Since I am not allowed to use an OHLC resampler, I need to preprocess the rows to manually do it.

In [40]:
## split date and time
trades["Date"] = [x.strftime("%Y-%m-%d") for x in trades["TradeDateTime"]]
trades["Time"] = [x.strftime("%H:%M:%S") for x in trades["TradeDateTime"]]
trades = trades.set_index("TradeDateTime").between_time("7:00", "17:00") # for OHLC data this range is all we care about
trades_agg = trades.groupby(["Product", "TradeDateTime", "Time", "Contract"]).mean().reset_index()

In [41]:
trades_agg

Unnamed: 0,Product,TradeDateTime,Time,Contract,Price,Quantity
0,Emission,2022-04-18 07:02:00,07:02:00,M01,19.200000,6.666667
1,Emission,2022-04-18 07:06:00,07:06:00,M01,18.998750,10.000000
2,Emission,2022-04-18 07:38:00,07:38:00,M01,19.173750,5.000000
3,Emission,2022-04-18 07:39:00,07:39:00,M01,19.187500,15.000000
4,Emission,2022-04-18 07:40:00,07:40:00,M01,19.152500,5.000000
...,...,...,...,...,...,...
4663,Energy,2022-04-21 16:55:00,16:55:00,Q01,22.779853,1.235294
4664,Energy,2022-04-21 16:56:00,16:56:00,Q01,22.749167,1.000000
4665,Energy,2022-04-21 16:57:00,16:57:00,Q01,22.758947,1.210526
4666,Energy,2022-04-21 16:58:00,16:58:00,Q01,22.745547,1.250000


In [42]:
# add hour and minute preprocessing cols for future function
trades_agg["Hour"] = [int(x.strftime("%H")) for x in trades_agg["TradeDateTime"]]
trades_agg["Minute"] = [int(x.strftime("%M")) for x in trades_agg["TradeDateTime"]]

In [43]:
trades_agg

Unnamed: 0,Product,TradeDateTime,Time,Contract,Price,Quantity,Hour,Minute
0,Emission,2022-04-18 07:02:00,07:02:00,M01,19.200000,6.666667,7,2
1,Emission,2022-04-18 07:06:00,07:06:00,M01,18.998750,10.000000,7,6
2,Emission,2022-04-18 07:38:00,07:38:00,M01,19.173750,5.000000,7,38
3,Emission,2022-04-18 07:39:00,07:39:00,M01,19.187500,15.000000,7,39
4,Emission,2022-04-18 07:40:00,07:40:00,M01,19.152500,5.000000,7,40
...,...,...,...,...,...,...,...,...
4663,Energy,2022-04-21 16:55:00,16:55:00,Q01,22.779853,1.235294,16,55
4664,Energy,2022-04-21 16:56:00,16:56:00,Q01,22.749167,1.000000,16,56
4665,Energy,2022-04-21 16:57:00,16:57:00,Q01,22.758947,1.210526,16,57
4666,Energy,2022-04-21 16:58:00,16:58:00,Q01,22.745547,1.250000,16,58


In [44]:
# sorting the index by time so that when product is filtered out I can have a distinct is_open column to allow me to calculate HL
trades_agg = trades_agg.set_index("TradeDateTime").sort_index().reset_index()

In [46]:
# Preprocessed output:
trades_agg

Unnamed: 0,TradeDateTime,Product,Time,Contract,Price,Quantity,Hour,Minute
0,2022-04-18 07:00:00,Energy,07:00:00,Q01,24.058147,1.534483,7,0
1,2022-04-18 07:01:00,Energy,07:01:00,Q01,23.981389,1.111111,7,1
2,2022-04-18 07:02:00,Energy,07:02:00,Q01,23.972727,1.181818,7,2
3,2022-04-18 07:02:00,Emission,07:02:00,M01,19.200000,6.666667,7,2
4,2022-04-18 07:03:00,Energy,07:03:00,Q01,23.957024,1.380952,7,3
...,...,...,...,...,...,...,...,...
4663,2022-04-21 16:58:00,Emission,16:58:00,M01,18.271250,5.000000,16,58
4664,2022-04-21 16:58:00,Emission,16:58:00,DA,18.630000,74.400000,16,58
4665,2022-04-21 16:58:00,Energy,16:58:00,Q01,22.745547,1.250000,16,58
4666,2022-04-21 16:59:00,Emission,16:59:00,M01,18.328125,5.000000,16,59


Creating a function to calculate open/close. Assumptions: the market closes at 17:00 on the dot so close price will be 16:59

In [50]:
# filter dataframes and get each products open and close

df_holder = []
unique_products = trades_agg["Product"].unique().tolist()

for product in unique_products:
    prod_df = trades_agg.loc[trades_agg["Product"]==product]
    df_holder.append(prod_df.set_index("TradeDateTime"))

In [55]:
open_count= 0 # should equal 4 since there are 4 open times
close_count = 0

for i, (H,M) in enumerate(zip(df_holder[0]["Hour"], df_holder[0]["Minute"])):
    if (H,M) == (7,0):
        open_count +=1
    if (H,M) == (16,59):
        close_count+=1

In [57]:
print(open_count)
print(close_count)

4
4


In [59]:
open_count2= 0 # should equal 4 since there are 4 open times
close_count2 = 0

for i, (H,M) in enumerate(zip(df_holder[1]["Hour"], df_holder[1]["Minute"])):
    if (H,M) == (7,0):
        open_count2 +=1
    if (H,M) == (16,59):
        close_count2 +=1

In [60]:
print(open_count2)
print(close_count2)

0
4


it seems there are no open prices for the emissions dataframe at 7:00 so the next time will be the next entry after 7:00