In [1]:
%matplotlib notebook
%matplotlib inline

In [2]:
# importing 'pandas' and 'numpy' libraries for the 'Water Allocation Price Prediction' model development project
import pandas as pd
import numpy as np

In [3]:
# reading the raw water market transactions data from the CSV file into a dataframe
# NOTE(review): the path is relative, so it presumably resolves against the kernel's
# working directory - confirm it matches the location the notebook is started from
raw_data = pd.read_csv('Downloads/LDI project data/ALL_water_share_trading.csv')

In [4]:
# previewing the first rows of the raw data for an initial look at its features (columns)
raw_data.head()

Unnamed: 0,Status,Application Creation Date,Approved / Refused Date,Recorded / Rejected Date,Water Share ID,Water System Source,Reliability,Trading Zone Source,Seller Water Authority,Seller Trading Zone Use,Buyer Water Authority,Buyer Trading Zone Use,Volume Traded (ML),Price Per ML ($)
0,Recorded,12/7/2013,24/07/2013,23/08/2013,WEE****24,Goulburn,High,1A Greater Goulburn,Goulburn-Murray Water,1A Greater Goulburn,Goulburn-Murray Water,1A Greater Goulburn,17.4,1250.0
1,Recorded,6/1/2020,9/1/2020,10/3/2020,WEE****26,Murray,High,7 VIC Murray - Barmah to SA,Lower Murray Water,7 VIC Murray - Barmah to SA,Lower Murray Water,7 VIC Murray - Barmah to SA,0.5,1219.52
2,Recorded,30/04/2009,13/07/2009,11/9/2009,WEE****80,Goulburn,Low,1A Greater Goulburn,Goulburn-Murray Water,1A Greater Goulburn,Goulburn-Murray Water,1A Greater Goulburn,13.4,0.07
3,Recorded,13/04/2018,26/04/2018,1/5/2018,WEE****36,Goulburn,High,1A Greater Goulburn,Goulburn-Murray Water,1A Greater Goulburn,Goulburn-Murray Water,1A Greater Goulburn,1.0,2800.0
4,Recorded,9/9/2022,28/09/2022,13/10/2022,WEE****71,Murray,High,7 VIC Murray - Barmah to SA,Goulburn-Murray Water,7 VIC Murray - Barmah to SA,Goulburn-Murray Water,7 VIC Murray - Barmah to SA,6.3,7700.0


In [5]:
# summarising the columns of the raw data (non-null counts and dtypes) to check
# whether any column contains missing (NaN) values that would have to be excluded
raw_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 47622 entries, 0 to 47621
Data columns (total 14 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Status                     47622 non-null  object 
 1   Application Creation Date  47622 non-null  object 
 2   Approved / Refused Date    47622 non-null  object 
 3   Recorded / Rejected Date   47622 non-null  object 
 4   Water Share ID             47622 non-null  object 
 5   Water System Source        47622 non-null  object 
 6   Reliability                47622 non-null  object 
 7   Trading Zone Source        47622 non-null  object 
 8   Seller Water Authority     47622 non-null  object 
 9   Seller Trading Zone Use    47622 non-null  object 
 10  Buyer Water Authority      47622 non-null  object 
 11  Buyer Trading Zone Use     47622 non-null  object 
 12  Volume Traded (ML)         47622 non-null  float64
 13  Price Per ML ($)           47622 non-null  flo

In [6]:
# counting how many transactions in the raw data fall under each 'Status' value
raw_data['Status'].value_counts()

Recorded    42768
Rejected     2943
Refused      1681
Expired       230
Name: Status, dtype: int64

In [7]:
# counting how many transactions in the raw data fall under each 'Reliability' value
raw_data['Reliability'].value_counts()

High     35714
Low      11635
Spill      273
Name: Reliability, dtype: int64

In [8]:
# keeping only the transactions that have both the 'High' reliability and the 'Recorded' status
raw_data = raw_data[(raw_data['Reliability'] == 'High') & (raw_data['Status'] == 'Recorded')]

In [9]:
# previewing the data after keeping only the 'High' reliability, 'Recorded' status transactions
raw_data.head()

Unnamed: 0,Status,Application Creation Date,Approved / Refused Date,Recorded / Rejected Date,Water Share ID,Water System Source,Reliability,Trading Zone Source,Seller Water Authority,Seller Trading Zone Use,Buyer Water Authority,Buyer Trading Zone Use,Volume Traded (ML),Price Per ML ($)
0,Recorded,12/7/2013,24/07/2013,23/08/2013,WEE****24,Goulburn,High,1A Greater Goulburn,Goulburn-Murray Water,1A Greater Goulburn,Goulburn-Murray Water,1A Greater Goulburn,17.4,1250.0
1,Recorded,6/1/2020,9/1/2020,10/3/2020,WEE****26,Murray,High,7 VIC Murray - Barmah to SA,Lower Murray Water,7 VIC Murray - Barmah to SA,Lower Murray Water,7 VIC Murray - Barmah to SA,0.5,1219.52
3,Recorded,13/04/2018,26/04/2018,1/5/2018,WEE****36,Goulburn,High,1A Greater Goulburn,Goulburn-Murray Water,1A Greater Goulburn,Goulburn-Murray Water,1A Greater Goulburn,1.0,2800.0
4,Recorded,9/9/2022,28/09/2022,13/10/2022,WEE****71,Murray,High,7 VIC Murray - Barmah to SA,Goulburn-Murray Water,7 VIC Murray - Barmah to SA,Goulburn-Murray Water,7 VIC Murray - Barmah to SA,6.3,7700.0
5,Recorded,2/3/2022,11/3/2022,7/4/2022,WEE****72,Murray,High,7 VIC Murray - Barmah to SA,Goulburn-Murray Water,7 VIC Murray - Barmah to SA,Goulburn-Murray Water,7 VIC Murray - Barmah to SA,2.0,0.0


In [10]:
# dropping the 'Status' and 'Reliability' columns in one step, as both hold the same value for every remaining transaction
raw_data = raw_data.drop(columns = ["Reliability", "Status"])

In [11]:
# previewing the data after the 'Status' and 'Reliability' columns have been removed
raw_data.head()

Unnamed: 0,Application Creation Date,Approved / Refused Date,Recorded / Rejected Date,Water Share ID,Water System Source,Trading Zone Source,Seller Water Authority,Seller Trading Zone Use,Buyer Water Authority,Buyer Trading Zone Use,Volume Traded (ML),Price Per ML ($)
0,12/7/2013,24/07/2013,23/08/2013,WEE****24,Goulburn,1A Greater Goulburn,Goulburn-Murray Water,1A Greater Goulburn,Goulburn-Murray Water,1A Greater Goulburn,17.4,1250.0
1,6/1/2020,9/1/2020,10/3/2020,WEE****26,Murray,7 VIC Murray - Barmah to SA,Lower Murray Water,7 VIC Murray - Barmah to SA,Lower Murray Water,7 VIC Murray - Barmah to SA,0.5,1219.52
3,13/04/2018,26/04/2018,1/5/2018,WEE****36,Goulburn,1A Greater Goulburn,Goulburn-Murray Water,1A Greater Goulburn,Goulburn-Murray Water,1A Greater Goulburn,1.0,2800.0
4,9/9/2022,28/09/2022,13/10/2022,WEE****71,Murray,7 VIC Murray - Barmah to SA,Goulburn-Murray Water,7 VIC Murray - Barmah to SA,Goulburn-Murray Water,7 VIC Murray - Barmah to SA,6.3,7700.0
5,2/3/2022,11/3/2022,7/4/2022,WEE****72,Murray,7 VIC Murray - Barmah to SA,Goulburn-Murray Water,7 VIC Murray - Barmah to SA,Goulburn-Murray Water,7 VIC Murray - Barmah to SA,2.0,0.0


In [12]:
# parsing the 'Approved / Refused Date' column from day/month/year text into the 'datetime' format
raw_data["Approved / Refused Date"] = pd.to_datetime(raw_data["Approved / Refused Date"], format = '%d/%m/%Y')

In [13]:
# extracting the year and the month of the 'Approved / Refused Date' into two separate
# integer columns for further data processing
raw_data["Approved Year"] = raw_data["Approved / Refused Date"].dt.year
raw_data["Approved Month"] = raw_data["Approved / Refused Date"].dt.month

In [14]:
# previewing the data with the parsed approval date and the new 'Approved Year' and 'Approved Month' columns
raw_data.head()

Unnamed: 0,Application Creation Date,Approved / Refused Date,Recorded / Rejected Date,Water Share ID,Water System Source,Trading Zone Source,Seller Water Authority,Seller Trading Zone Use,Buyer Water Authority,Buyer Trading Zone Use,Volume Traded (ML),Price Per ML ($),Approved Year,Approved Month
0,12/7/2013,2013-07-24,23/08/2013,WEE****24,Goulburn,1A Greater Goulburn,Goulburn-Murray Water,1A Greater Goulburn,Goulburn-Murray Water,1A Greater Goulburn,17.4,1250.0,2013,7
1,6/1/2020,2020-01-09,10/3/2020,WEE****26,Murray,7 VIC Murray - Barmah to SA,Lower Murray Water,7 VIC Murray - Barmah to SA,Lower Murray Water,7 VIC Murray - Barmah to SA,0.5,1219.52,2020,1
3,13/04/2018,2018-04-26,1/5/2018,WEE****36,Goulburn,1A Greater Goulburn,Goulburn-Murray Water,1A Greater Goulburn,Goulburn-Murray Water,1A Greater Goulburn,1.0,2800.0,2018,4
4,9/9/2022,2022-09-28,13/10/2022,WEE****71,Murray,7 VIC Murray - Barmah to SA,Goulburn-Murray Water,7 VIC Murray - Barmah to SA,Goulburn-Murray Water,7 VIC Murray - Barmah to SA,6.3,7700.0,2022,9
5,2/3/2022,2022-03-11,7/4/2022,WEE****72,Murray,7 VIC Murray - Barmah to SA,Goulburn-Murray Water,7 VIC Murray - Barmah to SA,Goulburn-Murray Water,7 VIC Murray - Barmah to SA,2.0,0.0,2022,3


In [15]:
# counting how many of the remaining transactions belong to each water system source
raw_data['Water System Source'].value_counts()

Murray                15886
Goulburn              12818
Thomson/Macalister     1446
Campaspe                478
Loddon                  372
Ovens                   357
Broken                  267
Werribee                224
Bullarook                15
Name: Water System Source, dtype: int64

In [16]:
# selecting the transactions of the processed raw data whose water system source is 'Murray'
data_Murray = raw_data.loc[raw_data["Water System Source"] == "Murray"]

In [17]:
# previewing the first rows of the transaction data for the 'Murray' water system source
data_Murray.head()

Unnamed: 0,Application Creation Date,Approved / Refused Date,Recorded / Rejected Date,Water Share ID,Water System Source,Trading Zone Source,Seller Water Authority,Seller Trading Zone Use,Buyer Water Authority,Buyer Trading Zone Use,Volume Traded (ML),Price Per ML ($),Approved Year,Approved Month
1,6/1/2020,2020-01-09,10/3/2020,WEE****26,Murray,7 VIC Murray - Barmah to SA,Lower Murray Water,7 VIC Murray - Barmah to SA,Lower Murray Water,7 VIC Murray - Barmah to SA,0.5,1219.52,2020,1
4,9/9/2022,2022-09-28,13/10/2022,WEE****71,Murray,7 VIC Murray - Barmah to SA,Goulburn-Murray Water,7 VIC Murray - Barmah to SA,Goulburn-Murray Water,7 VIC Murray - Barmah to SA,6.3,7700.0,2022,9
5,2/3/2022,2022-03-11,7/4/2022,WEE****72,Murray,7 VIC Murray - Barmah to SA,Goulburn-Murray Water,7 VIC Murray - Barmah to SA,Goulburn-Murray Water,7 VIC Murray - Barmah to SA,2.0,0.0,2022,3
9,14/06/2018,2018-06-15,4/7/2018,WEE****90,Murray,7 VIC Murray - Barmah to SA,Lower Murray Water,7 VIC Murray - Barmah to SA,Lower Murray Water,7 VIC Murray - Barmah to SA,5.0,3200.0,2018,6
10,3/4/2014,2014-04-16,13/06/2014,WEE****65,Murray,7 VIC Murray - Barmah to SA,Goulburn-Murray Water,7 VIC Murray - Barmah to SA,Goulburn-Murray Water,7 VIC Murray - Barmah to SA,1.0,1431.0,2014,4


In [18]:
# dropping the 'Water System Source' column, as it holds the same value for every Murray transaction
data_Murray = data_Murray.drop(columns = ["Water System Source"])

In [19]:
# previewing the Murray data after the 'Water System Source' column has been removed
data_Murray.head()

Unnamed: 0,Application Creation Date,Approved / Refused Date,Recorded / Rejected Date,Water Share ID,Trading Zone Source,Seller Water Authority,Seller Trading Zone Use,Buyer Water Authority,Buyer Trading Zone Use,Volume Traded (ML),Price Per ML ($),Approved Year,Approved Month
1,6/1/2020,2020-01-09,10/3/2020,WEE****26,7 VIC Murray - Barmah to SA,Lower Murray Water,7 VIC Murray - Barmah to SA,Lower Murray Water,7 VIC Murray - Barmah to SA,0.5,1219.52,2020,1
4,9/9/2022,2022-09-28,13/10/2022,WEE****71,7 VIC Murray - Barmah to SA,Goulburn-Murray Water,7 VIC Murray - Barmah to SA,Goulburn-Murray Water,7 VIC Murray - Barmah to SA,6.3,7700.0,2022,9
5,2/3/2022,2022-03-11,7/4/2022,WEE****72,7 VIC Murray - Barmah to SA,Goulburn-Murray Water,7 VIC Murray - Barmah to SA,Goulburn-Murray Water,7 VIC Murray - Barmah to SA,2.0,0.0,2022,3
9,14/06/2018,2018-06-15,4/7/2018,WEE****90,7 VIC Murray - Barmah to SA,Lower Murray Water,7 VIC Murray - Barmah to SA,Lower Murray Water,7 VIC Murray - Barmah to SA,5.0,3200.0,2018,6
10,3/4/2014,2014-04-16,13/06/2014,WEE****65,7 VIC Murray - Barmah to SA,Goulburn-Murray Water,7 VIC Murray - Barmah to SA,Goulburn-Murray Water,7 VIC Murray - Barmah to SA,1.0,1431.0,2014,4


In [20]:
# counting how many Murray transactions belong to each trading zone source
data_Murray["Trading Zone Source"].value_counts()

7 VIC Murray - Barmah to SA      12259
6 VIC Murray - Dart to Barmah     3256
6B Lower Broken Creek              371
Name: Trading Zone Source, dtype: int64

In [21]:
# keeping only the Murray transactions of the chosen trading zone source
data_Murray = data_Murray.loc[data_Murray["Trading Zone Source"] == "7 VIC Murray - Barmah to SA"]

In [22]:
# previewing the Murray data restricted to the '7 VIC Murray - Barmah to SA' trading zone source
data_Murray.head()

Unnamed: 0,Application Creation Date,Approved / Refused Date,Recorded / Rejected Date,Water Share ID,Trading Zone Source,Seller Water Authority,Seller Trading Zone Use,Buyer Water Authority,Buyer Trading Zone Use,Volume Traded (ML),Price Per ML ($),Approved Year,Approved Month
1,6/1/2020,2020-01-09,10/3/2020,WEE****26,7 VIC Murray - Barmah to SA,Lower Murray Water,7 VIC Murray - Barmah to SA,Lower Murray Water,7 VIC Murray - Barmah to SA,0.5,1219.52,2020,1
4,9/9/2022,2022-09-28,13/10/2022,WEE****71,7 VIC Murray - Barmah to SA,Goulburn-Murray Water,7 VIC Murray - Barmah to SA,Goulburn-Murray Water,7 VIC Murray - Barmah to SA,6.3,7700.0,2022,9
5,2/3/2022,2022-03-11,7/4/2022,WEE****72,7 VIC Murray - Barmah to SA,Goulburn-Murray Water,7 VIC Murray - Barmah to SA,Goulburn-Murray Water,7 VIC Murray - Barmah to SA,2.0,0.0,2022,3
9,14/06/2018,2018-06-15,4/7/2018,WEE****90,7 VIC Murray - Barmah to SA,Lower Murray Water,7 VIC Murray - Barmah to SA,Lower Murray Water,7 VIC Murray - Barmah to SA,5.0,3200.0,2018,6
10,3/4/2014,2014-04-16,13/06/2014,WEE****65,7 VIC Murray - Barmah to SA,Goulburn-Murray Water,7 VIC Murray - Barmah to SA,Goulburn-Murray Water,7 VIC Murray - Barmah to SA,1.0,1431.0,2014,4


In [23]:
# dropping the 'Trading Zone Source' column, as it holds the same value for every remaining transaction
data_Murray = data_Murray.drop(columns = ["Trading Zone Source"])

In [24]:
# previewing the Murray data after the 'Trading Zone Source' column has been removed
data_Murray.head()

Unnamed: 0,Application Creation Date,Approved / Refused Date,Recorded / Rejected Date,Water Share ID,Seller Water Authority,Seller Trading Zone Use,Buyer Water Authority,Buyer Trading Zone Use,Volume Traded (ML),Price Per ML ($),Approved Year,Approved Month
1,6/1/2020,2020-01-09,10/3/2020,WEE****26,Lower Murray Water,7 VIC Murray - Barmah to SA,Lower Murray Water,7 VIC Murray - Barmah to SA,0.5,1219.52,2020,1
4,9/9/2022,2022-09-28,13/10/2022,WEE****71,Goulburn-Murray Water,7 VIC Murray - Barmah to SA,Goulburn-Murray Water,7 VIC Murray - Barmah to SA,6.3,7700.0,2022,9
5,2/3/2022,2022-03-11,7/4/2022,WEE****72,Goulburn-Murray Water,7 VIC Murray - Barmah to SA,Goulburn-Murray Water,7 VIC Murray - Barmah to SA,2.0,0.0,2022,3
9,14/06/2018,2018-06-15,4/7/2018,WEE****90,Lower Murray Water,7 VIC Murray - Barmah to SA,Lower Murray Water,7 VIC Murray - Barmah to SA,5.0,3200.0,2018,6
10,3/4/2014,2014-04-16,13/06/2014,WEE****65,Goulburn-Murray Water,7 VIC Murray - Barmah to SA,Goulburn-Murray Water,7 VIC Murray - Barmah to SA,1.0,1431.0,2014,4


In [25]:
# counting how many Murray transactions were sold under each seller water authority
data_Murray["Seller Water Authority"].value_counts()

Lower Murray Water       6291
Goulburn-Murray Water    5968
Name: Seller Water Authority, dtype: int64

In [26]:
# splitting the Murray transaction data into two datasets, one per selling authority:
# 'Lower Murray Water' and 'Goulburn-Murray Water'
data_Murray_Lower_Murray = data_Murray.loc[data_Murray["Seller Water Authority"] == "Lower Murray Water"]
data_Murray_Goulburn_Murray = data_Murray.loc[data_Murray["Seller Water Authority"] == "Goulburn-Murray Water"]

In [27]:
# previewing the first rows of the Murray transaction data for the 'Lower Murray Water' selling authority
data_Murray_Lower_Murray.head()

Unnamed: 0,Application Creation Date,Approved / Refused Date,Recorded / Rejected Date,Water Share ID,Seller Water Authority,Seller Trading Zone Use,Buyer Water Authority,Buyer Trading Zone Use,Volume Traded (ML),Price Per ML ($),Approved Year,Approved Month
1,6/1/2020,2020-01-09,10/3/2020,WEE****26,Lower Murray Water,7 VIC Murray - Barmah to SA,Lower Murray Water,7 VIC Murray - Barmah to SA,0.5,1219.52,2020,1
9,14/06/2018,2018-06-15,4/7/2018,WEE****90,Lower Murray Water,7 VIC Murray - Barmah to SA,Lower Murray Water,7 VIC Murray - Barmah to SA,5.0,3200.0,2018,6
13,23/12/2010,2011-01-14,23/02/2011,WEE****10,Lower Murray Water,7 VIC Murray - Barmah to SA,Lower Murray Water,7 VIC Murray - Barmah to SA,0.2,0.0,2011,1
24,31/03/2020,2020-04-04,27/04/2020,WEE****19,Lower Murray Water,7 VIC Murray - Barmah to SA,Lower Murray Water,7 VIC Murray - Barmah to SA,2.0,6500.0,2020,4
25,17/12/2015,2015-12-22,8/2/2016,WEE****96,Lower Murray Water,7 VIC Murray - Barmah to SA,Goulburn-Murray Water,7 VIC Murray - Barmah to SA,20.0,3200.0,2015,12


In [28]:
# previewing the first rows of the Murray transaction data for the 'Goulburn-Murray Water' selling authority
data_Murray_Goulburn_Murray.head()

Unnamed: 0,Application Creation Date,Approved / Refused Date,Recorded / Rejected Date,Water Share ID,Seller Water Authority,Seller Trading Zone Use,Buyer Water Authority,Buyer Trading Zone Use,Volume Traded (ML),Price Per ML ($),Approved Year,Approved Month
4,9/9/2022,2022-09-28,13/10/2022,WEE****71,Goulburn-Murray Water,7 VIC Murray - Barmah to SA,Goulburn-Murray Water,7 VIC Murray - Barmah to SA,6.3,7700.0,2022,9
5,2/3/2022,2022-03-11,7/4/2022,WEE****72,Goulburn-Murray Water,7 VIC Murray - Barmah to SA,Goulburn-Murray Water,7 VIC Murray - Barmah to SA,2.0,0.0,2022,3
10,3/4/2014,2014-04-16,13/06/2014,WEE****65,Goulburn-Murray Water,7 VIC Murray - Barmah to SA,Goulburn-Murray Water,7 VIC Murray - Barmah to SA,1.0,1431.0,2014,4
11,16/10/2013,2013-10-24,4/12/2013,WEE****92,Goulburn-Murray Water,7 VIC Murray - Barmah to SA,Goulburn-Murray Water,7 VIC Murray - Barmah to SA,1.0,1.0,2013,10
14,9/3/2023,2023-03-21,9/5/2023,WEE****37,Goulburn-Murray Water,7 VIC Murray - Barmah to SA,Goulburn-Murray Water,7 VIC Murray - Barmah to SA,2.0,0.0,2023,3


In [29]:
# creating a list of all the remaining features of the data processed so far
features = list(data_Murray_Lower_Murray.columns.values)

# naming the features that the later steps rely on; selecting them by name (rather than by
# their position in the column list) keeps this cell correct even if the column order or
# the number of columns of the data changes
keep_features = ['Volume Traded (ML)', 'Price Per ML ($)', 'Approved Year', 'Approved Month']

# creating a list of data features which are desired to be removed from the data for further processing
# (with the current column layout these are the first 8 columns)
remove_features = [feature for feature in features if feature not in keep_features]

# removing the unwanted data features from both datasets
data_Murray_Lower_Murray = data_Murray_Lower_Murray.drop(remove_features, axis = 1)
data_Murray_Goulburn_Murray = data_Murray_Goulburn_Murray.drop(remove_features, axis = 1)

In [30]:
# displaying the 'Lower Murray Water' dataset after the unwanted features have been removed
data_Murray_Lower_Murray

Unnamed: 0,Volume Traded (ML),Price Per ML ($),Approved Year,Approved Month
1,0.5,1219.52,2020,1
9,5.0,3200.00,2018,6
13,0.2,0.00,2011,1
24,2.0,6500.00,2020,4
25,20.0,3200.00,2015,12
...,...,...,...,...
47574,4.2,1500.00,2014,8
47575,2.0,1400.00,2013,6
47591,94.3,0.00,2014,6
47599,32.4,2400.00,2009,9


In [31]:
# displaying the 'Goulburn-Murray Water' dataset after the unwanted features have been removed
data_Murray_Goulburn_Murray

Unnamed: 0,Volume Traded (ML),Price Per ML ($),Approved Year,Approved Month
4,6.3,7700.0,2022,9
5,2.0,0.0,2022,3
10,1.0,1431.0,2014,4
11,1.0,1.0,2013,10
14,2.0,0.0,2023,3
...,...,...,...,...
47596,40.0,1400.0,2014,1
47606,10.0,1400.0,2013,1
47612,20.0,1780.0,2011,7
47615,90.5,1890.0,2012,1


In [32]:
# computing the traded value (volume times price) of every transaction; it is used afterwards
# to determine the volume weighted average price for each month
volume_times_price_1 = data_Murray_Lower_Murray['Volume Traded (ML)'].mul(data_Murray_Lower_Murray['Price Per ML ($)'])
volume_times_price_2 = data_Murray_Goulburn_Murray['Volume Traded (ML)'].mul(data_Murray_Goulburn_Murray['Price Per ML ($)'])

# storing the traded value in both datasets as an additional feature/column
data_Murray_Lower_Murray['Volume Traded * Price Per ML'] = volume_times_price_1
data_Murray_Goulburn_Murray['Volume Traded * Price Per ML'] = volume_times_price_2

In [33]:
# displaying the 'Lower Murray Water' dataset with the added 'Volume Traded * Price Per ML' column
data_Murray_Lower_Murray

Unnamed: 0,Volume Traded (ML),Price Per ML ($),Approved Year,Approved Month,Volume Traded * Price Per ML
1,0.5,1219.52,2020,1,609.76
9,5.0,3200.00,2018,6,16000.00
13,0.2,0.00,2011,1,0.00
24,2.0,6500.00,2020,4,13000.00
25,20.0,3200.00,2015,12,64000.00
...,...,...,...,...,...
47574,4.2,1500.00,2014,8,6300.00
47575,2.0,1400.00,2013,6,2800.00
47591,94.3,0.00,2014,6,0.00
47599,32.4,2400.00,2009,9,77760.00


In [34]:
# displaying the 'Goulburn-Murray Water' dataset with the added 'Volume Traded * Price Per ML' column
data_Murray_Goulburn_Murray

Unnamed: 0,Volume Traded (ML),Price Per ML ($),Approved Year,Approved Month,Volume Traded * Price Per ML
4,6.3,7700.0,2022,9,48510.0
5,2.0,0.0,2022,3,0.0
10,1.0,1431.0,2014,4,1431.0
11,1.0,1.0,2013,10,1.0
14,2.0,0.0,2023,3,0.0
...,...,...,...,...,...
47596,40.0,1400.0,2014,1,56000.0
47606,10.0,1400.0,2013,1,14000.0
47612,20.0,1780.0,2011,7,35600.0
47615,90.5,1890.0,2012,1,171045.0


In [35]:
# listing the distinct years present in each processed dataset
year_range_1 = np.unique(data_Murray_Lower_Murray['Approved Year'].values)
year_range_2 = np.unique(data_Murray_Goulburn_Murray['Approved Year'].values)

# splitting each dataset into one dataframe per year (traded water volume and allocation
# price data of that single year), in the same order as the year lists above
yearly_data_1 = [
    data_Murray_Lower_Murray[data_Murray_Lower_Murray["Approved Year"] == year]
    for year in year_range_1
]

yearly_data_2 = [
    data_Murray_Goulburn_Murray[data_Murray_Goulburn_Murray["Approved Year"] == year]
    for year in year_range_2
]

In [36]:
# displaying 'yearly_data_1', the list of per-year dataframes of Murray for the 'Lower Murray Water' selling authority
yearly_data_1

[       Volume Traded (ML)  Price Per ML ($)  Approved Year  Approved Month  \
 296                  27.2            882.35           2009               6   
 488                  62.8           2000.00           2009               7   
 516                 994.0           2000.04           2009              12   
 1025                 54.0           2350.00           2009               9   
 1049                250.0           2300.00           2009               8   
 ...                   ...               ...            ...             ...   
 47275               100.0           2400.00           2009              12   
 47408                 1.5              0.00           2009              10   
 47431                30.0           2300.00           2009               7   
 47530               404.2           1800.00           2009              11   
 47599                32.4           2400.00           2009               9   
 
        Volume Traded * Price Per ML  
 296       

In [37]:
# displaying 'yearly_data_2', the list of per-year dataframes of Murray for the 'Goulburn-Murray Water' selling authority
yearly_data_2

[       Volume Traded (ML)  Price Per ML ($)  Approved Year  Approved Month  \
 106                   7.5               0.0           2009               8   
 380                   7.0               0.0           2009               9   
 471                   8.1               0.0           2009              10   
 506                   6.9               0.0           2009               6   
 535                 145.0            2400.0           2009              11   
 ...                   ...               ...            ...             ...   
 45908               200.0            2400.0           2009               7   
 46105                43.1            2150.0           2009              10   
 46322                 1.0               0.0           2009               8   
 47017                 2.3            2400.0           2009              12   
 47157               100.0            2400.0           2009               7   
 
        Volume Traded * Price Per ML  
 106       

In [38]:
def _monthly_volume_and_vwap(yearly_frames, year_labels):
    """Aggregate per-year transaction dataframes into monthly totals.

    Parameters
    ----------
    yearly_frames : list of pandas.DataFrame
        One dataframe per year; each must contain the 'Approved Month',
        'Volume Traded (ML)' and 'Volume Traded * Price Per ML' columns.
    year_labels : sequence
        The year belonging to each dataframe, in the same order as `yearly_frames`.

    Returns
    -------
    tuple of four lists
        (years, months, total traded volumes, volume weighted average prices),
        with one entry per (year, month) pair that has at least one transaction.
        The price is NaN for a month whose traded volume adds up to zero.
    """
    years, months, volumes, prices = [], [], [], []

    for year, frame in zip(year_labels, yearly_frames):

        for month in np.unique(frame['Approved Month']):

            trades = frame[frame['Approved Month'] == month]

            # each total is computed once, with the vectorised pandas sum
            total_volume = trades['Volume Traded (ML)'].sum()
            total_value = trades['Volume Traded * Price Per ML'].sum()

            # a month whose traded volume adds up to zero has no defined average price;
            # NaN is recorded instead of dividing by zero
            if total_volume != 0:
                price = total_value / total_volume
            else:
                price = np.nan

            years.append(year)
            months.append(month)
            volumes.append(total_volume)
            prices.append(price)

    return years, months, volumes, prices


# NOTE(review): transactions with a price of 0 are included in the averages below and pull the
# volume weighted average price down (some months end up with a price of 0) - confirm whether
# such transactions should be excluded before the price is used for modelling

# collecting total monthly traded volume and volume weighted average price for each month of each year
(years_1,
 months_1,
 monthly_traded_volume_1,
 volume_weighted_average_price_1) = _monthly_volume_and_vwap(yearly_data_1, year_range_1)

# creating a dictionary from the collected data
dict_1 = {'Approved Year': years_1,
        'Approved Month': months_1,
        'Total Traded Volume (ML)': monthly_traded_volume_1, 
        'Volume Weighted Average Price ($)': volume_weighted_average_price_1}

# creating a dataframe from the above-defined dictionary
data_Murray_Lower_Murray = pd.DataFrame(dict_1)


# repeating the same aggregation for the second dataset
(years_2,
 months_2,
 monthly_traded_volume_2,
 volume_weighted_average_price_2) = _monthly_volume_and_vwap(yearly_data_2, year_range_2)

# creating a dictionary from the collected data
dict_2 = {'Approved Year': years_2,
        'Approved Month': months_2,
        'Total Traded Volume (ML)': monthly_traded_volume_2, 
        'Volume Weighted Average Price ($)': volume_weighted_average_price_2}

# creating a dataframe from the above-defined dictionary
data_Murray_Goulburn_Murray = pd.DataFrame(dict_2)

In [39]:
# displaying the newly generated monthly dataset (total traded volume and volume weighted
# average price) of Murray for the 'Lower Murray Water' selling authority for analysis
data_Murray_Lower_Murray

Unnamed: 0,Approved Year,Approved Month,Total Traded Volume (ML),Volume Weighted Average Price ($)
0,2009,5,89.8,0.000000
1,2009,6,1386.7,1913.133663
2,2009,7,5697.3,2200.595530
3,2009,8,5641.0,2283.588856
4,2009,9,6862.2,2224.409810
...,...,...,...,...
175,2023,12,210.2,4242.245442
176,2024,1,224.7,5175.767557
177,2024,2,750.6,3245.943245
178,2024,3,13.5,5618.592593


In [40]:
# displaying the newly generated monthly dataset (total traded volume and volume weighted
# average price) of Murray for the 'Goulburn-Murray Water' selling authority for analysis
data_Murray_Goulburn_Murray

Unnamed: 0,Approved Year,Approved Month,Total Traded Volume (ML),Volume Weighted Average Price ($)
0,2009,5,120.0,2610.816667
1,2009,6,2806.6,672.739543
2,2009,7,18812.9,2227.445579
3,2009,8,1290.2,2265.075947
4,2009,9,240.3,2318.435289
...,...,...,...,...
175,2023,12,595.0,4699.613445
176,2024,1,577.5,5918.474405
177,2024,2,1080.6,1951.919304
178,2024,3,714.4,4044.883819


In [41]:
# creating a new 'Date' feature (the first day of each month) for both datasets from each year and its
# associated month; the date is assembled from named year/month/day components instead of gluing the
# digits into one string, because the month is not zero-padded and the glued text ('20095', '200912')
# would have a variable width that the '%Y%m' parsing format has to guess its way through
data_Murray_Lower_Murray['Date'] = pd.to_datetime(pd.DataFrame({
    'year': data_Murray_Lower_Murray['Approved Year'],
    'month': data_Murray_Lower_Murray['Approved Month'],
    'day': 1}))
data_Murray_Goulburn_Murray['Date'] = pd.to_datetime(pd.DataFrame({
    'year': data_Murray_Goulburn_Murray['Approved Year'],
    'month': data_Murray_Goulburn_Murray['Approved Month'],
    'day': 1}))

# removing the previous date information from both datasets after creating a new date feature
data_Murray_Lower_Murray = data_Murray_Lower_Murray.drop(['Approved Year', 'Approved Month'], axis = 1)

data_Murray_Goulburn_Murray = data_Murray_Goulburn_Murray.drop(['Approved Year', 'Approved Month'], axis = 1)

In [42]:
# displaying the 'Lower Murray Water' monthly dataset with the 'Date' column in place of the year and month columns
data_Murray_Lower_Murray

Unnamed: 0,Total Traded Volume (ML),Volume Weighted Average Price ($),Date
0,89.8,0.000000,2009-05-01
1,1386.7,1913.133663,2009-06-01
2,5697.3,2200.595530,2009-07-01
3,5641.0,2283.588856,2009-08-01
4,6862.2,2224.409810,2009-09-01
...,...,...,...
175,210.2,4242.245442,2023-12-01
176,224.7,5175.767557,2024-01-01
177,750.6,3245.943245,2024-02-01
178,13.5,5618.592593,2024-03-01


In [43]:
# displaying the 'Goulburn-Murray Water' monthly dataset with the 'Date' column in place of the year and month columns
data_Murray_Goulburn_Murray

Unnamed: 0,Total Traded Volume (ML),Volume Weighted Average Price ($),Date
0,120.0,2610.816667,2009-05-01
1,2806.6,672.739543,2009-06-01
2,18812.9,2227.445579,2009-07-01
3,1290.2,2265.075947,2009-08-01
4,240.3,2318.435289,2009-09-01
...,...,...,...
175,595.0,4699.613445,2023-12-01
176,577.5,5918.474405,2024-01-01
177,1080.6,1951.919304,2024-02-01
178,714.4,4044.883819,2024-03-01


In [44]:
# writing both processed water transaction datasets to CSV files, leaving out the row index
data_Murray_Lower_Murray.to_csv(path_or_buf = 'data_Murray_Lower_Murray.csv', index = False)
data_Murray_Goulburn_Murray.to_csv(path_or_buf = 'data_Murray_Goulburn_Murray.csv', index = False)

In [45]:
# selecting the transactions of the processed raw data whose water system source is 'Goulburn'
data_Goulburn = raw_data.loc[raw_data["Water System Source"] == "Goulburn"]

In [46]:
# previewing the first rows of the transaction data for the 'Goulburn' water system source
data_Goulburn.head()

Unnamed: 0,Application Creation Date,Approved / Refused Date,Recorded / Rejected Date,Water Share ID,Water System Source,Trading Zone Source,Seller Water Authority,Seller Trading Zone Use,Buyer Water Authority,Buyer Trading Zone Use,Volume Traded (ML),Price Per ML ($),Approved Year,Approved Month
0,12/7/2013,2013-07-24,23/08/2013,WEE****24,Goulburn,1A Greater Goulburn,Goulburn-Murray Water,1A Greater Goulburn,Goulburn-Murray Water,1A Greater Goulburn,17.4,1250.0,2013,7
3,13/04/2018,2018-04-26,1/5/2018,WEE****36,Goulburn,1A Greater Goulburn,Goulburn-Murray Water,1A Greater Goulburn,Goulburn-Murray Water,1A Greater Goulburn,1.0,2800.0,2018,4
7,29/11/2018,2018-11-29,14/12/2018,WEE****60,Goulburn,1B Boort,Goulburn-Murray Water,1B Boort,Goulburn-Murray Water,1B Boort,20.0,3500.0,2018,11
12,29/11/2020,2020-11-30,8/1/2021,WEE****88,Goulburn,1A Greater Goulburn,Goulburn-Murray Water,1A Greater Goulburn,Goulburn-Murray Water,1A Greater Goulburn,600.0,0.0,2020,11
17,8/10/2009,2009-10-08,9/11/2009,WEE****06,Goulburn,1B Boort,Goulburn-Murray Water,1B Boort,Goulburn-Murray Water,1B Boort,1.0,2300.0,2009,10


In [47]:
# dropping the 'Water System Source' column, as it holds the same value for every Goulburn transaction
data_Goulburn = data_Goulburn.drop(columns = ["Water System Source"])

In [48]:
# previewing the Goulburn data after the 'Water System Source' column has been removed
data_Goulburn.head()

Unnamed: 0,Application Creation Date,Approved / Refused Date,Recorded / Rejected Date,Water Share ID,Trading Zone Source,Seller Water Authority,Seller Trading Zone Use,Buyer Water Authority,Buyer Trading Zone Use,Volume Traded (ML),Price Per ML ($),Approved Year,Approved Month
0,12/7/2013,2013-07-24,23/08/2013,WEE****24,1A Greater Goulburn,Goulburn-Murray Water,1A Greater Goulburn,Goulburn-Murray Water,1A Greater Goulburn,17.4,1250.0,2013,7
3,13/04/2018,2018-04-26,1/5/2018,WEE****36,1A Greater Goulburn,Goulburn-Murray Water,1A Greater Goulburn,Goulburn-Murray Water,1A Greater Goulburn,1.0,2800.0,2018,4
7,29/11/2018,2018-11-29,14/12/2018,WEE****60,1B Boort,Goulburn-Murray Water,1B Boort,Goulburn-Murray Water,1B Boort,20.0,3500.0,2018,11
12,29/11/2020,2020-11-30,8/1/2021,WEE****88,1A Greater Goulburn,Goulburn-Murray Water,1A Greater Goulburn,Goulburn-Murray Water,1A Greater Goulburn,600.0,0.0,2020,11
17,8/10/2009,2009-10-08,9/11/2009,WEE****06,1B Boort,Goulburn-Murray Water,1B Boort,Goulburn-Murray Water,1B Boort,1.0,2300.0,2009,10


In [49]:
# counting how many Goulburn transactions belong to each trading zone source
data_Goulburn["Trading Zone Source"].value_counts()

1A Greater Goulburn    12166
1B Boort                 373
3 Lower Goulburn         279
Name: Trading Zone Source, dtype: int64

In [50]:
# keeping only the Goulburn transactions whose trading zone source is '1A Greater Goulburn'
data_Goulburn = data_Goulburn.loc[data_Goulburn["Trading Zone Source"].eq("1A Greater Goulburn")]

In [51]:
# previewing the first five rows to confirm only the chosen trading zone source remains
data_Goulburn.head(n = 5)

Unnamed: 0,Application Creation Date,Approved / Refused Date,Recorded / Rejected Date,Water Share ID,Trading Zone Source,Seller Water Authority,Seller Trading Zone Use,Buyer Water Authority,Buyer Trading Zone Use,Volume Traded (ML),Price Per ML ($),Approved Year,Approved Month
0,12/7/2013,2013-07-24,23/08/2013,WEE****24,1A Greater Goulburn,Goulburn-Murray Water,1A Greater Goulburn,Goulburn-Murray Water,1A Greater Goulburn,17.4,1250.0,2013,7
3,13/04/2018,2018-04-26,1/5/2018,WEE****36,1A Greater Goulburn,Goulburn-Murray Water,1A Greater Goulburn,Goulburn-Murray Water,1A Greater Goulburn,1.0,2800.0,2018,4
12,29/11/2020,2020-11-30,8/1/2021,WEE****88,1A Greater Goulburn,Goulburn-Murray Water,1A Greater Goulburn,Goulburn-Murray Water,1A Greater Goulburn,600.0,0.0,2020,11
19,14/02/2014,2014-03-25,20/05/2014,WEE****48,1A Greater Goulburn,Goulburn-Murray Water,1A Greater Goulburn,Goulburn-Murray Water,1A Greater Goulburn,2.0,0.0,2014,3
27,2/9/2011,2011-09-05,29/09/2011,WEE****64,1A Greater Goulburn,Goulburn-Murray Water,1A Greater Goulburn,Goulburn-Murray Water,1A Greater Goulburn,95.8,1600.0,2011,9


In [52]:
# after the zone filter every row has the same 'Trading Zone Source', so the column is dropped
data_Goulburn = data_Goulburn.drop(columns = ["Trading Zone Source"])

In [53]:
# previewing the first five rows to confirm the column was removed
data_Goulburn.head(n = 5)

Unnamed: 0,Application Creation Date,Approved / Refused Date,Recorded / Rejected Date,Water Share ID,Seller Water Authority,Seller Trading Zone Use,Buyer Water Authority,Buyer Trading Zone Use,Volume Traded (ML),Price Per ML ($),Approved Year,Approved Month
0,12/7/2013,2013-07-24,23/08/2013,WEE****24,Goulburn-Murray Water,1A Greater Goulburn,Goulburn-Murray Water,1A Greater Goulburn,17.4,1250.0,2013,7
3,13/04/2018,2018-04-26,1/5/2018,WEE****36,Goulburn-Murray Water,1A Greater Goulburn,Goulburn-Murray Water,1A Greater Goulburn,1.0,2800.0,2018,4
12,29/11/2020,2020-11-30,8/1/2021,WEE****88,Goulburn-Murray Water,1A Greater Goulburn,Goulburn-Murray Water,1A Greater Goulburn,600.0,0.0,2020,11
19,14/02/2014,2014-03-25,20/05/2014,WEE****48,Goulburn-Murray Water,1A Greater Goulburn,Goulburn-Murray Water,1A Greater Goulburn,2.0,0.0,2014,3
27,2/9/2011,2011-09-05,29/09/2011,WEE****64,Goulburn-Murray Water,1A Greater Goulburn,Goulburn-Murray Water,1A Greater Goulburn,95.8,1600.0,2011,9


In [54]:
# counting the remaining Goulburn transactions per seller water authority, most frequent first
data_Goulburn["Seller Water Authority"].value_counts(sort = True, ascending = False)

Goulburn-Murray Water    12051
Lower Murray Water         115
Name: Seller Water Authority, dtype: int64

In [55]:
# keeping only the transactions sold by the 'Goulburn-Murray Water' authority, stored under a new name
data_Goulburn_Goulburn_Murray = data_Goulburn.loc[data_Goulburn["Seller Water Authority"].eq("Goulburn-Murray Water")]

In [56]:
# previewing the first five Goulburn transactions sold by the 'Goulburn-Murray Water' authority
data_Goulburn_Goulburn_Murray.head(n = 5)

Unnamed: 0,Application Creation Date,Approved / Refused Date,Recorded / Rejected Date,Water Share ID,Seller Water Authority,Seller Trading Zone Use,Buyer Water Authority,Buyer Trading Zone Use,Volume Traded (ML),Price Per ML ($),Approved Year,Approved Month
0,12/7/2013,2013-07-24,23/08/2013,WEE****24,Goulburn-Murray Water,1A Greater Goulburn,Goulburn-Murray Water,1A Greater Goulburn,17.4,1250.0,2013,7
3,13/04/2018,2018-04-26,1/5/2018,WEE****36,Goulburn-Murray Water,1A Greater Goulburn,Goulburn-Murray Water,1A Greater Goulburn,1.0,2800.0,2018,4
12,29/11/2020,2020-11-30,8/1/2021,WEE****88,Goulburn-Murray Water,1A Greater Goulburn,Goulburn-Murray Water,1A Greater Goulburn,600.0,0.0,2020,11
19,14/02/2014,2014-03-25,20/05/2014,WEE****48,Goulburn-Murray Water,1A Greater Goulburn,Goulburn-Murray Water,1A Greater Goulburn,2.0,0.0,2014,3
27,2/9/2011,2011-09-05,29/09/2011,WEE****64,Goulburn-Murray Water,1A Greater Goulburn,Goulburn-Murray Water,1A Greater Goulburn,95.8,1600.0,2011,9


In [57]:
# the only features needed for the monthly aggregation that follows; they are selected by name
# rather than by position so the result does not depend on the column order, and so that
# re-running this cell does not drop the columns that are meant to be kept
keep_features = ['Volume Traded (ML)', 'Price Per ML ($)', 'Approved Year', 'Approved Month']

# creating a list of all the remaining features of the data processed so far
features = list(data_Goulburn_Goulburn_Murray.columns.values)

# creating a list of data features which are desired to be removed from the data for further processing
remove_features = [feature for feature in features if feature not in keep_features]

# removing the unwanted data features (dates, IDs, authorities and trading zones) from the dataset
data_Goulburn_Goulburn_Murray = data_Goulburn_Goulburn_Murray.drop(columns = remove_features)

In [58]:
# displaying the reduced data, which now holds only the traded volume, price, approved year and approved month
# (pandas elides the middle rows of a long frame in the rendered output)
data_Goulburn_Goulburn_Murray

Unnamed: 0,Volume Traded (ML),Price Per ML ($),Approved Year,Approved Month
0,17.4,1250.0,2013,7
3,1.0,2800.0,2018,4
12,600.0,0.0,2020,11
19,2.0,0.0,2014,3
27,95.8,1600.0,2011,9
...,...,...,...,...
47605,1.0,3500.0,2020,9
47613,1.0,4000.0,2020,2
47617,40.0,0.0,2011,6
47620,100.0,3950.0,2021,1


In [59]:
# dollar value of each transaction (traded volume x price per ML); it is later summed per month
# to determine the volume weighted average price
volume_times_price = data_Goulburn_Goulburn_Murray['Volume Traded (ML)'].mul(data_Goulburn_Goulburn_Murray['Price Per ML ($)'])

# adding this value to the dataset as an additional feature/column
data_Goulburn_Goulburn_Murray['Volume Traded * Price Per ML'] = volume_times_price

In [60]:
# displaying the data with the new 'Volume Traded * Price Per ML' column to verify the product
data_Goulburn_Goulburn_Murray

Unnamed: 0,Volume Traded (ML),Price Per ML ($),Approved Year,Approved Month,Volume Traded * Price Per ML
0,17.4,1250.0,2013,7,21750.0
3,1.0,2800.0,2018,4,2800.0
12,600.0,0.0,2020,11,0.0
19,2.0,0.0,2014,3,0.0
27,95.8,1600.0,2011,9,153280.0
...,...,...,...,...,...
47605,1.0,3500.0,2020,9,3500.0
47613,1.0,4000.0,2020,2,4000.0
47617,40.0,0.0,2011,6,0.0
47620,100.0,3950.0,2021,1,395000.0


In [61]:
# distinct approval years present in the processed data (np.unique returns them sorted ascending)
year_range = np.unique(data_Goulburn_Goulburn_Murray['Approved Year'].values)

# one sub-frame per year, in the same order as 'year_range', each holding that year's
# traded water volume and allocation price rows
yearly_data = [
    data_Goulburn_Goulburn_Murray[data_Goulburn_Goulburn_Murray["Approved Year"] == year]
    for year in year_range
]

In [62]:
# displaying 'yearly_data': a list with one DataFrame per approved year for the 'Goulburn-Murray Water' selling authority
yearly_data

[       Volume Traded (ML)  Price Per ML ($)  Approved Year  Approved Month  \
 227                  15.0            1700.0           2009              12   
 259                   1.0               0.0           2009              11   
 322                  10.0            2000.0           2009              10   
 358                  15.0            2400.0           2009               7   
 581                  35.0            2400.0           2009               7   
 ...                   ...               ...            ...             ...   
 47201                 2.0            2000.0           2009              12   
 47378                40.0            1800.0           2009              11   
 47423               200.0            2400.0           2009               7   
 47426                63.3               0.0           2009               9   
 47508                20.0            2400.0           2009              10   
 
        Volume Traded * Price Per ML  
 227       

In [63]:
# aggregating the yearly transaction frames into one row per (year, month) holding the total
# traded volume and the total traded value; groupby sorts the keys, so the rows come out
# ordered by year and then by month
# NOTE(review): transactions with a price of 0.0 are included, so they add volume and pull the
# volume weighted average price down -- confirm whether they should be excluded beforehand
# NOTE(review): pandas sums skip NaN values; this assumes missing values were removed earlier -- confirm
monthly_totals = (
    pd.concat(yearly_data)
    .groupby(['Approved Year', 'Approved Month'], as_index = False)[['Volume Traded (ML)', 'Volume Traded * Price Per ML']]
    .sum()
)

# collecting the year, month and total monthly traded volume of each (year, month) group
years = monthly_totals['Approved Year'].tolist()
months = monthly_totals['Approved Month'].tolist()
monthly_traded_volume = monthly_totals['Volume Traded (ML)'].tolist()

# volume weighted average price = sum(volume * price) / sum(volume) for each month
volume_weighted_average_price = (monthly_totals['Volume Traded * Price Per ML'] / monthly_totals['Volume Traded (ML)']).tolist()

# creating a dictionary from the collected data
# (not named 'dict', which would hide the Python builtin for every later cell)
monthly_summary = {'Approved Year': years,
                   'Approved Month': months,
                   'Total Traded Volume (ML)': monthly_traded_volume,
                   'Volume Weighted Average Price ($)': volume_weighted_average_price}

# creating a dataframe from the above-defined dictionary; it replaces the transaction-level data
data_Goulburn_Goulburn_Murray = pd.DataFrame(monthly_summary)

In [64]:
# displaying the monthly dataset (total traded volume and volume weighted average price per year and month)
# of Goulburn for the 'Goulburn-Murray Water' selling authority for analysis
data_Goulburn_Goulburn_Murray

Unnamed: 0,Approved Year,Approved Month,Total Traded Volume (ML),Volume Weighted Average Price ($)
0,2009,5,491.0,2078.289165
1,2009,6,4281.8,816.759308
2,2009,7,18113.1,2328.518173
3,2009,8,5853.9,1256.423555
4,2009,9,7231.8,2344.605215
...,...,...,...,...
175,2023,12,3961.4,2651.322289
176,2024,1,1808.4,3215.754248
177,2024,2,1682.5,2726.564636
178,2024,3,3670.9,3976.780217


In [65]:
# creating a new 'Date' feature set to the first day of each (year, month) pair; the date is
# assembled from numeric year/month/day components instead of parsing a concatenated string,
# so it does not depend on how an un-padded month such as '5' is parsed
date_parts = (
    data_Goulburn_Goulburn_Murray[['Approved Year', 'Approved Month']]
    .rename(columns = {'Approved Year': 'year', 'Approved Month': 'month'})
    .assign(day = 1)
)
data_Goulburn_Goulburn_Murray['Date'] = pd.to_datetime(date_parts)

# removing the previous year and month columns from the dataset now that 'Date' replaces them
data_Goulburn_Goulburn_Murray = data_Goulburn_Goulburn_Murray.drop(columns = ['Approved Year', 'Approved Month'])

In [66]:
# displaying the monthly data with the new 'Date' column in place of the separate year and month columns
data_Goulburn_Goulburn_Murray

Unnamed: 0,Total Traded Volume (ML),Volume Weighted Average Price ($),Date
0,491.0,2078.289165,2009-05-01
1,4281.8,816.759308,2009-06-01
2,18113.1,2328.518173,2009-07-01
3,5853.9,1256.423555,2009-08-01
4,7231.8,2344.605215,2009-09-01
...,...,...,...
175,3961.4,2651.322289,2023-12-01
176,1808.4,3215.754248,2024-01-01
177,1682.5,2726.564636,2024-02-01
178,3670.9,3976.780217,2024-03-01


In [67]:
# writing the processed monthly dataset to a CSV file at the given relative path, without the row index
data_Goulburn_Goulburn_Murray.to_csv(path_or_buf = 'data_Goulburn_Goulburn_Murray.csv', index = False)