In [1]:
from datetime import datetime
from distutils.util import strtobool

import pandas as pd



# Converts the contents in a .tsf file into a dataframe and returns it along with other meta-data of the dataset: frequency, horizon, whether the dataset contains missing values and whether the series have equal lengths
#
# Parameters
# full_file_path_and_name - complete .tsf file path
# replace_missing_vals_with - a term to indicate the missing values in series in the returning dataframe
# value_column_name - Any name that is preferred to have as the name of the column containing series values in the returning dataframe
def convert_tsf_to_dataframe(
    full_file_path_and_name,
    replace_missing_vals_with="NaN",
    value_column_name="series_value",
):
    col_names = []
    col_types = []
    all_data = {}
    line_count = 0
    frequency = None
    forecast_horizon = None
    contain_missing_values = None
    contain_equal_length = None
    found_data_tag = False
    found_data_section = False
    started_reading_data_section = False

    with open(full_file_path_and_name, "r", encoding="cp1252") as file:
        for line in file:
            # Strip white space from start/end of line
            line = line.strip()

            if line:
                if line.startswith("@"):  # Read meta-data
                    if not line.startswith("@data"):
                        line_content = line.split(" ")
                        if line.startswith("@attribute"):
                            if (
                                len(line_content) != 3
                            ):  # Attributes have both name and type
                                raise Exception("Invalid meta-data specification.")

                            col_names.append(line_content[1])
                            col_types.append(line_content[2])
                        else:
                            if (
                                len(line_content) != 2
                            ):  # Other meta-data have only values
                                raise Exception("Invalid meta-data specification.")

                            if line.startswith("@frequency"):
                                frequency = line_content[1]
                            elif line.startswith("@horizon"):
                                forecast_horizon = int(line_content[1])
                            elif line.startswith("@missing"):
                                contain_missing_values = bool(
                                    strtobool(line_content[1])
                                )
                            elif line.startswith("@equallength"):
                                contain_equal_length = bool(strtobool(line_content[1]))

                    else:
                        if len(col_names) == 0:
                            raise Exception(
                                "Missing attribute section. Attribute section must come before data."
                            )

                        found_data_tag = True
                elif not line.startswith("#"):
                    if len(col_names) == 0:
                        raise Exception(
                            "Missing attribute section. Attribute section must come before data."
                        )
                    elif not found_data_tag:
                        raise Exception("Missing @data tag.")
                    else:
                        if not started_reading_data_section:
                            started_reading_data_section = True
                            found_data_section = True
                            all_series = []

                            for col in col_names:
                                all_data[col] = []

                        full_info = line.split(":")

                        if len(full_info) != (len(col_names) + 1):
                            raise Exception("Missing attributes/values in series.")

                        series = full_info[len(full_info) - 1]
                        series = series.split(",")

                        if len(series) == 0:
                            raise Exception(
                                "A given series should contains a set of comma separated numeric values. At least one numeric value should be there in a series. Missing values should be indicated with ? symbol"
                            )

                        numeric_series = []

                        for val in series:
                            if val == "?":
                                numeric_series.append(replace_missing_vals_with)
                            else:
                                numeric_series.append(float(val))

                        if numeric_series.count(replace_missing_vals_with) == len(
                            numeric_series
                        ):
                            raise Exception(
                                "All series values are missing. A given series should contains a set of comma separated numeric values. At least one numeric value should be there in a series."
                            )

                        all_series.append(pd.Series(numeric_series).array)

                        for i in range(len(col_names)):
                            att_val = None
                            if col_types[i] == "numeric":
                                att_val = int(full_info[i])
                            elif col_types[i] == "string":
                                att_val = str(full_info[i])
                            elif col_types[i] == "date":
                                att_val = datetime.strptime(
                                    full_info[i], "%Y-%m-%d %H-%M-%S"
                                )
                            else:
                                raise Exception(
                                    "Invalid attribute type."
                                )  # Currently, the code supports only numeric, string and date types. Extend this as required.

                            if att_val is None:
                                raise Exception("Invalid attribute value.")
                            else:
                                all_data[col_names[i]].append(att_val)

                line_count = line_count + 1

        if line_count == 0:
            raise Exception("Empty file.")
        if len(col_names) == 0:
            raise Exception("Missing attribute section.")
        if not found_data_section:
            raise Exception("Missing series information under data section.")

        all_data[value_column_name] = all_series
        loaded_data = pd.DataFrame(all_data)

        return (
            loaded_data,
            frequency,
            forecast_horizon,
            contain_missing_values,
            contain_equal_length,
        )


#loaded_data, frequency, forecast_horizon, contain_missing_values, contain_equal_length = convert_tsf_to_dataframe("TSForecasting/tsf_data/sample.tsf")

#print(loaded_data)
#print(frequency)
#print(forecast_horizon)
#print(contain_missing_values)
#print(contain_equal_length)

In [2]:
Phase_1 = convert_tsf_to_dataframe("phase_1_data.tsf")
Phase_1

(   series_name     start_timestamp  \
 0    Building0 2016-07-03 21:30:00   
 1    Building1 2019-01-09 23:15:00   
 2    Building3 2016-03-01 04:15:00   
 3    Building4 2019-07-03 04:45:00   
 4    Building5 2019-07-25 23:00:00   
 5    Building6 2019-07-25 01:45:00   
 6       Solar0 2020-04-25 14:00:00   
 7       Solar1 2018-12-31 13:00:00   
 8       Solar2 2019-06-05 14:00:00   
 9       Solar3 2019-06-05 14:00:00   
 10      Solar4 2019-06-05 14:00:00   
 11      Solar5 2019-01-15 13:00:00   
 
                                          series_value  
 0   [283.8, 283.8, 283.8, 606.0, 606.0, 606.0, 606...  
 1   [8.1, 15.7, 22.8, 32.7, 8.1, 16.5, 24.7, 34.5,...  
 2   [1321.0, 1321.0, 1321.0, 1321.0, 1293.0, 1293....  
 3   [2.0, NaN, 1.0, 2.0, NaN, 2.0, NaN, NaN, 2.0, ...  
 4   [30.0, 31.0, 24.0, 34.0, 30.0, 31.0, 26.0, 33....  
 5   [36.8, 34.6, 34.6, 36.2, 36.2, 35.2, 35.2, 35....  
 6   [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...  
 7   [0.0, 0.0, 0.0, 0.0, 0.0, 0.0,

In [3]:
Phase_2 = convert_tsf_to_dataframe("phase_2_data.tsf")
Phase_2

(   series_name     start_timestamp  \
 0    Building0 2016-07-03 21:30:00   
 1    Building1 2019-01-09 23:15:00   
 2    Building3 2016-03-01 04:15:00   
 3    Building4 2019-07-03 04:45:00   
 4    Building5 2019-07-25 23:00:00   
 5    Building6 2019-07-25 01:45:00   
 6       Solar0 2020-04-25 14:00:00   
 7       Solar1 2018-12-31 13:00:00   
 8       Solar2 2019-06-05 14:00:00   
 9       Solar3 2019-06-05 14:00:00   
 10      Solar4 2019-06-05 14:00:00   
 11      Solar5 2019-01-15 13:00:00   
 
                                          series_value  
 0   [283.8, 283.8, 283.8, 606.0, 606.0, 606.0, 606...  
 1   [8.1, 15.7, 22.8, 32.7, 8.1, 16.5, 24.7, 34.5,...  
 2   [1321.0, 1321.0, 1321.0, 1321.0, 1293.0, 1293....  
 3   [2.0, NaN, 1.0, 2.0, NaN, 2.0, NaN, NaN, 2.0, ...  
 4   [30.0, 31.0, 24.0, 34.0, 30.0, 31.0, 26.0, 33....  
 5   [36.8, 34.6, 34.6, 36.2, 36.2, 35.2, 35.2, 35....  
 6   [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...  
 7   [0.0, 0.0, 0.0, 0.0, 0.0, 0.0,

In [4]:
loaded_data, frequency, forecast_horizon, contain_missing_values, contain_equal_length = Phase_1

In [5]:
loaded_data

Unnamed: 0,series_name,start_timestamp,series_value
0,Building0,2016-07-03 21:30:00,"[283.8, 283.8, 283.8, 606.0, 606.0, 606.0, 606..."
1,Building1,2019-01-09 23:15:00,"[8.1, 15.7, 22.8, 32.7, 8.1, 16.5, 24.7, 34.5,..."
2,Building3,2016-03-01 04:15:00,"[1321.0, 1321.0, 1321.0, 1321.0, 1293.0, 1293...."
3,Building4,2019-07-03 04:45:00,"[2.0, NaN, 1.0, 2.0, NaN, 2.0, NaN, NaN, 2.0, ..."
4,Building5,2019-07-25 23:00:00,"[30.0, 31.0, 24.0, 34.0, 30.0, 31.0, 26.0, 33...."
5,Building6,2019-07-25 01:45:00,"[36.8, 34.6, 34.6, 36.2, 36.2, 35.2, 35.2, 35...."
6,Solar0,2020-04-25 14:00:00,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
7,Solar1,2018-12-31 13:00:00,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
8,Solar2,2019-06-05 14:00:00,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
9,Solar3,2019-06-05 14:00:00,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."


In [6]:
dataset = loaded_data.explode('series_value')
dataset

Unnamed: 0,series_name,start_timestamp,series_value
0,Building0,2016-07-03 21:30:00,283.8
0,Building0,2016-07-03 21:30:00,283.8
0,Building0,2016-07-03 21:30:00,283.8
0,Building0,2016-07-03 21:30:00,606.0
0,Building0,2016-07-03 21:30:00,606.0
...,...,...,...
11,Solar5,2019-01-15 13:00:00,25.28
11,Solar5,2019-01-15 13:00:00,25.3
11,Solar5,2019-01-15 13:00:00,28.62
11,Solar5,2019-01-15 13:00:00,31.94


## Building 0 

In [7]:
building_0 = dataset.loc[dataset['series_name']=='Building0']
building_0

Unnamed: 0,series_name,start_timestamp,series_value
0,Building0,2016-07-03 21:30:00,283.8
0,Building0,2016-07-03 21:30:00,283.8
0,Building0,2016-07-03 21:30:00,283.8
0,Building0,2016-07-03 21:30:00,606.0
0,Building0,2016-07-03 21:30:00,606.0
...,...,...,...
0,Building0,2016-07-03 21:30:00,96.9
0,Building0,2016-07-03 21:30:00,96.9
0,Building0,2016-07-03 21:30:00,37.4
0,Building0,2016-07-03 21:30:00,37.4


In [8]:
import datetime

# Input start date
start_date_str = '3/7/2016 21:30:00'
start_date = datetime.datetime.strptime(start_date_str, '%d/%m/%Y %H:%M:%S')

# Interval duration
interval_duration = datetime.timedelta(minutes=15)

# Number of intervals
num_intervals = len(loaded_data['series_value'][0])

# Calculate the end date
end_date = start_date + interval_duration * (num_intervals-1)

# Print the end date
print("End Date:", end_date.strftime('%d/%m/%Y %H:%M:%S'))

End Date: 30/09/2020 23:45:00


In [9]:
end_date_str = '30/09/2020 23:45:00'

# Convert start and end date strings to datetime objects
end_date = datetime.datetime.strptime(end_date_str, '%d/%m/%Y %H:%M:%S')

# Create a column of datetime values between start and end date with a frequency of 15 minutes
datetime_column = pd.date_range(start=start_date, end=end_date, freq='15T')


building_0['start_timestamp']=datetime_column

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  building_0['start_timestamp']=datetime_column


In [10]:
building_0

Unnamed: 0,series_name,start_timestamp,series_value
0,Building0,2016-07-03 21:30:00,283.8
0,Building0,2016-07-03 21:45:00,283.8
0,Building0,2016-07-03 22:00:00,283.8
0,Building0,2016-07-03 22:15:00,606.0
0,Building0,2016-07-03 22:30:00,606.0
...,...,...,...
0,Building0,2020-09-30 22:45:00,96.9
0,Building0,2020-09-30 23:00:00,96.9
0,Building0,2020-09-30 23:15:00,37.4
0,Building0,2020-09-30 23:30:00,37.4


In [11]:
building_0 = building_0.rename(columns={"series_name": "building_no", "start_timestamp": "timestamp","series_value":"energy_demand"})

In [12]:
building_0.dtypes

building_no              object
timestamp        datetime64[ns]
energy_demand            object
dtype: object

In [13]:
building_0 = building_0[building_0.energy_demand != "NaN"]

In [14]:
building_0["energy_demand"] = pd.to_numeric(building_0["energy_demand"])
building_0

Unnamed: 0,building_no,timestamp,energy_demand
0,Building0,2016-07-03 21:30:00,283.8
0,Building0,2016-07-03 21:45:00,283.8
0,Building0,2016-07-03 22:00:00,283.8
0,Building0,2016-07-03 22:15:00,606.0
0,Building0,2016-07-03 22:30:00,606.0
...,...,...,...
0,Building0,2020-09-30 22:45:00,96.9
0,Building0,2020-09-30 23:00:00,96.9
0,Building0,2020-09-30 23:15:00,37.4
0,Building0,2020-09-30 23:30:00,37.4


## Building 1 

In [15]:
building_1 = dataset.loc[dataset['series_name']=='Building1']
building_1

Unnamed: 0,series_name,start_timestamp,series_value
1,Building1,2019-01-09 23:15:00,8.1
1,Building1,2019-01-09 23:15:00,15.7
1,Building1,2019-01-09 23:15:00,22.8
1,Building1,2019-01-09 23:15:00,32.7
1,Building1,2019-01-09 23:15:00,8.1
...,...,...,...
1,Building1,2019-01-09 23:15:00,14.4
1,Building1,2019-01-09 23:15:00,18.8
1,Building1,2019-01-09 23:15:00,4.5
1,Building1,2019-01-09 23:15:00,9.6


In [16]:
import datetime

# Input start date
start_date_str = '09/01/2016 23:15:00'
start_date = datetime.datetime.strptime(start_date_str, '%d/%m/%Y %H:%M:%S')

# Interval duration
interval_duration = datetime.timedelta(minutes=15)

# Number of intervals
num_intervals = len(loaded_data['series_value'][1])

# Calculate the end date
end_date = start_date + interval_duration * (num_intervals-1)

# Print the end date
print("End Date:", end_date.strftime('%d/%m/%Y %H:%M:%S'))

End Date: 30/09/2017 23:45:00


In [17]:
end_date_str = '30/09/2017 23:45:00'

# Convert start and end date strings to datetime objects
end_date = pd.to_datetime(end_date_str)

# Create a column of datetime values between start and end date with a frequency of 15 minutes
datetime_column = pd.date_range(start=start_date, end=end_date, freq='15T')

#building_0['start_timestamp']=datetime_column
building_1['start_timestamp']=datetime_column

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  building_1['start_timestamp']=datetime_column


In [18]:
building_1

Unnamed: 0,series_name,start_timestamp,series_value
1,Building1,2016-01-09 23:15:00,8.1
1,Building1,2016-01-09 23:30:00,15.7
1,Building1,2016-01-09 23:45:00,22.8
1,Building1,2016-01-10 00:00:00,32.7
1,Building1,2016-01-10 00:15:00,8.1
...,...,...,...
1,Building1,2017-09-30 22:45:00,14.4
1,Building1,2017-09-30 23:00:00,18.8
1,Building1,2017-09-30 23:15:00,4.5
1,Building1,2017-09-30 23:30:00,9.6


In [19]:
building_1 = building_1.rename(columns={"series_name": "building_no", "start_timestamp": "timestamp","series_value":"energy_demand"})

In [20]:
building_1 = building_1[building_1.energy_demand != "NaN"]

In [21]:
building_1["energy_demand"] = pd.to_numeric(building_1["energy_demand"])
building_1

Unnamed: 0,building_no,timestamp,energy_demand
1,Building1,2016-01-09 23:15:00,8.1
1,Building1,2016-01-09 23:30:00,15.7
1,Building1,2016-01-09 23:45:00,22.8
1,Building1,2016-01-10 00:00:00,32.7
1,Building1,2016-01-10 00:15:00,8.1
...,...,...,...
1,Building1,2017-09-30 22:45:00,14.4
1,Building1,2017-09-30 23:00:00,18.8
1,Building1,2017-09-30 23:15:00,4.5
1,Building1,2017-09-30 23:30:00,9.6


## Building 3 

In [22]:
building_3 = dataset.loc[dataset['series_name']=='Building3']
building_3

Unnamed: 0,series_name,start_timestamp,series_value
2,Building3,2016-03-01 04:15:00,1321.0
2,Building3,2016-03-01 04:15:00,1321.0
2,Building3,2016-03-01 04:15:00,1321.0
2,Building3,2016-03-01 04:15:00,1321.0
2,Building3,2016-03-01 04:15:00,1293.0
...,...,...,...
2,Building3,2016-03-01 04:15:00,389.0
2,Building3,2016-03-01 04:15:00,389.0
2,Building3,2016-03-01 04:15:00,415.0
2,Building3,2016-03-01 04:15:00,415.0


In [23]:
import datetime

# Input start date
start_date_str = '01/03/2016 04:15:00'
start_date = datetime.datetime.strptime(start_date_str, '%d/%m/%Y %H:%M:%S')

# Interval duration
interval_duration = datetime.timedelta(minutes=15)

# Number of intervals
num_intervals = len(loaded_data['series_value'][2])

# Calculate the end date
end_date = start_date + interval_duration * (num_intervals-1)

# Print the end date
print("End Date:", end_date.strftime('%d/%m/%Y %H:%M:%S'))

End Date: 30/09/2020 23:45:00


In [24]:
end_date_str = '30/09/2020 23:45:00'

# Convert start and end date strings to datetime objects
end_date = pd.to_datetime(end_date_str)

# Create a column of datetime values between start and end date with a frequency of 15 minutes
datetime_column = pd.date_range(start=start_date, end=end_date, freq='15T')

#building_0['start_timestamp']=datetime_column
building_3['start_timestamp']=datetime_column

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  building_3['start_timestamp']=datetime_column


In [25]:
building_3 = building_3.rename(columns={"series_name": "building_no", "start_timestamp": "timestamp","series_value":"energy_demand"})

In [26]:
building_3 = building_3[building_3.energy_demand != "NaN"]

In [27]:
building_3["energy_demand"] = pd.to_numeric(building_3["energy_demand"])
building_3

Unnamed: 0,building_no,timestamp,energy_demand
2,Building3,2016-03-01 04:15:00,1321.0
2,Building3,2016-03-01 04:30:00,1321.0
2,Building3,2016-03-01 04:45:00,1321.0
2,Building3,2016-03-01 05:00:00,1321.0
2,Building3,2016-03-01 05:15:00,1293.0
...,...,...,...
2,Building3,2020-09-30 22:45:00,389.0
2,Building3,2020-09-30 23:00:00,389.0
2,Building3,2020-09-30 23:15:00,415.0
2,Building3,2020-09-30 23:30:00,415.0


## Building 4

In [28]:
building_4 = dataset.loc[dataset['series_name']=='Building4']
building_4

Unnamed: 0,series_name,start_timestamp,series_value
3,Building4,2019-07-03 04:45:00,2.0
3,Building4,2019-07-03 04:45:00,
3,Building4,2019-07-03 04:45:00,1.0
3,Building4,2019-07-03 04:45:00,2.0
3,Building4,2019-07-03 04:45:00,
...,...,...,...
3,Building4,2019-07-03 04:45:00,
3,Building4,2019-07-03 04:45:00,1.0
3,Building4,2019-07-03 04:45:00,
3,Building4,2019-07-03 04:45:00,


In [29]:
import datetime

# Input start date
start_date_str = '03/07/2019 04:45:00'
start_date = datetime.datetime.strptime(start_date_str, '%d/%m/%Y %H:%M:%S')

# Interval duration
interval_duration = datetime.timedelta(minutes=15)

# Number of intervals
num_intervals = len(loaded_data['series_value'][3])

# Calculate the end date
end_date = start_date + interval_duration * (num_intervals-1)

# Print the end date
print("End Date:", end_date.strftime('%d/%m/%Y %H:%M:%S'))

End Date: 30/09/2020 23:45:00


In [30]:
end_date_str = '30/09/2020 23:45:00'

# Convert start and end date strings to datetime objects
end_date = pd.to_datetime(end_date_str)

# Create a column of datetime values between start and end date with a frequency of 15 minutes
datetime_column = pd.date_range(start=start_date, end=end_date, freq='15T')

#building_0['start_timestamp']=datetime_column
building_4['start_timestamp']=datetime_column

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  building_4['start_timestamp']=datetime_column


In [31]:
building_4 = building_4.rename(columns={"series_name": "building_no", "start_timestamp": "timestamp","series_value":"energy_demand"})

In [32]:
building_4 = building_4[building_4.energy_demand != "NaN"]

In [33]:
building_4["energy_demand"] = pd.to_numeric(building_4["energy_demand"])
building_4

Unnamed: 0,building_no,timestamp,energy_demand
3,Building4,2019-07-03 04:45:00,2.0
3,Building4,2019-07-03 05:15:00,1.0
3,Building4,2019-07-03 05:30:00,2.0
3,Building4,2019-07-03 06:00:00,2.0
3,Building4,2019-07-03 06:45:00,2.0
...,...,...,...
3,Building4,2020-09-30 20:45:00,1.0
3,Building4,2020-09-30 21:00:00,2.0
3,Building4,2020-09-30 21:30:00,2.0
3,Building4,2020-09-30 22:00:00,1.0


## Building 5

In [34]:
building_5 = dataset.loc[dataset['series_name']=='Building5']
building_5

Unnamed: 0,series_name,start_timestamp,series_value
4,Building5,2019-07-25 23:00:00,30.0
4,Building5,2019-07-25 23:00:00,31.0
4,Building5,2019-07-25 23:00:00,24.0
4,Building5,2019-07-25 23:00:00,34.0
4,Building5,2019-07-25 23:00:00,30.0
...,...,...,...
4,Building5,2019-07-25 23:00:00,
4,Building5,2019-07-25 23:00:00,
4,Building5,2019-07-25 23:00:00,
4,Building5,2019-07-25 23:00:00,


In [35]:
import datetime

# Input start date
start_date_str = '25/07/2019 23:00:00'
start_date = datetime.datetime.strptime(start_date_str, '%d/%m/%Y %H:%M:%S')

# Interval duration
interval_duration = datetime.timedelta(minutes=15)

# Number of intervals
num_intervals = len(loaded_data['series_value'][4])

# Calculate the end date
end_date = start_date + interval_duration * (num_intervals-1)

# Print the end date
print("End Date:", end_date.strftime('%d/%m/%Y %H:%M:%S'))

End Date: 30/09/2020 23:45:00


In [36]:
end_date_str = '30/09/2020 23:45:00'

# Convert start and end date strings to datetime objects
end_date = pd.to_datetime(end_date_str)

# Create a column of datetime values between start and end date with a frequency of 15 minutes
datetime_column = pd.date_range(start=start_date, end=end_date, freq='15T')

#building_0['start_timestamp']=datetime_column
building_5['start_timestamp']=datetime_column

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  building_5['start_timestamp']=datetime_column


In [37]:
building_5 = building_5.rename(columns={"series_name": "building_no", "start_timestamp": "timestamp","series_value":"energy_demand"})

In [38]:
building_5 = building_5[building_5.energy_demand != "NaN"]

In [39]:
building_5["energy_demand"] = pd.to_numeric(building_5["energy_demand"])
building_5

Unnamed: 0,building_no,timestamp,energy_demand
4,Building5,2019-07-25 23:00:00,30.0
4,Building5,2019-07-25 23:15:00,31.0
4,Building5,2019-07-25 23:30:00,24.0
4,Building5,2019-07-25 23:45:00,34.0
4,Building5,2019-07-26 00:00:00,30.0
...,...,...,...
4,Building5,2020-09-30 04:00:00,35.0
4,Building5,2020-09-30 04:15:00,17.0
4,Building5,2020-09-30 04:30:00,35.0
4,Building5,2020-09-30 04:45:00,2.0


## Building 6

In [40]:
building_6 = dataset.loc[dataset['series_name']=='Building6']
building_6

Unnamed: 0,series_name,start_timestamp,series_value
5,Building6,2019-07-25 01:45:00,36.8
5,Building6,2019-07-25 01:45:00,34.6
5,Building6,2019-07-25 01:45:00,34.6
5,Building6,2019-07-25 01:45:00,36.2
5,Building6,2019-07-25 01:45:00,36.2
...,...,...,...
5,Building6,2019-07-25 01:45:00,35.6
5,Building6,2019-07-25 01:45:00,38.6
5,Building6,2019-07-25 01:45:00,38.6
5,Building6,2019-07-25 01:45:00,38.6


In [41]:
import datetime

# Input start date
start_date_str = '25/07/2019 01:45:00'
start_date = datetime.datetime.strptime(start_date_str, '%d/%m/%Y %H:%M:%S')

# Interval duration
interval_duration = datetime.timedelta(minutes=15)

# Number of intervals
num_intervals = len(loaded_data['series_value'][5])

# Calculate the end date
end_date = start_date + interval_duration * (num_intervals-1)

# Print the end date
print("End Date:", end_date.strftime('%d/%m/%Y %H:%M:%S'))

End Date: 30/09/2020 23:45:00


In [42]:
end_date_str = '30/09/2020 23:45:00'

# Convert start and end date strings to datetime objects
end_date = pd.to_datetime(end_date_str)

# Create a column of datetime values between start and end date with a frequency of 15 minutes
datetime_column = pd.date_range(start=start_date, end=end_date, freq='15T')

#building_0['start_timestamp']=datetime_column
building_6['start_timestamp']=datetime_column

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  building_6['start_timestamp']=datetime_column


In [43]:
building_6 = building_6.rename(columns={"series_name": "building_no", "start_timestamp": "timestamp","series_value":"energy_demand"})

In [44]:
building_6 = building_6[building_6.energy_demand != "NaN"]

In [45]:
building_6["energy_demand"] = pd.to_numeric(building_6["energy_demand"])
building_6

Unnamed: 0,building_no,timestamp,energy_demand
5,Building6,2019-07-25 01:45:00,36.8
5,Building6,2019-07-25 02:00:00,34.6
5,Building6,2019-07-25 02:15:00,34.6
5,Building6,2019-07-25 02:30:00,36.2
5,Building6,2019-07-25 02:45:00,36.2
...,...,...,...
5,Building6,2020-09-30 22:45:00,35.6
5,Building6,2020-09-30 23:00:00,38.6
5,Building6,2020-09-30 23:15:00,38.6
5,Building6,2020-09-30 23:30:00,38.6


## Solar 0

In [46]:
solar_0 = dataset.loc[dataset['series_name']=='Solar0']
solar_0

Unnamed: 0,series_name,start_timestamp,series_value
6,Solar0,2020-04-25 14:00:00,0.0
6,Solar0,2020-04-25 14:00:00,0.0
6,Solar0,2020-04-25 14:00:00,0.0
6,Solar0,2020-04-25 14:00:00,0.0
6,Solar0,2020-04-25 14:00:00,0.0
...,...,...,...
6,Solar0,2020-04-25 14:00:00,29.67
6,Solar0,2020-04-25 14:00:00,29.94
6,Solar0,2020-04-25 14:00:00,34.02
6,Solar0,2020-04-25 14:00:00,38.1


In [47]:
import datetime

# Input start date
start_date_str = '25/04/2020 14:00:00'
start_date = datetime.datetime.strptime(start_date_str, '%d/%m/%Y %H:%M:%S')

# Interval duration
interval_duration = datetime.timedelta(minutes=15)

# Number of intervals
num_intervals = len(loaded_data['series_value'][6])

# Calculate the end date
end_date = start_date + interval_duration * (num_intervals-1)

# Print the end date
print("End Date:", end_date.strftime('%d/%m/%Y %H:%M:%S'))

End Date: 30/09/2020 23:45:00


In [48]:
end_date_str = '30/09/2020 23:45:00'

# Convert start and end date strings to datetime objects
end_date = pd.to_datetime(end_date_str)

# Create a column of datetime values between start and end date with a frequency of 15 minutes
datetime_column = pd.date_range(start=start_date, end=end_date, freq='15T')

#building_0['start_timestamp']=datetime_column
solar_0['start_timestamp']=datetime_column

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  solar_0['start_timestamp']=datetime_column


In [49]:
solar_0 = solar_0.rename(columns={"series_name": "building_no", "start_timestamp": "timestamp","series_value":"power_output"})

In [50]:
solar_0 = solar_0[solar_0.power_output != "NaN"]

In [51]:
solar_0["power_output"] = pd.to_numeric(solar_0["power_output"])
solar_0

Unnamed: 0,building_no,timestamp,power_output
6,Solar0,2020-04-25 14:00:00,0.00
6,Solar0,2020-04-25 14:15:00,0.00
6,Solar0,2020-04-25 14:30:00,0.00
6,Solar0,2020-04-25 14:45:00,0.00
6,Solar0,2020-04-25 15:00:00,0.00
...,...,...,...
6,Solar0,2020-09-30 22:45:00,29.67
6,Solar0,2020-09-30 23:00:00,29.94
6,Solar0,2020-09-30 23:15:00,34.02
6,Solar0,2020-09-30 23:30:00,38.10


## Solar 1 

In [52]:
solar_1 = dataset.loc[dataset['series_name']=='Solar1']
solar_1

Unnamed: 0,series_name,start_timestamp,series_value
7,Solar1,2018-12-31 13:00:00,0.0
7,Solar1,2018-12-31 13:00:00,0.0
7,Solar1,2018-12-31 13:00:00,0.0
7,Solar1,2018-12-31 13:00:00,0.0
7,Solar1,2018-12-31 13:00:00,0.0
...,...,...,...
7,Solar1,2018-12-31 13:00:00,6.68
7,Solar1,2018-12-31 13:00:00,6.71
7,Solar1,2018-12-31 13:00:00,8.13
7,Solar1,2018-12-31 13:00:00,9.55


In [53]:
import datetime

# Input start date
start_date_str = '31/12/2018 13:00:00'
start_date = datetime.datetime.strptime(start_date_str, '%d/%m/%Y %H:%M:%S')

# Interval duration
interval_duration = datetime.timedelta(minutes=15)

# Number of intervals
num_intervals = len(loaded_data['series_value'][7])

# Calculate the end date
end_date = start_date + interval_duration * (num_intervals-1)

# Print the end date
print("End Date:", end_date.strftime('%d/%m/%Y %H:%M:%S'))

End Date: 30/09/2020 23:45:00


In [54]:
end_date_str = '30/09/2020 23:45:00'

# Convert start and end date strings to datetime objects
end_date = pd.to_datetime(end_date_str)

# Create a column of datetime values between start and end date with a frequency of 15 minutes
datetime_column = pd.date_range(start=start_date, end=end_date, freq='15T')

#building_0['start_timestamp']=datetime_column
solar_1['start_timestamp']=datetime_column

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  solar_1['start_timestamp']=datetime_column


In [55]:
solar_1 = solar_1.rename(columns={"series_name": "building_no", "start_timestamp": "timestamp","series_value":"power_output"})

In [56]:
solar_1 = solar_1[solar_1.power_output != "NaN"]

In [57]:
solar_1["power_output"] = pd.to_numeric(solar_1["power_output"])
solar_1

Unnamed: 0,building_no,timestamp,power_output
7,Solar1,2018-12-31 13:00:00,0.00
7,Solar1,2018-12-31 13:15:00,0.00
7,Solar1,2018-12-31 13:30:00,0.00
7,Solar1,2018-12-31 13:45:00,0.00
7,Solar1,2018-12-31 14:00:00,0.00
...,...,...,...
7,Solar1,2020-09-30 22:45:00,6.68
7,Solar1,2020-09-30 23:00:00,6.71
7,Solar1,2020-09-30 23:15:00,8.13
7,Solar1,2020-09-30 23:30:00,9.55


## Solar 2 

In [59]:
solar_2 = dataset.loc[dataset['series_name']=='Solar2']
solar_2

Unnamed: 0,series_name,start_timestamp,series_value
8,Solar2,2019-06-05 14:00:00,0.0
8,Solar2,2019-06-05 14:00:00,0.0
8,Solar2,2019-06-05 14:00:00,0.0
8,Solar2,2019-06-05 14:00:00,0.0
8,Solar2,2019-06-05 14:00:00,0.0
...,...,...,...
8,Solar2,2019-06-05 14:00:00,6.34
8,Solar2,2019-06-05 14:00:00,6.39
8,Solar2,2019-06-05 14:00:00,7.71
8,Solar2,2019-06-05 14:00:00,9.03


In [61]:
import datetime

# Input start date
start_date_str = '05/06/2019 14:00:00'
start_date = datetime.datetime.strptime(start_date_str, '%d/%m/%Y %H:%M:%S')

# Interval duration
interval_duration = datetime.timedelta(minutes=15)

# Number of intervals
num_intervals = len(loaded_data['series_value'][8])

# Calculate the end date
end_date = start_date + interval_duration * (num_intervals-1)

# Print the end date
print("End Date:", end_date.strftime('%d/%m/%Y %H:%M:%S'))

End Date: 30/09/2020 23:45:00


In [62]:
end_date_str = '30/09/2020 23:45:00'

# Convert start and end date strings to datetime objects
end_date = pd.to_datetime(end_date_str)

# Create a column of datetime values between start and end date with a frequency of 15 minutes
datetime_column = pd.date_range(start=start_date, end=end_date, freq='15T')

#building_0['start_timestamp']=datetime_column
solar_2['start_timestamp']=datetime_column

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  solar_2['start_timestamp']=datetime_column


In [63]:
solar_2 = solar_2.rename(columns={"series_name": "building_no", "start_timestamp": "timestamp","series_value":"power_output"})

In [64]:
solar_2 = solar_2[solar_2.power_output != "NaN"]

In [65]:
solar_2["power_output"] = pd.to_numeric(solar_2["power_output"])
solar_2

Unnamed: 0,building_no,timestamp,power_output
8,Solar2,2019-06-05 14:00:00,0.00
8,Solar2,2019-06-05 14:15:00,0.00
8,Solar2,2019-06-05 14:30:00,0.00
8,Solar2,2019-06-05 14:45:00,0.00
8,Solar2,2019-06-05 15:00:00,0.00
...,...,...,...
8,Solar2,2020-09-30 22:45:00,6.34
8,Solar2,2020-09-30 23:00:00,6.39
8,Solar2,2020-09-30 23:15:00,7.71
8,Solar2,2020-09-30 23:30:00,9.03


## Solar 3

In [66]:
solar_3 = dataset.loc[dataset['series_name']=='Solar3']
solar_3

Unnamed: 0,series_name,start_timestamp,series_value
9,Solar3,2019-06-05 14:00:00,0.0
9,Solar3,2019-06-05 14:00:00,0.0
9,Solar3,2019-06-05 14:00:00,0.0
9,Solar3,2019-06-05 14:00:00,0.0
9,Solar3,2019-06-05 14:00:00,0.0
...,...,...,...
9,Solar3,2019-06-05 14:00:00,5.49
9,Solar3,2019-06-05 14:00:00,5.65
9,Solar3,2019-06-05 14:00:00,6.83
9,Solar3,2019-06-05 14:00:00,8.02


In [67]:
import datetime

# Input start date
start_date_str = '05/06/2019 14:00:00'
start_date = datetime.datetime.strptime(start_date_str, '%d/%m/%Y %H:%M:%S')

# Interval duration
interval_duration = datetime.timedelta(minutes=15)

# Number of intervals
num_intervals = len(loaded_data['series_value'][9])

# Calculate the end date
end_date = start_date + interval_duration * (num_intervals-1)

# Print the end date
print("End Date:", end_date.strftime('%d/%m/%Y %H:%M:%S'))

End Date: 30/09/2020 23:45:00


In [68]:
end_date_str = '30/09/2020 23:45:00'

# Convert start and end date strings to datetime objects
end_date = pd.to_datetime(end_date_str)

# Create a column of datetime values between start and end date with a frequency of 15 minutes
datetime_column = pd.date_range(start=start_date, end=end_date, freq='15T')

#building_0['start_timestamp']=datetime_column
solar_3['start_timestamp']=datetime_column

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  solar_3['start_timestamp']=datetime_column


In [69]:
solar_3 = solar_3.rename(columns={"series_name": "building_no", "start_timestamp": "timestamp","series_value":"power_output"})

In [70]:
solar_3 = solar_3[solar_3.power_output != "NaN"]

In [71]:
solar_3["power_output"] = pd.to_numeric(solar_3["power_output"])
solar_3

Unnamed: 0,building_no,timestamp,power_output
9,Solar3,2019-06-05 14:00:00,0.00
9,Solar3,2019-06-05 14:15:00,0.00
9,Solar3,2019-06-05 14:30:00,0.00
9,Solar3,2019-06-05 14:45:00,0.00
9,Solar3,2019-06-05 15:00:00,0.00
...,...,...,...
9,Solar3,2020-09-30 22:45:00,5.49
9,Solar3,2020-09-30 23:00:00,5.65
9,Solar3,2020-09-30 23:15:00,6.83
9,Solar3,2020-09-30 23:30:00,8.02


## Solar 4

In [72]:
solar_4 = dataset.loc[dataset['series_name']=='Solar4']
solar_4

Unnamed: 0,series_name,start_timestamp,series_value
10,Solar4,2019-06-05 14:00:00,0.0
10,Solar4,2019-06-05 14:00:00,0.0
10,Solar4,2019-06-05 14:00:00,0.0
10,Solar4,2019-06-05 14:00:00,0.0
10,Solar4,2019-06-05 14:00:00,0.0
...,...,...,...
10,Solar4,2019-06-05 14:00:00,4.55
10,Solar4,2019-06-05 14:00:00,4.57
10,Solar4,2019-06-05 14:00:00,5.32
10,Solar4,2019-06-05 14:00:00,6.08


In [73]:
import datetime

# Input start date
start_date_str = '05/06/2019 14:00:00'
start_date = datetime.datetime.strptime(start_date_str, '%d/%m/%Y %H:%M:%S')

# Interval duration
interval_duration = datetime.timedelta(minutes=15)

# Number of intervals
num_intervals = len(loaded_data['series_value'][10])

# Calculate the end date
end_date = start_date + interval_duration * (num_intervals-1)

# Print the end date
print("End Date:", end_date.strftime('%d/%m/%Y %H:%M:%S'))

End Date: 30/09/2020 23:45:00


In [74]:
end_date_str = '30/09/2020 23:45:00'

# Convert start and end date strings to datetime objects
end_date = pd.to_datetime(end_date_str)

# Create a column of datetime values between start and end date with a frequency of 15 minutes
datetime_column = pd.date_range(start=start_date, end=end_date, freq='15T')

#building_0['start_timestamp']=datetime_column
solar_4['start_timestamp']=datetime_column

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  solar_4['start_timestamp']=datetime_column


In [75]:
solar_4 = solar_4.rename(columns={"series_name": "building_no", "start_timestamp": "timestamp","series_value":"power_output"})

In [76]:
solar_4 = solar_4[solar_4.power_output != "NaN"]

In [77]:
solar_4["power_output"] = pd.to_numeric(solar_4["power_output"])
solar_4

Unnamed: 0,building_no,timestamp,power_output
10,Solar4,2019-06-05 14:00:00,0.00
10,Solar4,2019-06-05 14:15:00,0.00
10,Solar4,2019-06-05 14:30:00,0.00
10,Solar4,2019-06-05 14:45:00,0.00
10,Solar4,2019-06-05 15:00:00,0.00
...,...,...,...
10,Solar4,2020-09-30 22:45:00,4.55
10,Solar4,2020-09-30 23:00:00,4.57
10,Solar4,2020-09-30 23:15:00,5.32
10,Solar4,2020-09-30 23:30:00,6.08


## Solar 5

In [78]:
solar_5 = dataset.loc[dataset['series_name']=='Solar5']
solar_5

Unnamed: 0,series_name,start_timestamp,series_value
11,Solar5,2019-01-15 13:00:00,0.0
11,Solar5,2019-01-15 13:00:00,0.0
11,Solar5,2019-01-15 13:00:00,0.0
11,Solar5,2019-01-15 13:00:00,0.0
11,Solar5,2019-01-15 13:00:00,0.0
...,...,...,...
11,Solar5,2019-01-15 13:00:00,25.28
11,Solar5,2019-01-15 13:00:00,25.3
11,Solar5,2019-01-15 13:00:00,28.62
11,Solar5,2019-01-15 13:00:00,31.94


In [79]:
import datetime

# Input start date
start_date_str = '15/01/2019 13:00:00'
start_date = datetime.datetime.strptime(start_date_str, '%d/%m/%Y %H:%M:%S')

# Interval duration
interval_duration = datetime.timedelta(minutes=15)

# Number of intervals
num_intervals = len(loaded_data['series_value'][11])

# Calculate the end date
end_date = start_date + interval_duration * (num_intervals-1)

# Print the end date
print("End Date:", end_date.strftime('%d/%m/%Y %H:%M:%S'))

End Date: 30/09/2020 23:45:00


In [80]:
end_date_str = '30/09/2020 23:45:00'

# Convert start and end date strings to datetime objects
end_date = pd.to_datetime(end_date_str)

# Create a column of datetime values between start and end date with a frequency of 15 minutes
datetime_column = pd.date_range(start=start_date, end=end_date, freq='15T')

#building_0['start_timestamp']=datetime_column
solar_5['start_timestamp']=datetime_column

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  solar_5['start_timestamp']=datetime_column


In [81]:
solar_5 = solar_5.rename(columns={"series_name": "building_no", "start_timestamp": "timestamp","series_value":"power_output"})

In [82]:
solar_5 = solar_5[solar_5.power_output != "NaN"]

In [83]:
solar_5["power_output"] = pd.to_numeric(solar_5["power_output"])
solar_5

Unnamed: 0,building_no,timestamp,power_output
11,Solar5,2019-01-15 13:00:00,0.00
11,Solar5,2019-01-15 13:15:00,0.00
11,Solar5,2019-01-15 13:30:00,0.00
11,Solar5,2019-01-15 13:45:00,0.00
11,Solar5,2019-01-15 14:00:00,0.00
...,...,...,...
11,Solar5,2020-09-30 22:45:00,25.28
11,Solar5,2020-09-30 23:00:00,25.30
11,Solar5,2020-09-30 23:15:00,28.62
11,Solar5,2020-09-30 23:30:00,31.94
