<a href="https://colab.research.google.com/github/GargyaGokhale/Foundation_of_DataMining/blob/master/training_data_pickle_code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Mount Google Drive inside the Colab VM at /content/drive so the cells below
# can reach files stored on Drive. Running this prompts for an authorization code.
from google.colab import drive
drive.mount('/content/drive')


Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
# Make the thesis code folder on the mounted Drive the working directory,
# then list its contents.
%cd /content/drive/My\ Drive/Colab\ Notebooks/Thesis_Codes
%ls


/content/drive/My Drive/Colab Notebooks/Thesis_Codes
dueling_neural_network_class.ipynb  [0m[01;34mmodel_pickle_files[0m/
[01;34mduelling_neural_network[0m/            training_data_pickle_code.ipynb
ev_fleet_dispatch_model.ipynb
Collecting mip
[?25l  Downloading https://files.pythonhosted.org/packages/55/ac/a5b29aa7f6f35a7cad44388bb19313aed830435b9196339e2e47426fd1f6/mip-1.8.2-py3-none-any.whl (47.6MB)
[K     |████████████████████████████████| 47.6MB 104kB/s 
Installing collected packages: mip
Successfully installed mip-1.8.2


In [0]:
import torch
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from mpl_toolkits.mplot3d import Axes3D
import pprint
from sklearn import ensemble
from sklearn.preprocessing import MinMaxScaler
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.preprocessing import MinMaxScaler
from torch.utils.tensorboard import SummaryWriter
# from mip.model import *
import os
import pickle
from datetime import datetime
import pprint
import sys

In [0]:
"""
EV Fleet Model with Dispatch
01-03-2020

"""


"""Helper Functions"""


def truncate_normal(mean, sd, min_value, max_value):
    """Draw one normal sample and clamp it to the interval [min_value, max_value].

    Exactly one value is drawn with ``np.random.normal(mean, sd, 1)``.
    A draw below ``min_value`` is replaced by ``min_value``; a draw inside the
    interval is returned unchanged; anything else is replaced by ``max_value``.
    Note that this clamps the sample rather than re-drawing it.
    """
    sample = np.random.normal(mean, sd, 1)[0]

    if sample < min_value:
        return min_value
    if min_value <= sample <= max_value:
        return sample
    return max_value


def get_index(element, space):
    """Return the index of the first entry of ``space`` that equals ``element``.

    ``space`` is converted to a NumPy array and compared element-wise.
    Raises ``IndexError`` when no entry matches.
    """
    matches = np.nonzero(element == np.array(space))[0]
    return matches[0]


"""EV Class"""


class EV(object):
    """
    Single electric vehicle with a randomly drawn parking window and energy need.

    Random attributes (three ``truncate_normal`` draws, made in this order):
        arrival_time : N(8, 2.0) clamped to [time_vec[0], 12]
        depart_time  : N(17, 2.0) clamped to [arrival_time + 3, time_vec[-1]]
        energy_req   : N(80, 100) clamped to [40, 172] and multiplied by 0.174
                       (per the original author's notes: distance travelled times
                       the Nissan Leaf efficiency figure), then capped at
                       power_max * (depart_time - arrival_time) so the request can
                       always be met inside the parking window.

    Maximum charging power is 3.0 kW.

    Energy envelope
        e_min / e_max hold, per entry of time_vec, the lowest / highest cumulative
        energy the vehicle may have; see calculate_energy_boundary().

    Priority based ON-OFF charging
        calculate_power(priority) returns power_max when
        priority <= corner_priority and 0 otherwise.
        corner_priority is refreshed at the end of every dispatch instruction from
        the energy still missing and the energy that can still be delivered before
        departure.

    energy_reset()
        The per-time-step states are stored in state_tracker. Call energy_reset()
        before starting a new run to clear them.
    """

    def __init__(self):
        self.time_vec = np.arange(5, 21)
        horizon = len(self.time_vec)

        # Energy envelope, filled in by calculate_energy_boundary()
        self.e_max = np.zeros(horizon)
        self.e_min = np.zeros(horizon)

        # The order of the random draws is part of the behaviour under a fixed seed.
        self.arrival_time = truncate_normal(8, 2.0, self.time_vec[0], 12)
        self.depart_time = truncate_normal(17, 2.0, self.arrival_time+3, self.time_vec[-1])
        self.power_max = 3.0            # Maximum charging power in kW
        self.current_energy = 0.

        # Largest energy that fits into the parking window at full power
        energy_max = self.power_max * (self.depart_time - self.arrival_time)
        energy_req = truncate_normal(80, 100, 40, 172) * 0.174
        self.energy_req = min(energy_req, energy_max)   # Energy for a full charge in kWh

        self.calculate_energy_boundary()
        self.corner_priority = 0.
        self.demand_vec = []            # Power demand vector
        self.state_tracker = np.zeros(horizon)

    def calculate_energy_boundary(self):
        """Fill e_min and e_max for every entry of time_vec.

        Before arrival both bounds are 0. While parked, e_min is the energy needed
        now so that 75% of energy_req is still reachable at full power by departure
        (never below 0) and e_max is what full-power charging since arrival could
        have delivered (never above energy_req). After departure the bounds stay at
        75% and 100% of energy_req.
        """
        for time_index, time in enumerate(self.time_vec):
            if time < self.arrival_time:
                lower = 0.
                upper = 0.
            elif self.arrival_time <= time <= self.depart_time:
                lower = max((0.75*self.energy_req - self.power_max*(self.depart_time-time)), 0)
                upper = min(self.energy_req, self.power_max*(time-self.arrival_time))
            else:
                lower = self.energy_req*0.75
                upper = self.energy_req
            self.e_min[time_index] = lower
            self.e_max[time_index] = upper

    def calculate_power(self, priority):
        """Return power_max when ``priority`` is at or below corner_priority, else 0."""
        return self.power_max if priority <= self.corner_priority else 0.

    def dispatch_instruction(self, time, equilibrium_priority):
        """Advance the vehicle by one time step and return (next energy, power used).

        The ON-OFF power requested through ``equilibrium_priority`` is overridden
        whenever it would move the energy outside [e_min, e_max] of the next step.
        The entry after ``time`` in state_tracker / e_min / e_max is accessed, so
        ``time`` must not be the last entry of time_vec.
        """
        delta_t = 1
        step = get_index(time, self.time_vec)
        following = step + 1

        requested_power = self.calculate_power(equilibrium_priority)
        self.current_energy = self.state_tracker[step]
        next_energy_state = self.current_energy + (requested_power * delta_t)

        if next_energy_state < self.e_min[following]:
            # Too little energy: force charging up to the lower bound
            next_energy_state = self.e_min[following]
            actual_power = (next_energy_state - self.current_energy) / delta_t
        elif next_energy_state > self.e_max[following]:
            # Too much energy: curtail to the upper bound
            next_energy_state = self.e_max[following]
            actual_power = (next_energy_state - self.current_energy) / delta_t
        else:
            actual_power = requested_power

        self.state_tracker[following] = next_energy_state
        self.current_energy = next_energy_state

        # Only a parked vehicle that has not yet reached its departure time has a
        # non-zero corner priority.
        if self.arrival_time <= time < self.depart_time:
            missing_energy = self.energy_req - self.current_energy
            self.corner_priority = missing_energy/((self.depart_time-time)*self.power_max)
        else:
            self.corner_priority = 0.

        return next_energy_state, actual_power

    def energy_reset(self):
        """Clear the stored trajectory, current energy and corner priority."""
        self.state_tracker = np.zeros(len(self.time_vec))
        self.current_energy = 0.
        self.corner_priority = 0.


"""EV Fleet Class"""


class EVFleet(object):
    """
    Fleet of EVs that is dispatched through a single equilibrium priority.

    - Builds ``number_evs`` EV objects and aggregates their energy envelopes
      (e_min / e_max are the element-wise sums of the individual envelopes).
    - Maintains a demand curve: ``demand_vec[i]`` is the total power the fleet
      draws when the priority signal equals ``priority_vec[i]``.
    - dispatch_instruction() maps a requested bulk power to the priority whose
      demand is closest, forwards it to every EV and returns the next fleet
      energy state together with the power actually used.
    - energy_reset() restores the fleet, including the demand curve, to its
      start-of-run state.
    """

    def __init__(self, number_evs):
        self.n_evs = number_evs
        self.ev_list = [EV() for _ in range(self.n_evs)]

        self.time_vec = self.ev_list[0].time_vec            # Use time vector from EV Class

        self.e_max = np.zeros(len(self.time_vec))
        self.e_min = np.zeros(len(self.time_vec))
        self.state_tracker = np.zeros(len(self.time_vec))
        self.current_energy = 0.

        self.priorities = [ev.corner_priority for ev in self.ev_list]
        self.demand_vec, self.priority_vec = self.calculate_demand()
        self.calculate_energy_boundary()

    def calculate_energy_boundary(self):
        """Sum the individual EV envelopes into the fleet envelope, per time step."""
        for time_index in range(len(self.time_vec)):
            e_min = 0.
            e_max = 0.
            for ev in self.ev_list:
                e_min += ev.e_min[time_index]
                e_max += ev.e_max[time_index]

            self.e_min[time_index] = e_min
            self.e_max[time_index] = e_max

    def calculate_demand(self):
        """Build the fleet demand curve from the EVs' current corner priorities.

        Returns:
            (demand_vec, priority_vec): total fleet power at each priority of a
            grid that runs from 0 to max(largest corner priority, 1.0) in steps
            of one hundredth of that maximum.
        """
        # Refresh the cached priorities BEFORE sizing the grid. Previously the
        # refresh happened after the grid was built, so the grid range lagged one
        # dispatch step behind the corner priorities used to evaluate it.
        self.priorities = [ev.corner_priority for ev in self.ev_list]

        max_priority = max(max(self.priorities), 1.0)
        priority_step = max_priority / 100
        priority_vec = np.arange(0, max_priority+priority_step, priority_step)

        demand_vec = np.zeros(len(priority_vec))
        for priority_index, priority in enumerate(priority_vec):
            power_priority = 0.
            for ev in self.ev_list:
                power_priority += ev.calculate_power(priority)
            demand_vec[priority_index] = power_priority
        return demand_vec, priority_vec

    def calculate_equilibrium_priority(self, power):
        """Return the grid priority whose fleet demand is closest to ``power``.

        Ties are resolved in favour of the lowest priority (first minimum).
        """
        priority_index = np.argmin(abs(self.demand_vec - power))
        return self.priority_vec[priority_index]

    def dispatch_instruction(self, time, power):
        """Dispatch ``power`` at ``time`` and return (next fleet energy, power used).

        The entry after ``time`` in state_tracker is written, so ``time`` must not
        be the last entry of time_vec.
        """
        time_index = get_index(time, self.time_vec)
        equilibrium_priority = self.calculate_equilibrium_priority(power)

        next_energy = 0.
        actual_power = 0.
        for ev in self.ev_list:
            e, p = ev.dispatch_instruction(time, equilibrium_priority)
            next_energy += e
            actual_power += p

        self.state_tracker[time_index+1] = next_energy
        # Every EV has updated its corner priority: rebuild the demand curve
        self.demand_vec, self.priority_vec = self.calculate_demand()

        return next_energy, actual_power

    def no_dispatch_operation(self, time, power):
        """Apply ``power`` to the aggregated fleet state without dispatching to EVs.

        The resulting energy is clamped to the fleet envelope of the next time
        step. Returns (next fleet energy, power actually used).
        """
        time_index = get_index(time, self.time_vec)
        delta_t = 1
        current_energy_state = self.state_tracker[time_index]
        next_energy_state = current_energy_state + power * delta_t

        if next_energy_state < self.e_min[time_index + 1]:
            next_energy_state = self.e_min[time_index + 1]
            actual_power = (next_energy_state - current_energy_state) / delta_t
        elif next_energy_state > self.e_max[time_index + 1]:
            next_energy_state = self.e_max[time_index + 1]
            actual_power = (next_energy_state - current_energy_state) / delta_t
        else:
            actual_power = power

        self.state_tracker[time_index+1] = next_energy_state

        return next_energy_state, actual_power

    def energy_reset(self):
        """Reset the fleet and every EV to the start-of-run state."""
        self.state_tracker = np.zeros(len(self.time_vec))
        for ev in self.ev_list:
            ev.energy_reset()
        # The EVs' corner priorities are back at their initial value, so the
        # demand curve left over from the previous run no longer describes the
        # fleet. Rebuild it; otherwise the first dispatch of the next run would be
        # priced against stale demand.
        self.demand_vec, self.priority_vec = self.calculate_demand()


"""Environment Class"""


class Environment(object):
    """
    Environment seen by the control algorithm.

    Wraps an EVFleet of ``n_evs`` vehicles together with a price vector that has
    one entry per time step. transition() forwards an action to the fleet as a
    dispatch instruction and returns the next fleet energy state and the reward
    (power actually used multiplied by the price of that time step). Visited
    states and applied actions are recorded in state_track / action_track.
    """

    def __init__(self, n_evs):
        self.ev_fleet = EVFleet(n_evs)
        self.time_vec = self.ev_fleet.time_vec
        self.price = self.get_price().copy()

        horizon = len(self.time_vec)
        self.state_track = np.zeros(horizon)
        self.action_track = np.zeros(horizon)

    def get_price(self):
        """Return the hard-coded price vector (16 entries).

        An analytic alternative that was previously sketched here, evaluated for
        every ``time`` in time_vec:
            25 + 5*sin(11*pi*time/24 - 0.15) + 3*cos(22*pi*time/24)
        """
        return np.array([26, 30, 19, 18, 30, 26, 30, 17, 19, 27, 32, 5, 5, 27, 28, 24])

    def transition(self, time, action):
        """Apply ``action`` at ``time``; return (next fleet energy state, reward).

        The fleet is driven through dispatch_instruction(); the fleet's
        no_dispatch_operation() is the aggregated alternative that is not used.
        """
        delta_t = 1
        step = get_index(time, self.time_vec)
        next_state, real_action = self.ev_fleet.dispatch_instruction(time, action)

        # Reward is based on the power the fleet really used, not on the request
        real_reward = real_action * delta_t * self.price[step]

        self.state_track[step + 1] = next_state
        self.action_track[step] = real_action
        return next_state, real_reward

    def state_reset(self):
        """Clear the recorded trajectory and reset the fleet."""
        horizon = len(self.time_vec)
        self.state_track = np.zeros(horizon)
        self.action_track = np.zeros(horizon)
        self.ev_fleet.energy_reset()


def policy(best_action, action_space, threshold):
    """Epsilon Greedy Policy

    One uniform number is drawn. When ``threshold`` is below it, ``best_action``
    is returned; otherwise a uniformly random element of ``action_space`` is
    returned (which costs a second random draw).
    """
    draw = np.random.rand(1)
    exploit = threshold < draw
    if not exploit:
        random_position = np.random.randint(0, len(action_space))
        return action_space[random_position]
    return best_action

In [0]:
"""
Dueling Neural Network Class
20-04-2020
Activation Function: tanh
Optimiser: Adam

Inputs:
input_size: Number of input features (int)
output_size: Number of output features (int)
learning_rate: Initial value of learning rate (float)
layer_param: Dictionary for layer sizes for value function, advantage function and fully connected layer
            {'v' : [list]
            'a' : [list]
            'fc': [list]
            }
            Number of neurons per layer and number of layers (list)
            Length of list determines the number of fully connected layers
            Each element of the list determines the number of neurons in that layer (int)

Architecture:         v----
                fc---       }-output
                      a====
"""

"""Dueling Neural Network Class"""


class DuelNeuralNet(nn.Module):
    """Dueling network: shared trunk followed by a value and an advantage stream.

    Layers are registered under fixed attribute names, in this order:
        trunk      fc1 .. fcN            (N = len(layer_param['fc']), 1 to 3)
        value      vfc1 .. vfcM, vfco    (M = len(layer_param['v']),  1 to 4)
        advantage  afc1 .. afcK, afco    (K = len(layer_param['a']),  1 to 4)
    Every hidden layer is followed by tanh. ``vfco`` has one output, ``afco`` has
    ``output_size`` outputs. The streams are combined as Q = V + (A - min A).

    Args:
        input_size: number of input features.
        output_size: number of outputs (one per action).
        layer_param: dict with keys 'fc', 'v' and 'a'; each value is a list with
            the number of neurons of every hidden layer of that part.
        learning_rate: initial learning rate of the Adam optimiser.

    Raises:
        ValueError: when a layer list is empty or longer than supported.
    """

    def __init__(self, input_size, output_size, layer_param, learning_rate):

        super(DuelNeuralNet, self).__init__()
        self.input_size = input_size
        self.output_size = output_size
        self.lr = learning_rate

        self.fc_density = layer_param['fc']
        self.fc_layers = len(self.fc_density)

        self.v_density = layer_param['v']
        self.v_layers = len(self.v_density)

        self.a_density = layer_param['a']
        self.a_layers = len(self.a_density)

        self.layer_param = layer_param

        # Initial fully connected layers
        if not 1 <= self.fc_layers <= 3:
            raise ValueError("Max size of inital fc layers restricted between 1 to 3 fully connected layers")
        self._register_chain("fc", input_size, self.fc_density)

        # Both streams start from the output of the last trunk layer
        trunk_width = self.fc_density[-1]

        # Value function layers
        if not 1 <= self.v_layers <= 4:
            raise ValueError("Max size of v-function layer restricted between 1 to 4 fully connected layers")
        self._register_chain("vfc", trunk_width, self.v_density)
        self.vfco = nn.Linear(self.v_density[-1], 1)

        # Advantage function layers
        if not 1 <= self.a_layers <= 4:
            raise ValueError("Max size of a-function layer restricted between 1 to 4 fully connected layers")
        self._register_chain("afc", trunk_width, self.a_density)
        self.afco = nn.Linear(self.a_density[-1], output_size)

        # Training parameters
        self.optimiser = optim.Adam(self.parameters(), lr=self.lr)
        # The scheduler is created but never stepped by training_loop()
        self.lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(self.optimiser, factor=0.25, patience=50, min_lr=0,
                                                                 threshold=1e-4)
        self.loss_criteria = nn.MSELoss()
        self.eps = 8000                 # maximum number of training iterations
        self.best_loss = 1e-4
        self.patience = 50
        self.early_stop = 0
        self.loss_tolerance = 3e-6
        self.current_loss = 1

        # Last value / advantage stream outputs produced by forward()
        self.vfun = 0.
        self.afun = 0.

        # Loss history, one entry every 100 iterations
        self.loss_list = []
        self.ep_list = []

        # Scalers, fitted by training_loop()
        self.input_scaler = MinMaxScaler(feature_range=(-1, 1), copy=True)
        self.output_scaler = MinMaxScaler(feature_range=(0, 1), copy=True)

    def _register_chain(self, prefix, in_features, widths):
        """Create Linear layers ``<prefix>1 .. <prefix>N`` that feed into each other."""
        for position, width in enumerate(widths, start=1):
            setattr(self, f"{prefix}{position}", nn.Linear(in_features, width))
            in_features = width

    def _tanh_chain(self, prefix, depth, x):
        """Run ``x`` through layers ``<prefix>1 .. <prefix>depth`` with tanh after each."""
        for position in range(1, depth + 1):
            x = torch.tanh(getattr(self, f"{prefix}{position}")(x))
        return x

    def forward(self, x):
        """Return the Q-values for ``x``; also stores the stream outputs in vfun / afun."""
        x = self._tanh_chain("fc", self.fc_layers, x)

        # Value function stream
        value_hidden = self._tanh_chain("vfc", self.v_layers, x)
        self.vfun = self.vfco(value_hidden)

        # Advantage function stream
        advantage_hidden = self._tanh_chain("afc", self.a_layers, x)
        self.afun = self.afco(advantage_hidden).view(-1, self.output_size)

        # Aggregating module, min type: Q = V + (A - min A(u)).
        # The mean type alternative would be Q = V + (A - mean A(u)).
        lowest_advantage = torch.min(self.afun, dim=1)[0].unsqueeze(dim=1)
        lowest_advantage = lowest_advantage.expand(self.afun.shape)
        return self.vfun + (self.afun - lowest_advantage)

    def training_loop(self, inputs, target, indices):
        """Fit the scalers on the data, then train on the scaled data.

        Args:
            inputs: tensor of input samples, one row per sample.
            target: tensor of targets; it is compared against one gathered
                Q-value per sample.
            indices: tensor with, per sample, the output column whose Q-value is
                regressed onto the target.

        Training runs for at most ``self.eps`` full-batch iterations and stops
        early once the loss has improved by less than ``loss_tolerance`` for more
        than ``patience`` consecutive iterations.

        NOTE(review): ``early_stop`` and ``best_loss`` are not reset here, so once
        early stopping has triggered a later call re-fits the scalers but performs
        no training iteration. Confirm that this is intended.
        """
        inputs = inputs.detach().numpy()
        inputs = torch.tensor(self.input_scaler.fit_transform(inputs), dtype=torch.float32)
        target = target.detach().numpy()
        target = torch.tensor(self.output_scaler.fit_transform(target), dtype=torch.float32)

        iteration = 0
        stalled_iterations = 0
        while iteration < self.eps and self.early_stop == 0:
            self.optimiser.zero_grad()
            q_values = self.forward(inputs)
            chosen_columns = (indices.type(torch.long)).view(-1, 1)
            predicted = torch.gather(q_values, dim=1, index=chosen_columns)
            loss = self.loss_criteria(predicted, target)
            loss.backward()
            self.optimiser.step()
            self.current_loss = loss.detach().numpy()

            # Early stopping condition
            improvement_bar = self.best_loss - self.loss_tolerance
            if self.current_loss < improvement_bar:
                # Clear improvement: remember it and start counting afresh
                stalled_iterations = 0
                self.best_loss = self.current_loss
                self.early_stop = 0
            elif improvement_bar <= self.current_loss < self.best_loss:
                # Marginal improvement only
                stalled_iterations += 1
                self.early_stop = 1 if stalled_iterations > self.patience else 0
            else:
                stalled_iterations = 0
                self.early_stop = 0

            iteration += 1
            if iteration % 100 == 0:
                self.loss_list.append(loss.detach().numpy())
                self.ep_list.append(iteration)

    def scale_input_data(self, input_data):
        """Scale an input tensor with the fitted input scaler.

        The tensor is converted to an ndarray for the scaler; the result is
        returned as a float32 tensor.
        """
        as_array = input_data.detach().numpy()
        return torch.tensor(self.input_scaler.transform(as_array), dtype=torch.float32)

    def scale_output_data(self, output_data):
        """Scale an output tensor with the fitted output scaler.

        The tensor is converted to an ndarray for the scaler; the result is
        returned as a float32 tensor.
        """
        as_array = output_data.detach().numpy()
        return torch.tensor(self.output_scaler.transform(as_array), dtype=torch.float32)


In [0]:
""" Main Script """
# Initialise
np.random.seed(1)   # fixed seed: the fleet and the sampled actions are reproducible
print_obj = pprint.PrettyPrinter(indent=5)
n_ev = 100
env = Environment(n_ev)

t_time = env.time_vec
U = [30 * i for i in range(11)]  # Action Space

# Parameters
minimise_sample = len(U)
day_duration = len(env.time_vec)

number_previous_states = 4

size = 2000
print("Hour training size =", size)

"""Creating training data"""
training_size_per_hour = size


# Sample Template = [time_index, x_t, u_t, r_t, x_t1, previous_states, u_t_index]
# Rows are grouped by hour: row = hour * training_size_per_hour + sample_index
col_size = 6 + number_previous_states
training_set_tensor = torch.zeros([training_size_per_hour * (day_duration - 1), col_size])


# Create training samples
print("Create Training Samples")
plt.figure("Training Data Visualisation")
plt.plot(t_time, env.ev_fleet.e_min, label='Minimum Energy', linestyle='--', color='black')
plt.plot(t_time, env.ev_fleet.e_max, label='Maximum Energy', linestyle='--', color='black')

one_third = training_size_per_hour // 3
half_of_actions = minimise_sample // 2

for sample_index in range(training_size_per_hour):
    # First third of the samples draws from the lower half of the action space,
    # second third from the upper half, the remainder from the whole space.
    # The second bucket starts AT one_third; the previous strict comparison sent
    # that one sample to the full-range bucket.
    if sample_index < one_third:
        u_min, u_max = 0, half_of_actions
    elif sample_index < 2 * one_third:
        u_min, u_max = half_of_actions, minimise_sample
    else:
        u_min, u_max = 0, minimise_sample

    # Every sample is one full day that starts from an empty fleet
    x_t = 0.
    previous_state = [0.] * number_previous_states      # oldest state first
    sample_state_track = [0.]

    for hour in range(day_duration - 1):
        row = hour * training_size_per_hour + sample_index

        u_t_index = int(np.random.randint(u_min, u_max))
        u_t = U[u_t_index]
        x_t1, r_t = env.transition(t_time[hour], u_t)

        training_set_tensor[row] = torch.tensor(
            [hour, x_t, u_t, r_t, x_t1, *previous_state, u_t_index],
            dtype=torch.float32)

        # Slide the history window: drop the oldest state, append the current one
        previous_state = previous_state[1:] + [x_t]

        x_t = x_t1
        sample_state_track.append(x_t)
    plt.plot(t_time, sample_state_track, marker='.')

    env.state_reset()

plt.legend()
plt.xlabel("Hour")
plt.ylabel("State of Energy")


# Persist the environment and the training data on the mounted Drive
data_path = '/content/drive/My Drive/Colab Notebooks/Thesis_Codes/model_pickle_files'
file_name = "ev_fleet_model_pickle_"+str(size)+"_5_price"
ev_fleet_model_path = os.path.join(data_path, file_name)

with open(ev_fleet_model_path, 'wb') as open_file:
    pickle.dump(env, file=open_file)

file_name = "training_data_"+str(size)+"_5_price"
training_data_path = os.path.join(data_path, file_name)

with open(training_data_path, 'wb') as open_file:
    pickle.dump(training_set_tensor, file=open_file)


print(f"Environment model pickled at \n {ev_fleet_model_path}")
print(f"Training Data pickled at \n {training_data_path}")






Hour training size = 2000
Create Training Samples
