In [46]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import gurobipy as gp
from gurobipy import GRB

In [47]:
# Function to generate samples for loads and wind production with final refinements
def generate_samples(num_samples, loads, sd, wind_limits, wind_shape):
    data = []
    sd = [mean * sd for mean in loads]  # Standard deviation for loads

    for sample_num in range(1, num_samples + 1):
        daily_variability = 0.8 + 0.2 * np.clip(np.random.normal(0.5,0.2),0, 1) 
        
        for hour in range(24):  # Fixed to 24 hours per day
            # Generate loads using normal distribution
            load_sample = [
                round(np.clip(np.random.normal(mean, std), 0, mean * 2), 2) 
                for mean, std in zip(loads, sd)
            ]  # Clipping to avoid unrealistic negative values or extreme high values
            
            # Generate wind production -> distinction between W1 and W2 + daily variability
            wind_sample = [
                # round(min((np.random.weibull(wind_shape) * (0.50 if limit == 10 else 0.45) * limit), limit * daily_variability), 2)
                round(min((np.random.weibull(wind_shape) *  0.50 * limit), limit * daily_variability), 2)
                for limit in wind_limits
            ]
            
            # Combine into one row with [Sample Number, Hour, L1, L2, L3, W1, W2]
            row = [sample_num, hour] + load_sample + wind_sample
            data.append(row)
    
    return data

# Load L1-L3 and wind capacity W1-W2
loads = [10, 112, 120]  # Mean values for L1, L2, L3
wind_limits = [10, 30]  # Maximum capacity for W1 and W2

# Generate data for the samples with final refinements
samples_data = generate_samples(num_samples=1, loads=loads, sd=0.10, 
                                wind_limits=wind_limits, wind_shape=1.1)

# Create column names for the DataFrame
columns = ["Sample_Nr", "Hour", "L1", "L2", "L3", "W1", "W2"]  # Sample_Nr ~ Day

# Create the DataFrame
samples_df = pd.DataFrame(samples_data, columns=columns)

# Save to CSV
#samples_df.to_csv("sample_simon.csv", index=False)

# Display the updated DataFrame structure
#samples_df.head(30)


## Task 1] Build the optimisation model

In [48]:
# Load the data from the data folder
wind_forecast = samples_df[['Hour', 'W1', 'W2']].copy()
load = samples_df[['Hour', 'L1', 'L2','L3']].copy()
bus = pd.read_csv('../Data/B (power transfer factor of each bus to each line).csv', delimiter=';')
max_prod = pd.read_csv('../Data/Maximum production of generating units.csv', delimiter=';')
min_prod = pd.read_csv('../Data/Minimum production of generating units.csv', delimiter=';')
min_down_time = pd.read_csv('../Data/Minimum down time of generating units.csv', delimiter=';')
min_up_time = pd.read_csv('../Data/Minimum up time of generating units.csv', delimiter=';')
prod_cost = pd.read_csv('../Data/Production cost of generating units.csv', delimiter=';')
ramp_rate = pd.read_csv('../Data/Ramping rate of generating units.csv', delimiter=';')
start_up_cost = pd.read_csv('../Data/Start-up cost of generating units.csv', delimiter=';')
transmission_cap = pd.read_csv('../Data/Transmission capacity of lines.csv', delimiter=';')

In [49]:
Nodes = ['Node 1', 'Node 2', 'Node 3', 'Node 4', 'Node 5', 'Node 6']
Generator = ['G1', 'G2', 'G3']
Generator_node = {'Node 1': 'G1', 'Node 2': 'G2', 'Node 6': 'G3'}
Load = ['L1', 'L2', 'L3']
Load_node = {'Node 4': 'L1', 'Node 5': 'L2', 'Node 6': 'L3'}
Wind = ['W1', 'W2']
Wind_node = {'Node 4': 'W1', 'Node 5': 'W2'}
Transmission = ['Line 1', 'Line 2', 'Line 3', 'Line 4', 'Line 5', 'Line 6','Line 7']
Transmission_node = {'Line 1': ['Node 1', 'Node 2'], 'Line 2': ['Node 2', 'Node 3'], 'Line 3': ['Node 3', 'Node 6'], 'Line 4': ['Node 5', 'Node 6'], 'Line 5': ['Node 4', 'Node 5'], 'Line 6': ['Node 2', 'Node 4'],'Line 6': ['Node 1', 'Node 4']}    

In [50]:
# Create matrix with the nodes as columns and the generators, loads and winds as rows, with 1 if connected to the node
Gen_n = np.zeros((len(Generator), len(Nodes)))
Load_n = np.zeros((len(Load), len(Nodes)))
Wind_n = np.zeros((len(Wind), len(Nodes)))
Transmission_n = np.zeros((len(Transmission), len(Nodes)))

# Populate the matrix
for i, g in enumerate(Generator):  # Iterate over generators
    for j, node in enumerate(Nodes):  # Iterate over nodes
        if Generator_node.get(node) == g:  # Check if generator is connected to the node
            Gen_n[i, j] = 1

for i, l in enumerate(Load):  # Iterate over loads
    for j, node in enumerate(Nodes):  # Iterate over nodes
        if Load_node.get(node) == l:  # Check if load is connected to the node
            Load_n[i, j] = 1

for i, w in enumerate(Wind):  # Iterate over winds
    for j, node in enumerate(Nodes):  # Iterate over nodes
        if Wind_node.get(node) == w:  # Check if wind is connected to the node
            Wind_n[i, j] = 1

for i, t in enumerate(Transmission):  # Iterate over transmission lines
    connected_nodes = Transmission_node.get(t, [])  # Get nodes connected by the transmission line
    for node in connected_nodes:  # For each node connected by the transmission line
        if node in Nodes:  # Ensure the node is valid (exists in Nodes list)
            j = Nodes.index(node)  # Get the column index for the node in Transmission_n
            Transmission_n[i, j] = 1  # Set the corresponding element to 1
            


In [51]:
# Define the input data class
class InputData:
    
    def __init__(
        self,
        wind_forecast: pd.DataFrame, 
        bus: pd.DataFrame,
        load: pd.DataFrame,
        max_prod: pd.DataFrame,
        min_prod: pd.DataFrame,
        min_down_time: pd.DataFrame,
        min_up_time: pd.DataFrame,
        prod_cost: pd.DataFrame,
        ramp_rate: pd.DataFrame,
        start_up_cost: pd.DataFrame,
        transmission_cap: pd.DataFrame
    ):
        self.time = range(len(wind_forecast))  #maybe define it with lenght of wind_production
        self.wind_forecast = wind_forecast
        self.bus = bus
        self.load = load
        self.max_prod = max_prod
        self.min_prod = min_prod
        self.min_down_time = min_down_time
        self.min_up_time = min_up_time
        self.prod_cost = prod_cost
        self.ramp_rate = ramp_rate
        self.start_up_cost = start_up_cost
        self.transmission_cap = transmission_cap
        self.M = 1000000  # Penalty for having flexible demand
        self.Gen_n = Gen_n  # Matrix mapping generators to nodes
        self.Load_n = Load_n # Matrix mapping loads to nodes
        self.Wind_n = Wind_n # Matrix mapping wind to nodes
        
        


In [52]:
class Expando(object):
    '''
        A small class which can have attributes set
    '''
    pass

In [53]:
# Define the optimization model class

class EconomicDispatch():
        
        def __init__(self, input_data: InputData):
            self.data = input_data 
            self.variables = Expando()
            self.constraints = Expando() 
            self.results = Expando() 
            self._build_model() 
            
        def _build_variables(self):
            # one binary variable for the status of each generator
            self.variables.status = {
                (i, t): self.model.addVar(vtype=GRB.BINARY, 
                                            name='status_G{}_{}'.format(i, t)) 
                                            for i in range(1, len(self.data.max_prod)+1) 
                                            for t in self.data.time}
            
            # one variable for each generator for each time of the day
            self.variables.prod_gen = {
                 (i, t): self.model.addVar(lb=0, ub=self.data.max_prod.iloc[i-1, 0], 
                                           name='generation_G{}_{}'.format(i, t)) 
                                           for i in range(1, len(self.data.max_prod)+1) 
                                           for t in self.data.time}
            
            # one variable for each wind generator for each time of the day
            self.variables.prod_wind = {
                 (i, t): self.model.addVar(lb=0, ub=self.data.wind_forecast.iloc[t, i], 
                                            name='wind_generation_W{}_{}'.format(i, t)) 
                                            for i in range(1, len(self.data.wind_forecast.iloc[0, :])) 
                                            for t in self.data.time}
            
            # one variable for each start-up cost for each generator
            self.variables.start_up_cost = {
                 (i, t): self.model.addVar(lb=0, 
                                            name='start_up_cost_G{}_{}'.format(i, t)) 
                                            for i in range(1, len(self.data.max_prod)+1) 
                                            for t in self.data.time}
            
            # add two slack variables to always make the model feasible, allowing the demand to be flexible
            self.variables.epsilon = {
                 (n, t): self.model.addVar(lb=0, 
                                           name='epsilon_Bus{}_{}'.format(n, t)) 
                                           for n in range(1, len(self.data.bus.iloc[0,:])+1) 
                                           for t in self.data.time}
            self.variables.delta = {
                 (n, t): self.model.addVar(lb=0, 
                                           name='delta_Bus{}_{}'.format(n, t))
                                           for n in range(1, len(self.data.bus.iloc[0,:])+1)
                                           for t in self.data.time}
            
            
        def _build_constraints(self):
            # Minimum capacity of the generator
            self.constraints.min_capacity = {
                (i, t): self.model.addConstr(
                    self.variables.prod_gen[i, t] >= self.data.min_prod.iloc[i-1, 0] * self.variables.status[i, t]
                ) for i in range(1, len(self.data.max_prod)+1) for t in self.data.time}
            # Maximum capacity of the generator
            self.constraints.max_capacity = {
                (i, t): self.model.addConstr(
                    self.variables.prod_gen[i, t] <= self.data.max_prod.iloc[i-1, 0] * self.variables.status[i, t]
                ) for i in range(1, len(self.data.max_prod)+1) for t in self.data.time}

            # Power balance constraint
            self.constraints.power_balance = {
                t: self.model.addConstr(
                    gp.quicksum(self.variables.prod_gen[i, t] for i in range(1, len(self.data.max_prod) + 1)) +
                    gp.quicksum(self.variables.prod_wind[i, t] for i in range(1, len(self.data.wind_forecast.iloc[0, :]))) == 
                    gp.quicksum(self.data.load.iloc[t, i] * Load_n[i-1, n-1] for i in range(1, len(self.data.load.iloc[0, :]))for n in range(1, len(self.data.bus.iloc[0, :]) + 1))
                    + gp.quicksum(self.variables.epsilon[n, t] - self.variables.delta[n, t] for n in range(1, len(self.data.bus.iloc[0, :]) + 1))
                ) for t in self.data.time}
        
            

            # Transmission capacity constraint up
            self.constraints.transmission_capacity_up = {
                    (l, t): self.model.addConstr(
                        gp.quicksum(
                            self.data.bus.iloc[l-1, n-1] * Transmission_n[l-1, n-1] * (
                                self.variables.prod_gen[g, t] * Gen_n[g-1, n-1] +
                                self.variables.prod_wind[w, t] * Wind_n[w-1, n-1] -
                                self.data.load.iloc[t, i] * Load_n[i-1, n-1] -
                                self.variables.epsilon[n, t] +
                                self.variables.delta[n, t]
                            )
                            for n in range(1, len(self.data.bus.iloc[0, :]) + 1)
                            for i in range(1, len(self.data.load.iloc[0, :]))
                            for g in range(1, len(self.data.max_prod) + 1)
                            for w in range(1, len(self.data.wind_forecast.iloc[0, :]))
                        ) <= self.data.transmission_cap.iloc[l-1, 0],
                        name="transmission_capacity_up_L{}_T{}".format(l, t)
                    ) for l in range(1, len(self.data.transmission_cap) + 1)
                    for t in self.data.time
                }


            #Transmission capacity constraint down
            self.constraints.transmission_capacity_down = {
                    (l, t): self.model.addConstr(
                        gp.quicksum(
                            self.data.bus.iloc[l-1, n-1] * Transmission_n[l-1, n-1] * (
                                self.variables.prod_gen[g, t] * Gen_n[g-1, n-1] +
                                self.variables.prod_wind[w, t] * Wind_n[w-1, n-1] -
                                self.data.load.iloc[t, i] * Load_n[i-1, n-1] -
                                self.variables.epsilon[n, t] +
                                self.variables.delta[n, t]
                            )
                            for n in range(1, len(self.data.bus.iloc[0, :]) + 1)
                            for i in range(1, len(self.data.load.iloc[0, :]))
                            for g in range(1, len(self.data.max_prod) + 1)
                            for w in range(1, len(self.data.wind_forecast.iloc[0, :]))
                        ) >= -self.data.transmission_cap.iloc[l-1, 0],
                        name="transmission_capacity_down_L{}_T{}".format(l, t)
                    ) for l in range(1, len(self.data.transmission_cap) + 1)
                    for t in self.data.time
                }

                                     

            #Start-up costs constraint
            self.constraints.start_up_cost = {
                (i, t): self.model.addConstr(
                    self.variables.start_up_cost[i, t] >= self.data.start_up_cost.iloc[i-1, 0] * (self.variables.status[i, t] - self.variables.status[i, t-1])
                ) for i in range(1, len(self.data.max_prod)+1) for t in self.data.time if t > 0}
            self.constraints.start_up_cost_0 = {
                i: self.model.addConstr(
                    self.variables.start_up_cost[i, 0] >= self.data.start_up_cost.iloc[i-1, 0] * self.variables.status[i, 0]
                ) for i in range(1, len(self.data.max_prod)+1)}
            
            # Ramping constraint
            self.constraints.ramping_up = {
                (i, t): self.model.addConstr(
                    self.variables.prod_gen[i, t] - self.variables.prod_gen[i, t-1] <= self.data.ramp_rate.iloc[i-1, 0]
                ) for i in range(1, len(self.data.max_prod)+1) for t in self.data.time if t > 0}
            self.constraints.ramping_down = {
                (i, t): self.model.addConstr(
                    self.variables.prod_gen[i, t-1] - self.variables.prod_gen[i, t] <= self.data.ramp_rate.iloc[i-1, 0]
                ) for i in range(1, len(self.data.max_prod)+1) for t in self.data.time if t > 0}
            
            # Minimum up time constraint
            self.constraints.min_up_time = {
                (i, t, to): self.model.addConstr(
                    -self.variables.status[i, t - 1] + self.variables.status[i, t] - self.variables.status[i, to] <= 0
                ) for i in range(1, len(self.data.max_prod)+1) 
                for t in self.data.time 
                for to in range(t, min(t + self.data.min_up_time.iloc[i-1, 0], len(self.data.time))) if t > 0}
            
            # Minimum down time constraint
            self.constraints.min_down_time = {
                (i, t, to): self.model.addConstr(
                    self.variables.status[i, t - 1] - self.variables.status[i, t] + self.variables.status[i, to] <= 1
                ) for i in range(1, len(self.data.max_prod)+1) 
                for t in self.data.time 
                for to in range(t, min(t + self.data.min_down_time.iloc[i-1, 0], len(self.data.time))) if t > 0}
            


        def _build_objective(self):
            # Objective function
            self.model.setObjective(
                gp.quicksum(self.data.prod_cost.iloc[i-1, 0]*self.variables.prod_gen[i, t] for i in range(1, len(self.data.max_prod)+1) for t in self.data.time) +
                gp.quicksum(self.variables.start_up_cost[i, t] for i in range(1, len(self.data.max_prod)+1) for t in self.data.time) +
                self.data.M * (gp.quicksum(self.variables.epsilon[n, t] + self.variables.delta[n, t] for n in range(1, len(self.data.bus.iloc[0,:])+1) for t in self.data.time))
            )

        def _build_model(self):
            self.model = gp.Model('EconomicDispatch')
            self._build_variables()
            self._build_constraints()
            self._build_objective()
            self.model.update()

        def optimize(self):
            self.model.optimize()
            self._extract_results()

        def _extract_results(self):
            self.results.production = pd.DataFrame({
                #'time': [t for t in self.data.time],
                #'status G1': [self.variables.status[1, t].x for t in self.data.time],
                #'status G2': [self.variables.status[2, t].x for t in self.data.time],
                #'status G3': [self.variables.status[3, t].x for t in self.data.time],
                #'start_up_cost 1': [self.variables.start_up_cost[1, t].x for t in self.data.time],
                #'start_up_cost 2': [self.variables.start_up_cost[2, t].x for t in self.data.time],
                #'start_up_cost 3': [self.variables.start_up_cost[3, t].x for t in self.data.time],
                'generation 1': [self.variables.prod_gen[1, t].x for t in self.data.time],
                'generation 2': [self.variables.prod_gen[2, t].x for t in self.data.time],
                'generation 3': [self.variables.prod_gen[3, t].x for t in self.data.time],
                'wind generation 1': [self.variables.prod_wind[1, t].x for t in self.data.time],
                'wind generation 2': [self.variables.prod_wind[2, t].x for t in self.data.time],
                'load 1': [self.data.load.iloc[t, 1] for t in self.data.time],
                'load 2': [self.data.load.iloc[t, 2] for t in self.data.time],
                'load 3': [self.data.load.iloc[t, 3] for t in self.data.time],
                'epsilon 1': [self.variables.epsilon[1, t].x for t in self.data.time],
                'delta 1': [self.variables.delta[1, t].x for t in self.data.time],
                'epsilon 2': [self.variables.epsilon[2, t].x for t in self.data.time],
                'delta 2': [self.variables.delta[2, t].x for t in self.data.time],
                'epsilon 3': [self.variables.epsilon[3, t].x for t in self.data.time],
                'delta 3': [self.variables.delta[3, t].x for t in self.data.time],
                'epsilon 4': [self.variables.epsilon[4, t].x for t in self.data.time],
                'delta 4': [self.variables.delta[4, t].x for t in self.data.time],
                'epsilon 5': [self.variables.epsilon[5, t].x for t in self.data.time],
                'delta 5': [self.variables.delta[5, t].x for t in self.data.time],
                'epsilon 6': [self.variables.epsilon[6, t].x for t in self.data.time],
                'delta 6': [self.variables.delta[6, t].x for t in self.data.time]
            })

            # Add columns for each transmission line's binding status at each time
            for l in range(1, len(self.data.transmission_cap) + 1):
                up_binding = []
                down_binding = []
                
                for t in self.data.time:
                    up_constraint = self.constraints.transmission_capacity_up[l, t]
                    down_constraint = self.constraints.transmission_capacity_down[l, t]

                    # Append binding status (True if binding, based on slack value)
                    up_binding.append(abs(up_constraint.slack) < 1e-6)
                    down_binding.append(abs(down_constraint.slack) < 1e-6)

                # Add the binding status as new columns in the main production DataFrame
                self.results.production[f'transmission_up_binding_L{l}'] = up_binding
                self.results.production[f'transmission_down_binding_L{l}'] = down_binding

            self.results.unit_commitment = pd.DataFrame({
                'time': [t for t in self.data.time],
                'G1': [self.variables.status[1, t].x for t in self.data.time],
                'G2': [self.variables.status[2, t].x for t in self.data.time],
                'G3': [self.variables.status[3, t].x for t in self.data.time]
            })
            
        def _print_model(self):
            self.model.write('EconomicDispatch.lp')        
            
                 

In [54]:
# Run the model
input_data = InputData(wind_forecast, bus, load, max_prod, min_prod, min_down_time, min_up_time, prod_cost, ramp_rate, start_up_cost, transmission_cap)
model = EconomicDispatch(input_data)
model.optimize()
results = model.results.production
unit_comitment = model.results.unit_commitment #to be used for the next steps
# model._build_model()
# model._print_model()


Gurobi Optimizer version 11.0.3 build v11.0.3rc0 (win64 - Windows 11.0 (22631.2))

CPU model: AMD Ryzen 7 PRO 4750U with Radeon Graphics, instruction set [SSE2|AVX|AVX2]
Thread count: 8 physical cores, 16 logical processors, using up to 16 threads

Optimize a model with 1043 rows, 552 columns and 3240 nonzeros
Model fingerprint: 0xa93cbee5
Variable types: 480 continuous, 72 integer (72 binary)
Coefficient statistics:
  Matrix range     [4e-01, 9e+02]
  Objective range  [1e+00, 1e+06]
  Bounds range     [2e-01, 2e+02]
  RHS range        [1e+00, 6e+02]
Found heuristic solution: objective 9.487611e+09
Presolve removed 338 rows and 49 columns
Presolve time: 0.01s
Presolved: 705 rows, 503 columns, 2787 nonzeros
Variable types: 408 continuous, 95 integer (95 binary)

Root relaxation: objective 5.442191e+08, 162 iterations, 0.00 seconds (0.00 work units)

    Nodes    |    Current Node    |     Objective Bounds      |     Work
 Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It

In [55]:
model.results.production

Unnamed: 0,generation 1,generation 2,generation 3,wind generation 1,wind generation 2,load 1,load 2,load 3,epsilon 1,delta 1,...,transmission_up_binding_L3,transmission_down_binding_L3,transmission_up_binding_L4,transmission_down_binding_L4,transmission_up_binding_L5,transmission_down_binding_L5,transmission_up_binding_L6,transmission_down_binding_L6,transmission_up_binding_L7,transmission_down_binding_L7
0,100.0,14.847529,65.443147,0.0,16.9,9.85,87.82,111.07,0.0,0.0,...,True,False,False,False,True,False,False,False,False,False
1,100.0,33.337126,62.003147,0.0,20.53,8.7,120.19,107.63,0.0,0.0,...,True,False,False,False,True,False,False,False,False,False
2,100.0,47.666243,70.0,0.0,6.02,8.26,120.12,127.04,0.0,0.0,...,True,False,False,False,True,False,False,False,False,False
3,100.0,36.622461,66.593147,0.0,24.68,10.69,124.92,112.22,0.0,0.0,...,True,False,False,False,True,False,False,False,False,False
4,100.0,29.339828,69.653147,0.0,9.69,10.62,102.87,115.28,0.0,0.0,...,True,False,False,False,True,False,False,False,False,False
5,100.0,45.38534,70.0,0.0,9.94,10.85,114.29,128.08,0.0,0.0,...,True,False,False,False,True,False,False,False,False,False
6,100.0,20.153049,59.663147,0.0,14.47,9.06,95.27,105.29,0.0,0.0,...,True,False,False,False,True,False,False,False,False,False
7,100.0,33.261767,59.933147,0.0,7.72,9.95,108.39,105.56,0.0,0.0,...,True,False,False,False,True,False,False,False,False,False
8,100.0,32.159274,70.0,0.0,1.31,10.29,97.76,119.23,0.0,0.0,...,True,False,False,False,True,False,False,False,False,False
9,100.0,33.446104,66.453147,0.0,26.86,10.54,122.04,112.08,0.0,0.0,...,True,False,False,False,True,False,False,False,False,False


In [56]:
model.results.unit_commitment 

Unnamed: 0,time,G1,G2,G3
0,0,1.0,1.0,1.0
1,1,1.0,1.0,1.0
2,2,1.0,1.0,1.0
3,3,1.0,1.0,1.0
4,4,1.0,1.0,1.0
5,5,1.0,1.0,1.0
6,6,1.0,1.0,1.0
7,7,1.0,1.0,1.0
8,8,1.0,1.0,1.0
9,9,1.0,1.0,1.0


In [57]:
features = model.results.production[['wind generation 1', 'wind generation 2', 'load 1', 'load 2', 'load 3']]

# Use the unit commitment results as labels
labels = model.results.unit_commitment[['G1', 'G2', 'G3']]

# Separate features (X) and labels (y)
X = features
y = labels
from sklearn.model_selection import train_test_split

# Split the data
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

print(f"Training Set: {X_train.shape}, Validation Set: {X_val.shape}, Test Set: {X_test.shape}")


Training Set: (16, 5), Validation Set: (4, 5), Test Set: (4, 5)


In [58]:
# Check if units are always ON or OFF
print(y_train.mean())  # If mean = 1 or 0, the unit is always ON or OFF

# Filter out units that are always ON or OFF
units_to_classify = y_train.columns[(y_train.mean() > 0) & (y_train.mean() < 1)]
print(f"Units to classify: {units_to_classify}")


G1    1.0
G2    1.0
G3    1.0
dtype: float64
Units to classify: Index([], dtype='object')


In [59]:
print((y_train['G2'].value_counts))
print((y_val['G2'].value_counts))
print((y_train['G2'].value_counts))

<bound method IndexOpsMixin.value_counts of 21    1.0
5     1.0
2     1.0
12    1.0
15    1.0
3     1.0
4     1.0
22    1.0
17    1.0
20    1.0
23    1.0
7     1.0
10    1.0
14    1.0
19    1.0
6     1.0
Name: G2, dtype: float64>
<bound method IndexOpsMixin.value_counts of 0     1.0
11    1.0
18    1.0
13    1.0
Name: G2, dtype: float64>
<bound method IndexOpsMixin.value_counts of 21    1.0
5     1.0
2     1.0
12    1.0
15    1.0
3     1.0
4     1.0
22    1.0
17    1.0
20    1.0
23    1.0
7     1.0
10    1.0
14    1.0
19    1.0
6     1.0
Name: G2, dtype: float64>


In [60]:
# Linear Classifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Define logistic regression models for each generator
logistic_models = {}

# Loop through each generator in the labels
for unit in y.columns:  # e.g., 'G1', 'G2', 'G3'
    print(f"\nTraining Logistic Regression for {unit}...")

    # Extract training and validation data for this unit
    y_train_unit = y_train[unit]
    y_val_unit = y_val[unit]

    # Initialize and train the logistic regression model
    logistic = LogisticRegression(random_state=42)
    logistic.fit(X_train, y_train_unit)
    logistic_models[unit] = logistic

    # Predict on validation set
    y_pred_val = logistic.predict(X_val)

    # Evaluate performance
    accuracy = accuracy_score(y_val_unit, y_pred_val)
    print(f"Validation Accuracy for {unit}: {accuracy}")
    print(classification_report(y_val_unit, y_pred_val))



Training Logistic Regression for G1...


ValueError: This solver needs samples of at least 2 classes in the data, but the data contains only one class: 1.0

In [None]:
## Non-Linear Classifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

# Define SVM models for each generator
svm_models = {}

# Loop through each generator in the labels
for unit in y.columns:  # e.g., 'G1', 'G2', 'G3'
    print(f"\nTraining SVM (RBF Kernel) for {unit}...")

    # Extract training and validation data for this unit
    y_train_unit = y_train[unit]
    y_val_unit = y_val[unit]

    # Initialize and train the SVM model
    svm = SVC(kernel='rbf', random_state=42)
    svm.fit(X_train, y_train_unit)
    svm_models[unit] = svm

    # Predict on validation set
    y_pred_val = svm.predict(X_val)

    # Evaluate performance
    accuracy = accuracy_score(y_val_unit, y_pred_val)
    print(f"Validation Accuracy for {unit}: {accuracy}")
    print(classification_report(y_val_unit, y_pred_v))


In [None]:
#Evaluate Models
from sklearn.metrics import accuracy_score, classification_report

# Evaluate all models on the test set
for unit in y_test.columns:  # e.g., 'G1', 'G2', 'G3'
    print(f"\nEvaluating models for {unit}:")

    # Logistic Regression
    logistic_model = logistic_models[unit]  # Retrieve the trained logistic regression model
    y_pred_test_logistic = logistic_model.predict(X_test)  # Predict on the test set
    print(f"Logistic Regression Test Accuracy for {unit}: {accuracy_score(y_test[unit], y_pred_test_logistic)}")
    print(classification_report(y_test[unit], y_pred_test_logistic))

    # SVM
    svm_model = svm_models[unit]  # Retrieve the trained SVM model
    y_pred_test_svm = svm_model.predict(X_test)  # Predict on the test set
    print(f"SVM Test Accuracy for {unit}: {accuracy_score(y_test[unit], y_pred_test_svm)}")
    print(classification_report(y_test[unit], y_pred_test_svm))