In [19]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.preprocessing import OneHotEncoder, LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score, classification_report
from tensorflow.keras.layers import Dense,Dropout

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from tensorflow.keras.models import Sequential
from sklearn.svm import SVC

In [None]:
def run_ML_models(X:pd.DataFrame, y:np.array, algorithms:list, cv=10, scoring='accuracy'):
    """Score every model in ``algorithms`` on one shared 80/20 split of the data.

    Scikit-learn style estimators are scored with ``cross_val_score`` on the
    training split and are then fitted on that whole split, so the objects
    passed in are left fitted. An entry whose class is named ``Sequential`` is
    only used as a marker: a fresh dense network is built here, trained on the
    training split and scored on the held-out split (cross-validating a
    100-epoch network ``cv`` times would be far more expensive).

    Args:
        X (pd.DataFrame): Feature matrix, one row per sample. The network
            branch casts it to float32, so every column must be numeric.
        y (np.array): Class labels aligned with the rows of ``X``. The network
            branch assumes integer codes ``0..k-1`` (e.g. the output of
            ``LabelEncoder``).
        algorithms (list): Model instances to compare.
        cv (int): Number of cross-validation folds for the scikit-learn models.
        scoring (str): Scoring name passed to ``cross_val_score``. The network
            always reports accuracy, whatever this is set to.

    Returns:
        dict: Maps each model's class name to its score. Two models of the same
        class share a key, so the later one overwrites the earlier one.
    """
    # Fixed seed so every model sees exactly the same split.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    n_classes = len(np.unique(y))
    accuracies = {}

    for algorithm in algorithms:
        name = type(algorithm).__name__

        if name == "Sequential":
            # Size the output layer and loss from the number of classes: one
            # sigmoid unit only works for a two-class target.
            if n_classes > 2:
                output_units, output_activation, loss = n_classes, 'softmax', 'sparse_categorical_crossentropy'
            else:
                output_units, output_activation, loss = 1, 'sigmoid', 'binary_crossentropy'

            model = Sequential()
            model.add(Dense(10, input_dim=X_train.shape[1], activation='relu'))  # First hidden layer
            model.add(Dense(15, activation='relu'))  # Second hidden layer
            model.add(Dense(output_units, activation=output_activation))  # Output layer
            model.compile(loss=loss, optimizer='adam', metrics=['accuracy'])

            train_features = np.asarray(X_train, dtype='float32')
            test_features = np.asarray(X_test, dtype='float32')
            # verbose=0: 100 epochs of progress bars would bury the notebook output.
            model.fit(train_features, np.asarray(y_train), epochs=100, batch_size=10, verbose=0)

            # evaluate() returns [loss, accuracy] because of metrics=['accuracy'].
            _, holdout_accuracy = model.evaluate(test_features, np.asarray(y_test), verbose=0)
            accuracies[name] = float(holdout_accuracy)
        else:
            # cross_val_score clones and refits the estimator for each fold, so
            # it has to be given the training split, not the small test split.
            scores = cross_val_score(algorithm, X_train, y_train, cv=cv, scoring=scoring)
            accuracies[name] = np.mean(scores)
            # Leave the caller's estimator fitted on the full training split.
            algorithm.fit(X_train, y_train)

    return accuracies


def plot_algorithms(accuracies):
    """
    Draw a horizontal bar chart with one bar per algorithm.

    Parameters:
    - accuracies: Dictionary mapping an algorithm name to its accuracy.
    """
    fig, ax = plt.subplots(figsize=(10, 6))

    # Dict order is preserved, so names and values stay paired.
    names = list(accuracies)
    values = [accuracies[name] for name in names]
    ax.barh(names, values, color='skyblue')

    ax.set_xlabel('Accuracy')
    ax.set_ylabel('Algorithm')
    ax.set_title('Accuracy of Machine Learning Algorithms')
    ax.set_xlim(0, 1)  # Accuracy is a fraction, so pin the axis to [0, 1]
    plt.show()

In [2]:
# Load the dataset from a CSV file given by a relative path.
df = pd.read_csv("Steel_industry.csv")
# Bare last expression so the notebook renders the frame.
df

Unnamed: 0,Date_Time,Usage_kWh,Lagging_Current_Reactive.Power_kVarh,Leading_Current_Reactive_Power_kVarh,CO2(tCO2),Lagging_Current_Power_Factor,Leading_Current_Power_Factor,NSM,WeekStatus,Day_Of_Week,Load_Type
0,01/01/2018 00:15,3.17,2.95,0.00,0.0,73.21,100.00,900,Weekday,Monday,Light_Load
1,01/01/2018 00:30,4.00,4.46,0.00,0.0,66.77,100.00,1800,Weekday,Monday,Light_Load
2,01/01/2018 00:45,3.24,3.28,0.00,0.0,70.28,100.00,2700,Weekday,Monday,Light_Load
3,01/01/2018 01:00,3.31,3.56,0.00,0.0,68.09,100.00,3600,Weekday,Monday,Light_Load
4,01/01/2018 01:15,3.82,4.50,0.00,0.0,64.72,100.00,4500,Weekday,Monday,Light_Load
...,...,...,...,...,...,...,...,...,...,...,...
35036,31/12/2018 23:15,3.74,3.74,0.00,0.0,70.71,100.00,83700,Weekday,Monday,Light_Load
35037,31/12/2018 23:30,3.78,3.17,0.07,0.0,76.62,99.98,84600,Weekday,Monday,Light_Load
35038,31/12/2018 23:45,3.78,3.06,0.11,0.0,77.72,99.96,85500,Weekday,Monday,Light_Load
35039,31/12/2018 00:00,3.67,3.02,0.07,0.0,77.22,99.98,0,Weekday,Monday,Light_Load


In [7]:
# Drop the timestamp column. Rebinding with errors="ignore" keeps this cell
# safe to re-run; an in-place drop raises KeyError the second time.
df = df.drop(columns="Date_Time", errors="ignore")

In [10]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

Unnamed: 0,Usage_kWh,Lagging_Current_Reactive.Power_kVarh,Leading_Current_Reactive_Power_kVarh,CO2(tCO2),Lagging_Current_Power_Factor,Leading_Current_Power_Factor,NSM
count,35041.0,35041.0,35041.0,35041.0,35041.0,35041.0,35041.0
mean,27.386229,13.035012,3.871412,0.011524,80.57861,84.366038,42750.834736
std,33.444133,16.305916,7.424863,0.016151,18.921337,30.45803,24940.667915
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,3.2,2.3,0.0,0.0,63.32,99.7,21600.0
50%,4.57,5.0,0.0,0.0,87.96,100.0,43200.0
75%,51.23,22.64,2.09,0.02,99.03,100.0,64800.0
max,157.18,96.91,27.76,0.07,100.0,100.0,85500.0


In [11]:
# Column dtypes, non-null counts and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 35041 entries, 0 to 35040
Data columns (total 10 columns):
 #   Column                                Non-Null Count  Dtype  
---  ------                                --------------  -----  
 0   Usage_kWh                             35041 non-null  float64
 1   Lagging_Current_Reactive.Power_kVarh  35041 non-null  float64
 2   Leading_Current_Reactive_Power_kVarh  35041 non-null  float64
 3   CO2(tCO2)                             35041 non-null  float64
 4   Lagging_Current_Power_Factor          35041 non-null  float64
 5   Leading_Current_Power_Factor          35041 non-null  float64
 6   NSM                                   35041 non-null  int64  
 7   WeekStatus                            35041 non-null  object 
 8   Day_Of_Week                           35041 non-null  object 
 9   Load_Type                             35041 non-null  object 
dtypes: float64(6), int64(1), object(3)
memory usage: 2.7+ MB


In [17]:

# Encode the class labels as integer codes.
target_encoder = LabelEncoder()
encoded_target = target_encoder.fit_transform(df['Load_Type'])

# Build the feature frame without mutating df: an in-place drop of the target
# makes this cell fail with KeyError on df['Load_Type'] when it is run again.
feature_df = df.drop(columns="Load_Type")

columns_to_encode = ["WeekStatus", "Day_Of_Week"]

preprocessor = ColumnTransformer(
    transformers=[
        ('onehot', OneHotEncoder(), columns_to_encode)
    ],
    remainder='passthrough',  # Leave the rest of the columns unchanged
    sparse_threshold=0,  # Always return a dense array so it can be wrapped in a DataFrame
)

df_encoded = pd.DataFrame(preprocessor.fit_transform(feature_df), columns=preprocessor.get_feature_names_out())

In [None]:
LR = LogisticRegression()
RFC = RandomForestClassifier()
DTC = DecisionTreeClassifier()
# Give the instance its own name: `SVC = SVC()` replaces the imported class
# with an instance, so running this cell a second time raises TypeError.
SVM = SVC()
# run_ML_models only checks this entry's class name and builds its own network.
NEURAL_NETWORK = Sequential()
algorithms = [LR, RFC, DTC, SVM, NEURAL_NETWORK]

# Compare algorithms and plot accuracies
accuracies = run_ML_models(df_encoded,encoded_target, algorithms)
plot_algorithms(accuracies)