In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score
import numpy as np

In [2]:
# Read the Steel_industry_data CSV into a DataFrame.
# NOTE(review): this is an absolute, machine-specific Windows path, so the
# cell only runs where that exact file exists.
csv_path = r'D:\priya\Steel_industry_data.csv'
data = pd.read_csv(csv_path)
# Bare last expression so the notebook renders the loaded frame.
data


Unnamed: 0,date,Usage_kWh,Lagging_Current_Reactive.Power_kVarh,Leading_Current_Reactive_Power_kVarh,CO2(tCO2),Lagging_Current_Power_Factor,Leading_Current_Power_Factor,NSM,WeekStatus,Day_of_week,Load_Type
0,01/01/2018 00:15,3.17,2.95,0.00,0.0,73.21,100.00,900,Weekday,Monday,Light_Load
1,01/01/2018 00:30,4.00,4.46,0.00,0.0,66.77,100.00,1800,Weekday,Monday,Light_Load
2,01/01/2018 00:45,3.24,3.28,0.00,0.0,70.28,100.00,2700,Weekday,Monday,Light_Load
3,01/01/2018 01:00,3.31,3.56,0.00,0.0,68.09,100.00,3600,Weekday,Monday,Light_Load
4,01/01/2018 01:15,3.82,4.50,0.00,0.0,64.72,100.00,4500,Weekday,Monday,Light_Load
...,...,...,...,...,...,...,...,...,...,...,...
35035,31/12/2018 23:00,3.85,4.86,0.00,0.0,62.10,100.00,82800,Weekday,Monday,Light_Load
35036,31/12/2018 23:15,3.74,3.74,0.00,0.0,70.71,100.00,83700,Weekday,Monday,Light_Load
35037,31/12/2018 23:30,3.78,3.17,0.07,0.0,76.62,99.98,84600,Weekday,Monday,Light_Load
35038,31/12/2018 23:45,3.78,3.06,0.11,0.0,77.72,99.96,85500,Weekday,Monday,Light_Load


In [4]:
# Separate the prediction target (WeekStatus) from the feature columns.
# NOTE(review): 'Day_of_week' and 'date' remain in X and presumably determine
# WeekStatus directly -- confirm; if so, any accuracy measured downstream
# reflects target leakage rather than genuine predictive signal.
y = data['WeekStatus']
X = data.drop(columns=['WeekStatus'])

In [5]:
# Fill missing values in every numeric feature with that column's mean.
# NOTE(review): the means are computed over the whole of X, i.e. before the
# train/test split performed later in the notebook.
numeric_columns = X.select_dtypes(include='number').columns
column_means = X[numeric_columns].mean()
X[numeric_columns] = X[numeric_columns].fillna(column_means)

In [6]:
# Replace each object-dtype (string) feature with integer codes, keeping one
# fitted encoder per column so raw values can be encoded again at prediction time.
categorical_columns = X.select_dtypes(include='object').columns
label_encoders = {name: LabelEncoder() for name in categorical_columns}
for name, encoder in label_encoders.items():
    # fit_transform learns the column's distinct values and returns their codes.
    X[name] = encoder.fit_transform(X[name])

In [7]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# partition identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [8]:
# Standardise the numeric features; every other column (the label-encoded
# categoricals) is forwarded unchanged via remainder='passthrough'.
numeric_scaling = ('num', StandardScaler(), numeric_columns)
preprocessor = ColumnTransformer(
    transformers=[numeric_scaling],
    remainder='passthrough',
)


In [9]:
# Add zero-mean Gaussian noise (standard deviation = noise_factor, in the raw
# units of each feature) to the training features only.
noise_factor = 0.1

# Use a seeded generator so that Restart & Run All reproduces the same noisy
# training set; the unseeded global NumPy state gave different data each run.
noise_rng = np.random.default_rng(42)

# Perturb only the numeric measurements. The remaining columns hold integer
# codes produced by label encoding, and jittering category codes has no
# meaningful interpretation.
noise = noise_rng.normal(0, noise_factor, (len(X_train), len(numeric_columns)))

# Copy first so X_train itself stays untouched and this cell is idempotent.
X_train_noisy = X_train.copy()
X_train_noisy[numeric_columns] = X_train[numeric_columns] + noise

In [10]:
# Shallow, regularised random forest: 50 trees, depth capped at 5, and minimum
# sample counts for splits and leaves; seeded for repeatable fits.
forest = RandomForestClassifier(
    n_estimators=50,
    max_depth=5,
    min_samples_split=20,
    min_samples_leaf=10,
    random_state=42,
)

# Preprocessing and model are chained so both are applied together on fit/predict.
pipeline = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('classifier', forest),
    ]
)

In [11]:
# Fit the scaler and the forest on the noise-augmented training rows.
# Left as the cell's last expression so the fitted pipeline is displayed.
pipeline.fit(X=X_train_noisy, y=y_train)

Pipeline(steps=[('preprocessor',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('num', StandardScaler(),
                                                  Index(['Usage_kWh', 'Lagging_Current_Reactive.Power_kVarh',
       'Leading_Current_Reactive_Power_kVarh', 'CO2(tCO2)',
       'Lagging_Current_Power_Factor', 'Leading_Current_Power_Factor', 'NSM'],
      dtype='object'))])),
                ('classifier',
                 RandomForestClassifier(max_depth=5, min_samples_leaf=10,
                                        min_samples_split=20, n_estimators=50,
                                        random_state=42))])

In [12]:
# Predict WeekStatus for the held-out rows (no noise is added to X_test).
y_pred = pipeline.predict(X=X_test)

In [13]:
# Fraction of held-out rows whose predicted label matches the true label,
# reported as a percentage.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

Accuracy: 99.51%


In [14]:
def predict_weekstatus(input_data):
    """Predict the WeekStatus label for a single observation.

    Parameters
    ----------
    input_data : dict
        Mapping of feature name to raw value. Any key that also appears in the
        module-level ``label_encoders`` is converted with that encoder's
        ``transform`` before prediction; other keys are passed through as-is.
        The mapping is not modified.

    Returns
    -------
    The first element of ``pipeline.predict`` for the resulting one-row frame.

    Raises
    ------
    Whatever the encoder's ``transform`` raises for a value it cannot encode
    (for scikit-learn's LabelEncoder this is presumably a ValueError on labels
    not seen during fitting -- confirm against the installed version).
    """
    # Encode a shallow copy. Writing the integer codes back into the caller's
    # dict would make a second call with the same dict fail, because the
    # encoder would then receive a code instead of the original string.
    encoded_row = dict(input_data)
    for col, le in label_encoders.items():
        if col in encoded_row:
            # transform expects a sequence, so wrap the scalar and unwrap the result.
            encoded_row[col] = le.transform([encoded_row[col]])[0]

    # The pipeline expects tabular input: build a single-row DataFrame.
    row_frame = pd.DataFrame([encoded_row])

    prediction = pipeline.predict(row_frame)

    return prediction[0]

In [15]:
# Example observation for a single prediction, keyed by the dataset's column names.
# Numeric readings come first ...
sample_readings = {
    'Usage_kWh': 3.5,
    'Lagging_Current_Reactive.Power_kVarh': 4.0,
    'Leading_Current_Reactive_Power_kVarh': 0.0,
    'CO2(tCO2)': 0.5,
    'Lagging_Current_Power_Factor': 70.0,
    'Leading_Current_Power_Factor': 100.0,
    'NSM': 5400,
}
# ... followed by the string-valued fields, given in raw (un-encoded) form.
sample_labels = {
    'Day_of_week': 'Monday',
    'Load_Type': 'Light_Load',
    'date': '01/01/2018 00:30',
}
# Merging preserves insertion order: readings, then labels.
input_data = {**sample_readings, **sample_labels}

In [16]:
# Pass a shallow copy so input_data keeps its raw string values regardless of
# whether the helper encodes its argument in place; this keeps the cell safe
# to re-run without re-creating input_data first.
predicted_weekstatus = predict_weekstatus(dict(input_data))
print(f'The predicted WeekStatus for the input data is: {predicted_weekstatus}')

The predicted WeekStatus for the input data is: Weekday
