# Generate Synthetic Data

In [6]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Seed NumPy's global RNG so the synthetic dataset is reproducible.
np.random.seed(42)

# Number of synthetic records to generate.
n = 1000

machine_types = ['Machine_A', 'Machine_B', 'Machine_C', 'Machine_D']
shift_types = ['Day', 'Night', 'Mixed']

# NOTE: every draw below advances the same global RNG stream, so the order of
# these statements determines the generated values.
machine_type = np.random.choice(machine_types, size=n)
shift_type = np.random.choice(shift_types, size=n)
operation_time = np.random.normal(loc=10, scale=2, size=n)          # hours
production_volume = np.random.normal(loc=1000, scale=150, size=n)   # units
temperature = np.random.normal(loc=22, scale=2, size=n)             # degrees C
energy_consumption = np.random.normal(loc=200, scale=50, size=n)    # kWh

# Bucket consumption into three labels: below 180 -> 'Low',
# 180 through 220 inclusive -> 'Optimal', anything above 220 -> 'High'.
# The first matching condition wins, mirroring an if/elif/else chain.
energy_efficiency = np.select(
    [energy_consumption < 180, energy_consumption <= 220],
    ['Low', 'Optimal'],
    default='High',
).tolist()

# Assemble one row per record; the label column is last.
data = pd.DataFrame({
    'Machine_Type': machine_type,
    'Shift_Type': shift_type,
    'Operation_Time (hrs)': operation_time,
    'Production_Volume': production_volume,
    'Temperature (C)': temperature,
    'Energy_Consumption (kWh)': energy_consumption,
    'Energy_Efficiency': energy_efficiency,
})

# Swap the two string-valued columns for integer codes. One encoder is kept per
# column so its fitted mapping stays available after this cell.
le_machine, le_shift = LabelEncoder(), LabelEncoder()
data['Machine_Type'] = le_machine.fit_transform(data['Machine_Type'])
data['Shift_Type'] = le_shift.fit_transform(data['Shift_Type'])

# Preview the first rows of the encoded frame.
data.head()


Unnamed: 0,Machine_Type,Shift_Type,Operation_Time (hrs),Production_Volume,Temperature (C),Energy_Consumption (kWh),Energy_Efficiency
0,2,2,11.81499,870.173022,19.148813,243.277922,High
1,3,1,10.071789,1031.253294,23.625828,232.879924,High
2,0,0,13.730006,1016.074034,20.44522,222.848368,High
3,2,0,11.640965,1179.503623,23.386649,265.645133,High
4,2,0,12.207428,1040.780974,21.14331,228.000757,High


# Classification Model

In [7]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from collections import Counter

# Target is the efficiency label; features are every remaining column.
# NOTE(review): 'Energy_Consumption (kWh)' remains in X even though the label
# was produced by thresholding that very column, so a classifier can recover
# the label from it alone. Consider dropping it from X if the model is meant
# to learn from the other features.
y = data['Energy_Efficiency']
X = data.drop(columns='Energy_Efficiency')

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Basic Decision Tree Model Implementation

class DecisionTree:
    """Minimal decision tree classifier that splits on Gini impurity.

    An internal node is a dict with the keys ``'feature_index'``, ``'value'``,
    ``'left'`` and ``'right'``: samples whose feature is ``<= value`` go
    left, all others go right. A leaf is a bare class label.

    Args:
        max_depth: Maximum number of splits on any root-to-leaf path.
            ``None`` (the default) grows the tree until every leaf is pure
            or no valid split remains; ``0`` produces a single leaf holding
            the majority class.
    """

    def __init__(self, max_depth=None):
        self.max_depth = max_depth
        self.tree = None

    def _gini(self, y):
        """Return the Gini impurity of the labels in *y* (0.0 when empty)."""
        total = len(y)
        if total == 0:
            # An empty partition contributes no impurity and must not divide
            # by zero.
            return 0.0
        class_counts = Counter(y)
        return 1 - sum((count / total) ** 2 for count in class_counts.values())

    def _best_split(self, X, y):
        """Find the threshold split with the lowest weighted Gini impurity.

        Args:
            X: 2-D array of shape (n_samples, n_features).
            y: 1-D array of the n_samples labels.

        Returns:
            A tuple ``(best_split, best_left, best_right)`` where
            ``best_split`` is ``(feature_index, value)`` and each side is
            ``(mask, labels)``. All three are ``None`` when no split puts at
            least one sample on each side.
        """
        best_gini = float('inf')
        best_split = None
        best_left = None
        best_right = None
        n_samples = len(y)

        for feature_index in range(X.shape[1]):
            column = X[:, feature_index]
            # Sorted unique thresholds make tie-breaking deterministic.
            for value in np.unique(column):
                left_mask = column <= value
                n_left = int(left_mask.sum())
                # A split that leaves one side empty separates nothing and
                # would make the recursion loop or hit an empty partition.
                if n_left == 0 or n_left == n_samples:
                    continue
                right_mask = ~left_mask

                left_y = y[left_mask]
                right_y = y[right_mask]

                gini = (
                    (n_left / n_samples) * self._gini(left_y)
                    + ((n_samples - n_left) / n_samples) * self._gini(right_y)
                )

                if gini < best_gini:
                    best_gini = gini
                    best_split = (feature_index, value)
                    best_left = (left_mask, left_y)
                    best_right = (right_mask, right_y)

        return best_split, best_left, best_right

    def _build_tree(self, X, y, depth=0):
        """Recursively build the subtree for the samples ``(X, y)``.

        Returns a leaf label when the depth limit is reached, the labels are
        pure, or no valid split exists; otherwise returns an internal-node
        dict.
        """
        # Compare against None explicitly so that max_depth=0 is honoured.
        depth_exhausted = self.max_depth is not None and depth >= self.max_depth
        if depth_exhausted or len(set(y)) == 1:
            return Counter(y).most_common(1)[0][0]

        best_split, best_left, best_right = self._best_split(X, y)

        if best_split is None:
            # E.g. all rows share identical features but differ in label.
            return Counter(y).most_common(1)[0][0]

        feature_index, value = best_split
        left_mask, left_y = best_left
        right_mask, right_y = best_right

        left_tree = self._build_tree(X[left_mask], left_y, depth + 1)
        right_tree = self._build_tree(X[right_mask], right_y, depth + 1)

        return {
            'feature_index': feature_index,
            'value': value,
            'left': left_tree,
            'right': right_tree
        }

    def fit(self, X, y):
        """Build the tree from training features *X* and labels *y*.

        Args:
            X: Array-like of shape (n_samples, n_features).
            y: Array-like of n_samples labels.

        Raises:
            ValueError: If *X* is not 2-D, holds no samples, or its row count
                differs from the number of labels.
        """
        X = np.array(X)
        y = np.array(y)
        if X.ndim != 2 or X.shape[0] == 0:
            raise ValueError("X must be a non-empty 2-D array of samples")
        if len(y) != X.shape[0]:
            raise ValueError("X and y must contain the same number of samples")
        self.tree = self._build_tree(X, y)

    def _predict_one(self, x, tree):
        """Walk *tree* for the single sample *x* and return the leaf label."""
        if not isinstance(tree, dict):
            return tree
        if x[tree['feature_index']] <= tree['value']:
            return self._predict_one(x, tree['left'])
        else:
            return self._predict_one(x, tree['right'])

    def predict(self, X):
        """Return a list with one predicted label per row of *X*."""
        X = np.array(X)
        return [self._predict_one(x, self.tree) for x in X]

# Fit the hand-written tree, capped at depth 5, on the training split.
model = DecisionTree(max_depth=5)
model.fit(X_train.to_numpy(), y_train)

# Predict one label per held-out row.
y_pred = model.predict(X_test.to_numpy())

# Report the share of held-out rows labelled correctly, then dump the raw
# predictions.
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")
print(y_pred)


Accuracy: 100.00%
['High', 'High', 'Low', 'Optimal', 'Low', 'Low', 'Low', 'High', 'High', 'High', 'High', 'High', 'Optimal', 'Low', 'Low', 'High', 'Low', 'Low', 'High', 'Low', 'Low', 'Low', 'Optimal', 'High', 'Optimal', 'Optimal', 'Optimal', 'Optimal', 'Low', 'Optimal', 'High', 'Low', 'Optimal', 'Optimal', 'Optimal', 'Low', 'High', 'Low', 'Optimal', 'High', 'Optimal', 'Optimal', 'High', 'Low', 'High', 'High', 'High', 'High', 'Low', 'Optimal', 'Optimal', 'Low', 'Optimal', 'Low', 'High', 'Low', 'Low', 'Low', 'Low', 'High', 'Low', 'High', 'Low', 'Low', 'High', 'High', 'Low', 'High', 'High', 'Optimal', 'Low', 'High', 'High', 'Low', 'Optimal', 'Optimal', 'High', 'High', 'Low', 'High', 'Optimal', 'Low', 'High', 'High', 'Optimal', 'Low', 'High', 'Low', 'Low', 'Low', 'High', 'Optimal', 'High', 'High', 'High', 'Low', 'Optimal', 'Low', 'High', 'Optimal', 'High', 'Low', 'High', 'Low', 'Optimal', 'Low', 'Low', 'Optimal', 'High', 'High', 'High', 'Low', 'Optimal', 'High', 'High', 'Low', 'High', 'Low