<a href="https://colab.research.google.com/github/Sukheshkanna13/intel-ie-AI-manufacturing-assistant-/blob/main/Intely.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **INSTALLING REQUIRED LIBRARIES**

In [1]:
!pip install scikit-learn-intelex==2024.7.0
!pip install onnx skl2onnx
!pip install onnxruntime pandas scikit-learn-intelex transformers
!pip install pandas scikit-learn-intelex joblib



# TRAINING GRADIENT BOOST MODEL

In [2]:

import pandas as pd
from sklearnex import patch_sklearn
patch_sklearn()
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error,mean_absolute_percentage_error
data = pd.read_csv('cleaned_data.csv')
features = ['cores', 'Freq (MHz)', 'TDP (W)', 'Die Size (mm^2)', 'Transistors (million)', 'Process Size (nm)']
target = 'cpuMark'
X = data[features]
y = data[target]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = GradientBoostingRegressor()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")
mape = mean_absolute_percentage_error(y_test, y_pred)
accuracy = 100 - (mape * 100)
print('Accuracy:', round(accuracy, 2), '%')
import joblib
joblib.dump(model, 'gradient_boosting_model.pkl')
new_cpu_df = pd.DataFrame([[10, 3700, 125, 200, 10000, 7]], columns=features)
predicted_cpumark = model.predict(new_cpu_df)
print(f"Predicted cpuMark for new CPU: {predicted_cpumark[0]}")

Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)


Mean Squared Error: 2321658.198307937
Accuracy: 95.47 %
Predicted cpuMark for new CPU: 21036.149997671837


In [3]:

import joblib
model = joblib.load('/content/gradient_boosting_model.pkl')
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType
initial_type = [('float_input', FloatTensorType([None, 6]))]
onnx_model = convert_sklearn(model, initial_types=initial_type)
import onnx
onnx.save_model(onnx_model, "gbf.onnx")

# **PREDICTING THROUGHT THE MODEL**

In [5]:
import onnxruntime as rt
import numpy as np
import pandas as pd
session = rt.InferenceSession("gbf.onnx")
input_name = session.get_inputs()[0].name
output_name = session.get_outputs()[0].name
print(f"Input name: {input_name}")
print(f"Output name: {output_name}")
features = ['Cores', 'Base Clock (MHz)', 'TDP (W)', 'Socket', 'L3 Cache (MB)', 'Threads']
new_cpu_df = pd.DataFrame([[10, 3700, 125, 206, 624, 14]], columns=features)
input_data = new_cpu_df.values.astype(np.float32)
result = session.run([output_name], {input_name: input_data})
print("Inference result:", result)

Input name: float_input
Output name: variable
Inference result: [array([[19438.621]], dtype=float32)]


In [7]:
import onnxruntime as rt
import pandas as pd
import numpy as np
from transformers import pipeline
session = rt.InferenceSession("gbf.onnx")
input_name = session.get_inputs()[0].name
output_name = session.get_outputs()[0].name
generator = pipeline('text-generation', model='gpt2')
def generate_cpu_configurations(num_configs=5):
    configurations = []
    for _ in range(num_configs):
        cores = np.random.randint(4, 16)
        base_clock = np.random.randint(2500, 5000)
        tdp = np.random.randint(65, 150)
        socket = np.random.choice([206, 207, 208])
        l3_cache = np.random.randint(4, 32)
        threads = cores * 2
        configurations.append([cores, base_clock, tdp, socket, l3_cache, threads])
    return pd.DataFrame(configurations, columns=['Cores', 'Base Clock (MHz)', 'TDP (W)', 'Socket', 'L3 Cache (MB)', 'Threads'])
new_cpu_configs = generate_cpu_configurations()
print("Generated CPU Configurations:\n", new_cpu_configs)
input_data = new_cpu_configs.values.astype(np.float32)
predictions = session.run([output_name], {input_name: input_data})
for config, prediction in zip(new_cpu_configs.values, predictions):
    print(f"Configuration: {config} - Predicted Performance: {prediction[0]}")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Generated CPU Configurations:
    Cores  Base Clock (MHz)  TDP (W)  Socket  L3 Cache (MB)  Threads
0      5              3077      136     208              9       10
1      4              2845       97     207              4        8
2     14              3308      125     206              7       28
3     10              2721      130     208             11       20
4     15              4405       76     207             13       30
Configuration: [   5 3077  136  208    9   10] - Predicted Performance: [6783.0107]




In [8]:

import onnxruntime as rt
import pandas as pd
import numpy as np
session = rt.InferenceSession("gbf.onnx")
input_name = session.get_inputs()[0].name
output_name = session.get_outputs()[0].name
def generate_cpu_configurations(num_configs=5):
    configurations = []
    for _ in range(num_configs):
        cores = np.random.randint(4, 16)
        base_clock = np.random.randint(2500, 5000)
        tdp = np.random.randint(65, 150)
        socket = np.random.choice([206, 207, 208])
        l3_cache = np.random.randint(4, 32)
        threads = cores * 2
        configurations.append([cores, base_clock, tdp, socket, l3_cache, threads])
    return pd.DataFrame(configurations, columns=['Cores', 'Base Clock (MHz)', 'TDP (W)', 'Socket', 'L3 Cache (MB)', 'Threads'])
new_cpu_configs = generate_cpu_configurations()
print("Generated CPU Configurations:\n", new_cpu_configs)
input_data = new_cpu_configs.values.astype(np.float32)
predictions = session.run([output_name], {input_name: input_data})
best_index = np.argmax(predictions)
best_configuration = new_cpu_configs.iloc[best_index]
best_performance = predictions[0][best_index]
print("\nBest Configuration:")
print(best_configuration)
print(f"Predicted Performance: {best_performance}")


Generated CPU Configurations:
    Cores  Base Clock (MHz)  TDP (W)  Socket  L3 Cache (MB)  Threads
0      8              3709       93     208             28       16
1      8              4165      140     207             28       16
2      7              4126      130     206              9       14
3      7              4929      146     207             18       14
4      4              4167      131     208             27        8

Best Configuration:
Cores                  8
Base Clock (MHz)    4165
TDP (W)              140
Socket               207
L3 Cache (MB)         28
Threads               16
Name: 1, dtype: int64
Predicted Performance: [19099.227]


In [9]:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error
import joblib
data = pd.read_csv('cleaned_data.csv')
features = [
    'Process Size (nm)',
    'TDP (W)',
    'Die Size (mm^2)',
    'Transistors (million)',
    'Freq (MHz)',
    'cpuMark',
    'cpuValue',
    'threadMark',
    'threadValue',
    'cores'
]
target = 'Perf_per_Watt'
X = data[features]
y = data[target]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
gbr_model = GradientBoostingRegressor()
gbr_model.fit(X_train, y_train)
joblib.dump(gbr_model, 'gbr_energy_efficiency_model.pkl')
def generate_cpu_configurations(num_configs=5):
    configurations = []
    for _ in range(num_configs):
        process_size = np.random.choice([7, 10, 14, 16, 22])
        tdp = np.random.randint(35, 250)
        die_size = np.random.uniform(50, 200)
        transistors = np.random.randint(100, 5000)
        freq = np.random.randint(2000, 5000)
        cpu_mark = np.random.randint(1000, 30000)
        cpu_value = np.random.randint(500, 15000)
        thread_mark = np.random.randint(1000, 20000)
        thread_value = np.random.randint(500, 15000)
        cores = np.random.randint(2, 16)
        configurations.append([
            process_size, tdp, die_size, transistors,
            freq, cpu_mark, cpu_value, thread_mark, thread_value, cores
        ])
    return pd.DataFrame(configurations, columns=[
        'Process Size (nm)', 'TDP (W)', 'Die Size (mm^2)',
        'Transistors (million)', 'Freq (MHz)', 'cpuMark',
        'cpuValue', 'threadMark', 'threadValue', 'cores'
    ])
num_configs = 5
new_cpu_configs = generate_cpu_configurations(num_configs)
print("Generated CPU Configurations:\n", new_cpu_configs)
input_data = new_cpu_configs.values.astype(np.float32)
predictions = gbr_model.predict(input_data)
best_index = np.argmax(predictions)
best_configuration = new_cpu_configs.iloc[best_index]
best_performance = predictions[best_index]
print("\nBest Configuration:")
print(best_configuration)
print(f"Predicted Performance per Watt: {best_performance}")


Generated CPU Configurations:
    Process Size (nm)  TDP (W)  Die Size (mm^2)  Transistors (million)  \
0                 16       38        78.577217                    245   
1                 10      103       134.957815                   4882   
2                 16       81       164.986143                   4612   
3                 16       70       186.352847                   2755   
4                 22       44       137.821280                   3956   

   Freq (MHz)  cpuMark  cpuValue  threadMark  threadValue  cores  
0        4014    25300      4661       15213         8244     11  
1        4718     8942     13197       14116         1173      4  
2        2569    18278     13932       19663         1821      3  
3        2286    25108     10296       15575          969     10  
4        2715    29642      7449        1120        11767     11  

Best Configuration:
Process Size (nm)           16.000000
TDP (W)                     38.000000
Die Size (mm^2)             78.

