<a href="https://colab.research.google.com/github/AmritSDutta/colab_ml/blob/main/All_the_steps.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install opendatasets, which the next cell imports to download the dataset.
!pip install opendatasets

In [None]:
import opendatasets as od
import pandas as pd

# Download the Kaggle iris dataset; a later cell reads iris-dataset/iris.csv from it.
# NOTE(review): presumably this prompts for Kaggle API credentials on first run -- confirm.
od.download("https://www.kaggle.com/datasets/himanshunakrani/iris-dataset")

In [None]:
# Load the downloaded CSV. The path is relative to the working directory because
# od.download() is called without a target directory in the download cell; this
# keeps the notebook runnable outside Colab (where the working directory is /content).
df = pd.read_csv("iris-dataset/iris.csv")

In [None]:
# Preview the first rows of the loaded frame.
df.head()


In [None]:
from sklearn.preprocessing import LabelEncoder

# Create a LabelEncoder instance.
le = LabelEncoder()

# Fit the encoder on the 'species' column and
# get the encoded labels back in one call.
label = le.fit_transform(df['species'])

# Store the encoded labels on the DataFrame
# as a new column named 'y'.
df["y"] = label
df.head()

In [None]:
from sklearn.utils import shuffle

# Shuffle the rows with a fixed seed so the row order -- and everything derived
# from df_shuffled further down -- is the same on every run.
df_shuffled = shuffle(df, random_state=42)
df_shuffled.head()

In [None]:
# Feature matrix: every column except the species name and its encoded label.
X = df_shuffled.drop(columns=["species", "y"]).to_numpy()
# Target vector: the encoded species label.
y = df_shuffled["y"].to_numpy()

**Split the data into train and test sets**


In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows as a test set; random_state is fixed for this split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

**Training: standardize the features and fit the model**

In [None]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
# Fit the scaler on the training split only, then apply that same fitted scaler
# to the test split. Both arrays are overwritten with their scaled versions.
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [None]:
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(random_state = 0)
# `output` holds whatever fit() returns; its score() is reported on both splits below.
output = classifier.fit(X_train, y_train)
print(f'Train output: {output.score(X_train, y_train)}')
print(f'Test output: {output.score(X_test,y_test)}')


**Model explain: PermutationImportance**

In [None]:
# Install eli5, which the next cell imports for permutation importance.
!pip install eli5

In [None]:
import eli5
from eli5.sklearn import PermutationImportance

# Feature-only view of the data (label columns removed); its column names are
# reused as feature names by the explainability cells.
dummy_df = df_shuffled.drop(columns=["species", "y"])

# Permutation importance of the fitted classifier, computed on the test split.
perm = PermutationImportance(classifier, random_state=1)
perm.fit(X_test, y_test)
eli5.show_weights(perm, feature_names=dummy_df.columns.to_list())


**Model Explainability with SHAP** [SHAP example: Sentiment Analysis with Logistic Regression](https://shap.readthedocs.io/en/latest/example_notebooks/tabular_examples/linear_models/Sentiment%20Analysis%20with%20Logistic%20Regression.html)

In [None]:
import shap

# Explain the classifier's predictions with SHAP's Linear explainer; X_test is passed
# as the explainer's data argument and the column names serve as feature names.
explainer = shap.explainers.Linear(classifier, X_test, feature_names=dummy_df.columns)
shap_values = explainer(X_test)
print(shap_values.data.shape)

# Summary plot of the SHAP values computed above (no single feature is selected here).
shap.summary_plot(shap_values)

In [None]:
# Install skl2onnx, which the ONNX conversion cell imports.
!pip install skl2onnx

**Model optimization ONNX**

In [None]:
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType

# Declare a single float input named 'float_input': the first dimension is left
# unspecified (None) and the second is the number of feature columns in X_train.
initial_type = [('float_input', FloatTensorType([None, X_train.shape[1]]))]
# Convert the fitted classifier to an ONNX model.
onnx_model = convert_sklearn(classifier, initial_types=initial_type)

# Write the serialized model to disk.
# NOTE(review): the file is called "rf_model.onnx" although `classifier` is a
# LogisticRegression; the inference cell hard-codes the same name, so any rename
# has to be made in both places.
with open("rf_model.onnx", "wb") as f:
    f.write(onnx_model.SerializeToString())


In [None]:
# Install onnxruntime, which the inference cell imports.
!pip install onnxruntime

Model Inference from ONNX model

In [None]:
import onnxruntime as ort
import numpy as np
from sklearn.metrics import accuracy_score
# Load the ONNX model written by the export cell
session = ort.InferenceSession("rf_model.onnx")

# Prepare input: the name of the session's first input, and the test features cast to float32
input_name = session.get_inputs()[0].name
input_data = X_test.astype(np.float32)

# Run inference and keep the first returned output
# NOTE(review): presumably the first output holds the predicted class labels -- confirm.
preds = session.run(None, {input_name: input_data})[0]
# Accuracy of the ONNX predictions against the test labels
print(accuracy_score(y_test, preds))

In [None]:
import torch
from torch.nn import functional as F
from torch import nn

torch_model = nn.Sequential(
    nn.Linear(4, 10),
    nn.ReLU(),
    nn.Linear(10, 3),
)
torch_model.compile()
print(torch_model)

In [None]:
from torch.utils.data import DataLoader, TensorDataset

# Wrap the NumPy splits as tensors: float32 for features, long for class labels.
X_train_tensor, X_test_tensor = (
    torch.tensor(split, dtype=torch.float32) for split in (X_train, X_test)
)
y_train_tensor, y_test_tensor = (
    torch.tensor(split, dtype=torch.long) for split in (y_train, y_test)
)

# Pair training features with labels and iterate over them in shuffled mini-batches of 8.
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)

In [None]:
from torch import optim
from torch import nn

# Single source of truth for the loop bound and the progress message.
NUM_EPOCHS = 100

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(torch_model.parameters(), lr=0.01)

# Put the model in training mode explicitly: the evaluation cell switches it to
# eval mode, so re-running this cell afterwards would otherwise train in eval mode.
torch_model.train()

# Training loop
for epoch in range(NUM_EPOCHS):
    for batch_X, batch_y in train_loader:
        optimizer.zero_grad()
        outputs = torch_model(batch_X)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()

    # Report every 20th epoch; the value shown is the loss of that epoch's last mini-batch.
    if (epoch + 1) % 20 == 0:
        print(f"Epoch [{epoch+1}/{NUM_EPOCHS}], Loss: {loss.item():.4f}")

In [None]:
# Evaluate on the test tensors with gradient tracking disabled.
torch_model.eval()
with torch.no_grad():
    outputs = torch_model(X_test_tensor)
    # Predicted class = index of the largest output per row.
    predicted = outputs.argmax(dim=1)
    # Accuracy is computed directly on the tensors, so this cell does not depend on
    # the accuracy_score import that lives in the ONNX inference cell.
    acc = (predicted == y_test_tensor).sum().item() / y_test_tensor.numel()
    print(f"Test Accuracy: {acc * 100:.2f}%")


In [None]:
!pip install torch torchviz graphviz


In [None]:
from torchviz import make_dot
# Take the first test row and add a leading batch dimension with unsqueeze(0).
sample_input = X_test_tensor[0].unsqueeze(0)
# Build the graph of one forward pass, labelled with the model's named parameters,
# and render it as "iris_model" in PNG format (the next cell displays iris_model.png).
make_dot(torch_model(sample_input), params=dict(torch_model.named_parameters())).render("iris_model", format="png")


In [None]:
from IPython.display import Image
# Display the PNG rendered by the previous cell.
Image("iris_model.png")

In [None]:
# SHAP Explainability
# The first 20 training rows are the explainer's background data; the first 5 test
# rows are the ones being explained.
background = X_train_tensor[:20]
data_to_explain = X_test_tensor[:5]

# Note: `explainer` and `shap_values` overwrite the names used by the linear-model SHAP cell.
explainer = shap.GradientExplainer(torch_model, background)
shap_values = explainer.shap_values(data_to_explain)

# Visualization
shap.summary_plot(shap_values, data_to_explain.numpy(), feature_names=dummy_df.columns)

In [None]:
# Quantize the model (dynamic quantization of its nn.Linear layers to qint8)
quantized_model = torch.quantization.quantize_dynamic(
    torch_model, {nn.Linear}, dtype=torch.qint8
)

# Evaluate the quantized model on the same test tensors
with torch.no_grad():
    pred_quant = quantized_model(X_test_tensor).argmax(dim=1)
    # Accuracy is computed directly on the tensors, so this cell does not depend on
    # the accuracy_score import that lives in the ONNX inference cell.
    acc_quant = (pred_quant == y_test_tensor).sum().item() / y_test_tensor.numel()
    print(f"[After Quantization] Accuracy: {acc_quant * 100:.2f}%")

In [None]:
import tensorflow as tf

from tensorflow import keras
from keras.layers import Dense

In [None]:
def train_model(_compiled_model, _X_train, _y_train, _X_valid, _y_valid, _model_file_name,
                _epochs=25, _patience=2):
    """Fit a compiled Keras model with early stopping, save it, and return the fit history.

    Args:
        _compiled_model: model object exposing ``fit`` and ``save`` (already compiled).
        _X_train, _y_train: training features and labels passed to ``fit``.
        _X_valid, _y_valid: validation features and labels, passed as ``validation_data``.
        _model_file_name: path handed to ``_compiled_model.save``.
        _epochs: maximum number of epochs (default 25, the previous hard-coded value).
        _patience: ``patience`` for the EarlyStopping callback (default 2, as before).

    Returns:
        Whatever ``_compiled_model.fit`` returns, so callers can inspect the training
        curves. Previously this value was computed and then discarded.
    """
    _early_stopping = keras.callbacks.EarlyStopping(patience=_patience)
    _history = _compiled_model.fit(
        _X_train, _y_train,
        epochs=_epochs,
        validation_data=(_X_valid, _y_valid),
        callbacks=[_early_stopping],
    )
    # Save only after fitting so the file holds the trained model.
    _compiled_model.save(_model_file_name)
    return _history


In [None]:
# Number of training rows; the following cells slice X_train at index 100.
len(X_train)

In [None]:
# Rows from index 100 onward of the training split, kept as a validation set.
# The model cell slices the same range again itself rather than using these names.
X_valid, y_valid = X_train[100:], y_train[100:]

In [None]:
# Small dense classifier: 4 input features, two ReLU layers, 3-way softmax output.
tf_model = keras.models.Sequential()
# Declare the input shape with an explicit Input object instead of an `input_shape=`
# layer argument, which newer Keras versions warn about.
tf_model.add(keras.Input(shape=(4,)))
tf_model.add(keras.layers.Flatten())
tf_model.add(keras.layers.Dense(4, activation="relu"))
tf_model.add(keras.layers.Dense(10, activation="relu"))
tf_model.add(keras.layers.Dense(3, activation="softmax"))

tf_model.compile(loss="sparse_categorical_crossentropy",
              optimizer=keras.optimizers.Adam(learning_rate=0.01),
              metrics=["accuracy"])
# summary() prints the table itself and returns None, so wrapping it in print()
# only added a stray "None" line to the output.
tf_model.summary()
model_file_name = 'iris_data_model.keras'

# Train on the first 100 training rows and validate on the remaining ones.
train_model(tf_model, X_train[:100], y_train[:100], X_train[100:], y_train[100:], model_file_name)

# Reload the model from the file written by train_model and evaluate it on the test split.
saved_model = keras.models.load_model(model_file_name)
score = saved_model.evaluate(X_test, y_test)
print('Score: ' + str(score))

In [None]:
# SHAP Explainability
# The first 20 training rows are the explainer's background data; the first 5 test
# rows are the ones being explained. These names replace the tensor versions
# defined in the PyTorch SHAP cell.
background = X_train[:20]
data_to_explain = X_test[:5]

explainer = shap.GradientExplainer(tf_model, background)
shap_values = explainer.shap_values(data_to_explain)

# Visualization
shap.summary_plot(shap_values, data_to_explain, feature_names=dummy_df.columns)

In [None]:
#shap.initjs()
# Kernel-based SHAP explanation of the reloaded model. `background` and
# `data_to_explain` are the arrays defined in the previous SHAP cell.
# NOTE(review): the model object itself is passed as the prediction function --
# confirm the installed SHAP version accepts this (passing `saved_model.predict`
# is the alternative to check).
explainer = shap.KernelExplainer(saved_model, background)
# The resulting values are only stored here; the plotting call below is commented out.
shap_values = explainer.shap_values(data_to_explain, nsamples=50)
#shap.plots.force(explainer.expected_value[0], shap_values[..., 0], feature_names=dummy_df.columns)