<a href="https://colab.research.google.com/github/SandySingh72/DATA_Analytics/blob/main/Predictive_Modelling_on_Glass_DATA_For_Forensic.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

In [17]:
import sklearn
sklearn.__version__

'1.6.1'

In [4]:
glass = pd.read_csv('Glass.csv')
glass.shape

(214, 10)

In [5]:
train, test = train_test_split(glass, test_size=0.3, random_state=25, stratify=glass['Type'])

In [6]:
x_train = train.drop('Type', axis=1)
y_train = train['Type']
x_test = test.drop('Type', axis=1)
y_test = test['Type']

# **Linear Regression**

In [9]:
lr = LogisticRegression()
lr.fit(x_train, y_train)
y_pred = lr.predict(x_test)
accuracy_score(y_test, y_pred)

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.6615384615384615

# **Decision Tree Regression**

In [11]:
dtc = DecisionTreeClassifier(random_state=25)
dtc.fit(x_train, y_train)
y_pred = dtc.predict(x_test)
accuracy_score(y_test, y_pred)

0.6615384615384615

# **Random Forest Regressor**

In [12]:
rf = RandomForestClassifier(random_state=25)
rf.fit(x_train, y_train)
y_pred = rf.predict(x_test)
accuracy_score(y_test, y_pred)

0.7538461538461538

# **Gradient Boosting Regressor**

In [13]:
gbm = GradientBoostingClassifier(random_state=25)
gbm.fit(x_train, y_train)
y_pred = gbm.predict(x_test)
accuracy_score(y_test, y_pred)

0.8

# **Hyper-Parameter Tuning**

In [15]:
rates = [0.1, 0.3, 0.5, 0.8]
depths = [2, 3, 4, 5]
scores = []
for r in rates:
    for d in depths:
        gbm = GradientBoostingClassifier(random_state=25, learning_rate=r, max_depth=d)
        gbm.fit(x_train, y_train)
        y_pred = gbm.predict(x_test)
        scores.append([r, d, accuracy_score(y_test, y_pred)])

In [16]:
df_scores = pd.DataFrame(scores, columns = ['rate','depth','acc'])
df_scores.sort_values('acc', ascending=False)

Unnamed: 0,rate,depth,acc
15,0.8,5,0.861538
0,0.1,2,0.815385
6,0.3,4,0.815385
7,0.3,5,0.815385
11,0.5,5,0.815385
14,0.8,4,0.815385
9,0.5,3,0.815385
1,0.1,3,0.8
2,0.1,4,0.8
8,0.5,2,0.784615


# **Inferencing**

# **Building the Best Model on whole Data**

In [21]:
x = glass.drop('Type', axis=1)
y = glass['Type']

In [25]:
best_model = GradientBoostingClassifier(random_state=25, learning_rate=0.8, max_depth=5)
best_model.fit(x, y)

# **Unlabelled Data**

In [27]:
tst_glass = pd.read_csv('tst_Glass.csv')
tst_glass['identified as'] = best_model.predict(tst_glass)
tst_glass['identified as'].value_counts()

Unnamed: 0_level_0,count
identified as,Unnamed: 1_level_1
containers,3
headlamps,2
building_windows_non_float_processed,1


In [28]:
tst_glass

Unnamed: 0,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe,identified as
0,1.5321,14.0,0.0,0.34,70.23,0.001,6.7,1.23,0.0,headlamps
1,1.5212,15.0,3.0,1.23,75.9,0.1,7.0,0.0,0.44,building_windows_non_float_processed
2,1.5112,13.0,3.5,2.3,73.0,3.4,14.0,2.3,0.22,containers
3,1.5,12.4,1.23,3.22,74.22,4.5,10.0,3.1,0.1,containers
4,1.52,13.0,2.4,0.34,71.22,3.2,9.0,1.44,0.001,headlamps
5,1.51,16.0,2.7,4.0,70.0,2.0,6.0,2.9,0.89,containers


In [30]:
!pip install gradio



In [29]:
import gradio as gr

In [31]:
def predict(RI, Na, Mg, Al, Si, K, Ca, Ba, Fe):
    tst = pd.DataFrame([[RI, Na, Mg, Al, Si, K, Ca, Ba, Fe]],
          columns=['RI', 'Na', 'Mg', 'Al', 'Si', 'K', 'Ca', 'Ba', 'Fe'])
    return best_model.predict(tst)[0]

In [32]:
with gr.Blocks() as demo:
    with gr.Row():
      RI = gr.Number(label='RI')
      Na = gr.Number(label='Na')
      Mg = gr.Number(label='Mg')
    with gr.Row():
      Al = gr.Number(label='Al')
      Si = gr.Number(label='Si')
      K = gr.Number(label='K')
    with gr.Row():
      Ca = gr.Number(label='Ca')
      Ba = gr.Number(label='Ba')
      Fe = gr.Number(label='Fe')
    with gr.Row():
      Type = gr.Text(label='Type')
    with gr.Row():
      button = gr.Button(value="Which Glass?")
      button.click(predict,
            inputs=[RI, Na, Mg, Al, Si, K, Ca, Ba, Fe],
            outputs=[Type])

In [33]:
demo.launch()

It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://149f1b1a18c5765b4c.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


