In [1]:
!pip install -q kaggle

# If running on Colab, upload your kaggle.json or move it to ~/.kaggle/
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json


In [3]:
# Download the dataset (auto creates folder and downloads zip)
!kaggle datasets download -d dhananjayka/isl-dataset-spit --unzip


Dataset URL: https://www.kaggle.com/datasets/dhananjayka/isl-dataset-spit
License(s): unknown


In [10]:
import os
import pandas as pd

# Path to your dataset root
dataset_path = './ISL_Dataset'

# Supported image file extensions
image_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.gif')

# List to hold our data
data = []

# Walk through each folder (label) and its files
for dirpath, dirnames, filenames in os.walk(dataset_path):
    for file in filenames:
        if file.lower().endswith(image_extensions):
            label = os.path.basename(dirpath)   # folder name = class label
            full_path = os.path.join(dirpath, file)
            data.append((file, full_path, label))

# Create a DataFrame
df = pd.DataFrame(data, columns=['filename', 'filepath', 'label'])

# Show first few rows
print(df.head())

# Optional: check total number of images and classes
print(f"\nTotal images: {len(df)}")
print(f"Classes found: {df['label'].unique()}")


                           filename  \
0  captured_roi_20240423_115056.jpg   
1  captured_roi_20240423_115113.jpg   
2  captured_roi_20240423_114359.jpg   
3  captured_roi_20240423_115355.jpg   
4  captured_roi_20240423_115511.jpg   

                                           filepath label  
0  ./ISL_Dataset/L/captured_roi_20240423_115056.jpg     L  
1  ./ISL_Dataset/L/captured_roi_20240423_115113.jpg     L  
2  ./ISL_Dataset/L/captured_roi_20240423_114359.jpg     L  
3  ./ISL_Dataset/L/captured_roi_20240423_115355.jpg     L  
4  ./ISL_Dataset/L/captured_roi_20240423_115511.jpg     L  

Total images: 13796
Classes found: ['L' 'W' 'G' 'B' 'N' 'M' 'F' 'Z' 'V' 'S' 'T' 'E' 'Y' 'X' 'P' 'U' 'K' 'A'
 'H' 'J' 'I' 'C' 'R' 'O' 'D' 'Q']


In [11]:
df

Unnamed: 0,filename,filepath,label
0,captured_roi_20240423_115056.jpg,./ISL_Dataset/L/captured_roi_20240423_115056.jpg,L
1,captured_roi_20240423_115113.jpg,./ISL_Dataset/L/captured_roi_20240423_115113.jpg,L
2,captured_roi_20240423_114359.jpg,./ISL_Dataset/L/captured_roi_20240423_114359.jpg,L
3,captured_roi_20240423_115355.jpg,./ISL_Dataset/L/captured_roi_20240423_115355.jpg,L
4,captured_roi_20240423_115511.jpg,./ISL_Dataset/L/captured_roi_20240423_115511.jpg,L
...,...,...,...
13791,captured_roi_20240423_181707.jpg,./ISL_Dataset/Q/captured_roi_20240423_181707.jpg,Q
13792,captured_roi_20240423_182359.jpg,./ISL_Dataset/Q/captured_roi_20240423_182359.jpg,Q
13793,captured_roi_20240423_182645.jpg,./ISL_Dataset/Q/captured_roi_20240423_182645.jpg,Q
13794,captured_roi_20240423_182113.jpg,./ISL_Dataset/Q/captured_roi_20240423_182113.jpg,Q


In [12]:
pip install pillow scikit-learn pandas




In [13]:
import os
import pandas as pd
from PIL import Image
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Path to dataset
dataset_path = './ISL_Dataset'
image_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.gif')

# Image size to resize (fixed size for all)
IMAGE_SIZE = (64, 64)

# Lists to store data and labels
image_data = []
labels = []

# Walk through each folder (label) and its image files
for dirpath, dirnames, filenames in os.walk(dataset_path):
    for file in filenames:
        if file.lower().endswith(image_extensions):
            label = os.path.basename(dirpath)
            full_path = os.path.join(dirpath, file)

            # Open image and resize
            img = Image.open(full_path).convert('L')  # Convert to grayscale (or 'RGB' for color)
            img = img.resize(IMAGE_SIZE)

            # Convert image to numpy array and flatten it
            img_array = np.array(img).flatten()

            image_data.append(img_array)
            labels.append(label)

# Convert lists to numpy arrays
X = np.array(image_data)
y = np.array(labels)

print("Image data shape:", X.shape)
print("Labels shape:", y.shape)

# Encode labels to numeric values
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split into train and test
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded)

# Example model: Logistic Regression (or you can plug in any classifier)
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Predict and evaluate
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"\nModel Accuracy: {accuracy:.4f}")

# If you want to see which label maps to which number:
print("\nLabel encoding map:")
for label, number in zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_)):
    print(f"{label}: {number}")


Image data shape: (13796, 4096)
Labels shape: (13796,)

Model Accuracy: 0.9667

Label encoding map:
A: 0
B: 1
C: 2
D: 3
E: 4
F: 5
G: 6
H: 7
I: 8
J: 9
K: 10
L: 11
M: 12
N: 13
O: 14
P: 15
Q: 16
R: 17
S: 18
T: 19
U: 20
V: 21
W: 22
X: 23
Y: 24
Z: 25


In [14]:
y_pred_train=model.predict(X_train)
accuracy_score(y_train,y_pred_train)

1.0

In [15]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Predict on test data
y_pred = rf_model.predict(X_test)

# Accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"\nRandom Forest Model Accuracy: {accuracy:.4f}")

# Classification report
print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))

# Confusion matrix
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))

# If you want to see which label maps to which number:
print("\nLabel encoding map:")
for label, number in zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_)):
    print(f"{label}: {number}")


Random Forest Model Accuracy: 0.9815

Classification Report:
              precision    recall  f1-score   support

           A       0.99      1.00      1.00       103
           B       1.00      1.00      1.00       105
           C       1.00      1.00      1.00       105
           D       1.00      1.00      1.00       109
           E       1.00      0.99      1.00       105
           F       0.99      0.99      0.99       100
           G       0.96      0.96      0.96       104
           H       0.96      0.96      0.96       106
           I       0.96      0.98      0.97       104
           J       0.98      0.93      0.95       108
           K       0.97      1.00      0.99       105
           L       0.97      0.95      0.96       117
           M       1.00      0.94      0.97       117
           N       0.98      0.98      0.98       109
           O       0.98      0.98      0.98       110
           P       0.99      0.98      0.99       101
           Q       

In [18]:
y_pred = rf_model.predict(X_test)
accuracy_score(y_test, y_pred)

0.9815217391304348

In [19]:
y_pred_train=model.predict(X_train)
accuracy_score(y_train,y_pred_train)

1.0

In [20]:
import pickle

# Save model to a file
with open('rf_model.pkl', 'wb') as f:
    pickle.dump(rf_model, f)

# Also save your label encoder so you can decode predictions later
with open('label_encoder.pkl', 'wb') as f:
    pickle.dump(label_encoder, f)


In [22]:
!pip install streamlit
import streamlit as st
import numpy as np
from PIL import Image
import pickle

# Load trained model and label encoder
with open('rf_model.pkl', 'rb') as f:
    model = pickle.load(f)

with open('label_encoder.pkl', 'rb') as f:
    label_encoder = pickle.load(f)

# Image size to match training
IMAGE_SIZE = (64, 64)

# Title
st.title("ISL Dataset - Image Classifier (Random Forest)")

# File uploader
uploaded_file = st.file_uploader("Choose an image...", type=['jpg', 'jpeg', 'png'])

if uploaded_file is not None:
    # Display uploaded image
    image = Image.open(uploaded_file).convert('L')  # Convert to grayscale
    st.image(image, caption='Uploaded Image', use_column_width=True)

    # Resize and convert image to array
    image_resized = image.resize(IMAGE_SIZE)
    image_array = np.array(image_resized).flatten().reshape(1, -1)

    # Predict label
    prediction = model.predict(image_array)
    predicted_label = label_encoder.inverse_transform(prediction)[0]

    # Display result
    st.success(f"Predicted Class: {predicted_label}")


Collecting streamlit
  Downloading streamlit-1.44.1-py3-none-any.whl.metadata (8.9 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.44.1-py3-none-any.whl (9.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.8/9.8 MB[0m [31m22.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m59.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m7.7 MB/s[0m eta [36m0:00:00[0m
[?25hInst

2025-04-13 08:44:57.120 
  command:

    streamlit run /usr/local/lib/python3.11/dist-packages/colab_kernel_launcher.py [ARGUMENTS]


In [24]:
import pickle

SyntaxError: invalid syntax (<ipython-input-24-5124338ac38b>, line 1)

In [25]:
print(len(rf_model.estimators_))


100


In [26]:
print("max_features:", rf_model.max_features)



max_features: sqrt


In [27]:
print("bootstrap:", rf_model.bootstrap)


bootstrap: True
