![](https://upload.wikimedia.org/wikipedia/commons/c/ca/The_Serbian_Alphabet_%28Handwriting%29.svg)

# <b><span style='color:#F1C40F'>0 |</span> ABOUT DATASET</b>
## Context
For recognising handwritten forms, the very first step was to gather a considerable amount of data for training, which I struggled to collect for weeks.

## Content
The dataset contains 26 folders (A-Z) containing handwritten images of size 28×28 pixels; each letter in the image is centre-fitted to a 20×20 pixel box.

Each image is stored as Gray-level

Kernel CSVToImages contains script to convert .CSV file to actual images in .png format in structured folder.

Note: The dataset might contain some noisy images as well.

## Acknowledgements
The images are taken from NIST (https://www.nist.gov/srd/nist-special-database-19), the NMIST large dataset, and a few other sources, and were then formatted as mentioned above.

## Inspiration
The dataset would serve beginners in machine learning who want to create a predictive model to recognise handwritten characters.

# <b><span style='color:#F1C40F'>1 |</span> IMPORT LIBRARIES</b>

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

from sklearn.model_selection import train_test_split, StratifiedShuffleSplit
from sklearn.metrics import accuracy_score, r2_score, classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from imblearn.under_sampling import RandomUnderSampler
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler



# <b><span style='color:#F1C40F'>2 |</span> LOAD DATASET</b>

In [None]:
# Load the handwritten-letters CSV into a DataFrame; the cells below read the
# class label from the column named '0' and treat the remaining columns as pixels.
data = pd.read_csv('/kaggle/input/dataset/A_Z Handwritten Data.csv')


In [None]:
# (number of rows, number of columns) of the loaded DataFrame
data.shape

# <b><span style='color:#F1C40F'>3 |</span> SHOW HANDWRITTEN ALPHABETS</b>

In [None]:
import matplotlib.pyplot as plt

# Show the first sample of every class in a grid that is `n_cols` wide.
label_col = '0'  # column holding the class label
n_cols = 6

# Get the unique classes present in the dataset (in order of first appearance)
unique_classes = data[label_col].unique()

# Create a grid for displaying images
n_classes = len(unique_classes)
rows = (n_classes + n_cols - 1) // n_cols  # ceiling division
# squeeze=False keeps `axes` two-dimensional even when a single row is enough,
# so the grid can always be walked cell by cell through `axes.flat`.
fig, axes = plt.subplots(rows, n_cols, figsize=(12, 2 * rows), squeeze=False)
fig.suptitle('Sample Images for Each Class', fontsize=16)

for i, ax in enumerate(axes.flat):
    if i < n_classes:
        class_label = unique_classes[i]
        # Select the first sample for this class
        dd = data[data[label_col] == class_label].iloc[0]
        # Everything after the label is pixel data; reshape into 28x28
        x = dd.iloc[1:].values.reshape((28, 28))
        ax.imshow(x, cmap='binary')
        ax.set_title(f'Class {class_label}')
    # Hide the axis frame for used and unused cells alike
    ax.axis('off')

# Adjust layout to avoid overlap
plt.tight_layout()
plt.subplots_adjust(top=0.9)  # Leave room for the suptitle
plt.show()



In [None]:
# Tally the samples per label, ordered by label value
class_counts = data['0'].value_counts().sort_index()

# Bar chart of the per-class sample counts
plt.figure(figsize=(12, 6))
sns.barplot(
    x=class_counts.index,
    y=class_counts.values,
    palette='viridis',
)
plt.xlabel('Class Label', fontsize=12)
plt.ylabel('Number of Samples', fontsize=12)
plt.title('Class Distribution', fontsize=16)
plt.xticks(rotation=90)  # vertical tick labels so they do not overlap
plt.show()


Random Sampling

In [None]:
import pandas as pd

# Build a class-balanced subset of roughly 20,000 rows.
# `data` holds the label in its first column, followed by the pixel columns.
label_col = data.columns[0]
target_total = 20000

# Equal share per class (integer division, so the total can fall slightly short)
samples_per_class = target_total // data[label_col].nunique()

# DataFrameGroupBy.sample draws `n` rows inside every class and keeps all
# columns, including the label. The previous groupby(...).apply(...) form
# operated on the grouping column, which pandas 2.2 deprecates; once the
# grouping column is excluded from apply, reset_index(drop=True) would discard
# the label entirely and `iloc[:, 0]` below would pick up a pixel column.
# Like the old form, this raises ValueError if a class has fewer than
# `samples_per_class` rows.
balanced_data = (
    data.groupby(label_col)
    .sample(n=samples_per_class, random_state=42)
    .reset_index(drop=True)  # Reset index after sampling
)

# Split the balanced data into X (features) and y (labels)
y = balanced_data.iloc[:, 0]   # First column as labels
X = balanced_data.iloc[:, 1:]  # Remaining columns as features

print(f"Shape of X: {X.shape}")
print(f"Shape of y: {y.shape}")


In [None]:
# Per-class row counts of the randomly sampled labels
class_counts = y.value_counts()
print("Class Distribution After Random Sampling:")
print(class_counts)

# Same counts as a bar chart
plt.figure(figsize=(10, 6))
class_counts.plot(kind='bar')
plt.xlabel("Class Labels")
plt.ylabel("Number of Samples")
plt.title("Class Distribution After Random Sampling")
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Function to plot sample images after random sampling
def plot_sample_images(X, y, num_classes=26, samples_per_class=5):
    """Plot a grid of sample images with one row per class.

    Args:
        X: DataFrame whose rows are flattened 28x28 images.
        y: Label Series aligned with ``X``; classes are looked up as the
            integers ``0 .. num_classes - 1``.
        num_classes: Number of grid rows (classes) to draw.
        samples_per_class: Number of grid columns (images per class).

    Classes with fewer than ``samples_per_class`` rows show what they have and
    leave the remaining cells blank instead of raising.
    """
    # squeeze=False keeps `axes` 2-D even when a dimension equals 1
    fig, axes = plt.subplots(
        num_classes,
        samples_per_class,
        figsize=(10, num_classes * 2),
        squeeze=False,
    )
    fig.suptitle('Sample Images After Random Sampling', fontsize=16)

    for class_label in range(num_classes):
        class_rows = X[y == class_label]
        # Never request more rows than the class actually has
        n_show = min(samples_per_class, len(class_rows))
        class_samples = class_rows.sample(n_show, random_state=42)
        for i, ax in enumerate(axes[class_label]):
            ax.axis('off')
            if i >= n_show:
                continue  # nothing left to draw in this cell
            image = class_samples.iloc[i].values.reshape(28, 28)  # Reshape to 28x28
            ax.imshow(image, cmap='binary')
            ax.set_title(f'Class {class_label}')

    plt.tight_layout()
    plt.subplots_adjust(top=0.9)
    plt.show()

# Call the function after performing random sampling
plot_sample_images(X, y)  # X and y should be your balanced random sampled dataset


**STRATIFIED SAMPLING**

In [None]:
from sklearn.model_selection import StratifiedShuffleSplit

# Draw a stratified subset of (X, y) using the "test" side of a
# StratifiedShuffleSplit as the sample.
num_classes = y.nunique()   # distinct labels in y
total_samples = len(y)      # rows available

# Target of 769 rows per class, capped at what is available
samples_per_class = 769
sample_size = min(samples_per_class * num_classes, total_samples)

# Express the target as a fraction of the data (0.1 if there is no data at all)
if total_samples > 0:
    test_size_fraction = sample_size / total_samples
else:
    test_size_fraction = 0.1

# A fraction of 1.0 is not a valid split size. This happens whenever y has no
# more than 769 * num_classes rows, and the sample then falls back to 90%.
if test_size_fraction >= 1.0:
    test_size_fraction = 0.9

strat_split = StratifiedShuffleSplit(
    n_splits=1,
    test_size=test_size_fraction,
    random_state=42,
)

# Only one split is generated, so take it directly
_, sample_index = next(strat_split.split(X, y))
stratified_sample_X = X.iloc[sample_index]
stratified_sample_y = y.iloc[sample_index]

# Resulting stratified samples
X_sample = stratified_sample_X
y_sample = stratified_sample_y


In [None]:
# Report the size of the stratified sample and how its labels are distributed
sample_class_counts = y_sample.value_counts()

print(f"Shape of X_sample: {X_sample.shape}")
print(f"Shape of y_sample: {y_sample.shape}")


print("Stratified Sample Class Distribution:\n", sample_class_counts)


import matplotlib.pyplot as plt

# Grid layout: one column per class, five example rows per column
num_classes = y_sample.nunique()
samples_per_class = 5

plt.figure(figsize=(num_classes * 2, samples_per_class * 2))

for col, class_label in enumerate(y_sample.unique()):
    # First few rows of the stratified sample that belong to this class
    class_samples = X_sample[y_sample == class_label].head(samples_per_class)

    for row in range(samples_per_class):
        # Subplot numbers run row by row, starting at 1
        cell_number = row * num_classes + col + 1
        plt.subplot(samples_per_class, num_classes, cell_number)
        pixels = class_samples.iloc[row].values.reshape(28, 28)
        plt.imshow(pixels, cmap='gray')
        plt.axis('off')
        if row == 0:
            # Label each column once, on its top image
            plt.title(f'Class: {class_label}')

plt.tight_layout()
plt.show()

# <b><span style='color:#F1C40F'>4 |</span> SPLITTING DATA</b>

In [None]:
# Column '0' is the class label; every other column is a pixel feature
label_column = '0'
y = data[label_column]
X = data.drop(columns=[label_column])

undersampling 

In [None]:
from imblearn.under_sampling import RandomUnderSampler
import pandas as pd

# Step 1: Under-sample every class present in y down to 769 rows
target_counts = dict.fromkeys(y.value_counts().index, 769)
undersampler = RandomUnderSampler(
    sampling_strategy=target_counts,
    random_state=42,
)
X_resampled, y_resampled = undersampler.fit_resample(X, y)

In [None]:
# Summarise the undersampled data: array shapes, then rows per class
resampled_class_counts = pd.Series(y_resampled).value_counts()

print("Shape of X_resampled:", X_resampled.shape)
print("Shape of y_resampled:", y_resampled.shape)
print("Class distribution after undersampling:\n", resampled_class_counts)


In [None]:
import matplotlib.pyplot as plt
import numpy as np

def plot_images(X, y, n_classes, images_per_class=10):
    """Plot randomly chosen images, one grid column per class.

    Args:
        X: DataFrame whose rows are flattened 28x28 images.
        y: Labels aligned positionally with ``X``; classes are looked up as the
            integers ``0 .. n_classes - 1``.
        n_classes: Number of classes (grid columns).
        images_per_class: Maximum number of images drawn per class (grid rows).

    The selection uses NumPy's global random state, so it changes between runs
    unless a seed was set beforehand. Classes with fewer than
    ``images_per_class`` samples show all they have; empty classes are skipped.
    """
    plt.figure(figsize=(n_classes * 2, images_per_class * 2))

    # Loop through each class
    for label in range(n_classes):
        class_indices = np.where(y == label)[0]  # positional indices of this class
        # np.random.choice(..., replace=False) raises if asked for more items
        # than exist, so cap the request at the class size.
        n_show = min(images_per_class, len(class_indices))
        if n_show == 0:
            continue
        selected_indices = np.random.choice(class_indices, n_show, replace=False)
        for i, idx in enumerate(selected_indices):
            plt.subplot(images_per_class, n_classes, i * n_classes + label + 1)
            plt.imshow(X.iloc[idx].values.reshape(28, 28), cmap='gray')  # each row is a 28x28 image
            plt.axis('off')  # Hide axes
            if i == 0:  # Show class label only for the first image
                plt.title(f'Class {label}', fontsize=14)

    plt.tight_layout()
    plt.show()

# Plot ten random images for each class of the undersampled data
n_classes = len(np.unique(y_resampled))  # Get number of classes
plot_images(X_resampled, y_resampled, n_classes, images_per_class=10)



**DOWN SAMPLING**

In [None]:
from skimage.transform import resize
import numpy as np
import matplotlib.pyplot as plt

def downsample_images(X, new_size=(16, 16), original_size=(28, 28)):
    """Resize a table of flattened images to a smaller resolution.

    Args:
        X: DataFrame (or array-like) with one flattened image per row; each row
            must hold ``original_size[0] * original_size[1]`` values.
        new_size: Target ``(height, width)``.
        original_size: ``(height, width)`` of the stored images.

    Returns:
        A NumPy array of shape ``(n_samples, new_size[0] * new_size[1])`` with
        one flattened, resized image per row.
    """
    pixels = X.values if hasattr(X, "values") else np.asarray(X)
    n_samples = pixels.shape[0]

    if n_samples == 0:
        # Keep the documented 2-D shape even when there is nothing to resize
        return np.empty((0, new_size[0] * new_size[1]))

    # View every row as a 2-D image
    original_images = pixels.reshape(n_samples, *original_size)

    # Resize image by image and flatten each result back into a row. No array
    # is preallocated: np.array builds the result in a single step.
    return np.array([
        resize(image, new_size, anti_aliasing=True).flatten()
        for image in original_images
    ])

In [None]:
# Downsample every undersampled image to 16x16; each output row holds the
# flattened 16 * 16 = 256 pixel values.
X_downsampled_16x16 = downsample_images(X_resampled, new_size=(16, 16))

In [None]:
# Verify the shape of the downsampled images: expected (n_samples, 256)
print("Shape of downsampled images (16x16):", X_downsampled_16x16.shape)

In [None]:
def plot_downsampled_images(X, y, n_classes, images_per_class=10, title_suffix=""):
    """Plot randomly chosen downsampled images, one grid column per class.

    Args:
        X: 2-D NumPy array with one flattened square image per row.
        y: Labels aligned positionally with ``X``; classes are looked up as the
            integers ``0 .. n_classes - 1``.
        n_classes: Number of classes (grid columns).
        images_per_class: Maximum number of images drawn per class (grid rows).
        title_suffix: Text appended to each column title, e.g. the resolution.

    Classes with fewer than ``images_per_class`` samples show all they have;
    empty classes are skipped.
    """
    # Rows are flattened square images, so the side length is the square root
    # of the row length. It is the same for every image; compute it once.
    side = int(np.sqrt(X.shape[1]))

    plt.figure(figsize=(n_classes * 2, images_per_class * 2))

    for label in range(n_classes):
        class_indices = np.where(y == label)[0]
        # Sampling without replacement cannot return more items than exist
        n_show = min(images_per_class, len(class_indices))
        if n_show == 0:
            continue
        selected_indices = np.random.choice(class_indices, n_show, replace=False)
        for i, idx in enumerate(selected_indices):
            plt.subplot(images_per_class, n_classes, i * n_classes + label + 1)
            plt.imshow(X[idx].reshape(side, side), cmap='gray')
            plt.axis('off')
            if i == 0:
                plt.title(f'Class {label} {title_suffix}', fontsize=14)

    plt.tight_layout()
    plt.show()

In [None]:
# Plot downsampled images at 16x16 (the title suffix states the resolution shown)
plot_downsampled_images(X_downsampled_16x16, y_resampled, n_classes, images_per_class=5, title_suffix="(16x16)")


In [None]:
# Downsample every undersampled image to 8x8; each output row holds the
# flattened 8 * 8 = 64 pixel values.
X_downsampled_8x8 = downsample_images(X_resampled, new_size=(8, 8))

In [None]:
# Verify the shape of the downsampled images: expected (n_samples, 64)
print("Shape of downsampled images (8x8):", X_downsampled_8x8.shape)

In [None]:
def plot_downsampled_images(X, y, n_classes, images_per_class=10, title_suffix=""):
    """Draw a grid of random downsampled images with one column per class.

    This re-declares the helper defined in an earlier cell with the same
    signature.

    Args:
        X: 2-D NumPy array with one flattened square image per row.
        y: Labels aligned positionally with ``X``; classes are looked up as the
            integers ``0 .. n_classes - 1``.
        n_classes: Number of grid columns.
        images_per_class: Number of grid rows; every class must have at least
            this many samples.
        title_suffix: Text appended to each column title.
    """
    plt.figure(figsize=(n_classes * 2, images_per_class * 2))

    for label in range(n_classes):
        candidates = np.where(y == label)[0]
        chosen = np.random.choice(candidates, images_per_class, replace=False)
        for row, idx in enumerate(chosen):
            cell_number = row * n_classes + label + 1
            plt.subplot(images_per_class, n_classes, cell_number)
            # Side length of the square image, recovered from the row length
            side = int(np.sqrt(X.shape[1]))
            tile = X[idx].reshape(side, side)
            plt.imshow(tile, cmap='gray')
            plt.axis('off')
            if row == 0:
                plt.title(f'Class {label} {title_suffix}', fontsize=14)

    plt.tight_layout()
    plt.show()

In [None]:
# Plot downsampled images at 8x8 (the title suffix states the resolution shown)
plot_downsampled_images(X_downsampled_8x8, y_resampled, n_classes, images_per_class=5, title_suffix="(8x8)")


GAUSSIAN FILTER

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from skimage.filters import gaussian
from imblearn.under_sampling import RandomUnderSampler

# Reload the CSV: first column = labels, remaining columns = pixel values.
# NOTE(review): this path differs from the one used by the other load cells -
# confirm which input directory is mounted.
csv_path = '../input/az-handwritten-alphabets-in-csv-format/A_Z Handwritten Data.csv'
data = pd.read_csv(csv_path)
y = data.iloc[:, 0]   # Labels
X = data.iloc[:, 1:]  # Pixel values

# Step 1: Balance the classes by random under-sampling with the library's
# default ('auto') strategy
undersampler = RandomUnderSampler(
    sampling_strategy='auto',
    random_state=42,
)
X_resampled, y_resampled = undersampler.fit_resample(X, y)

# Step 2: Apply Gaussian filter
def apply_gaussian_filter(X, sigma=1.0, image_shape=(28, 28)):
    """Blur every image in ``X`` with a Gaussian filter.

    Args:
        X: DataFrame with one flattened image per row.
        sigma: Standard deviation handed to ``skimage.filters.gaussian``.
        image_shape: ``(height, width)`` each row is reshaped to before blurring.

    Returns:
        A new DataFrame of floats with one flattened, blurred image per row.
        It has a fresh default index and integer column names.
    """
    n_images = X.shape[0]
    pixels_per_image = image_shape[0] * image_shape[1]
    blurred_images = np.empty((n_images, pixels_per_image))

    for row_idx in range(n_images):
        original = X.iloc[row_idx].values.reshape(image_shape)
        # NOTE(review): gaussian() is called with its default range handling;
        # confirm the resulting value scale is the one intended downstream.
        blurred_images[row_idx] = gaussian(original, sigma=sigma).flatten()

    return pd.DataFrame(blurred_images)

# Blur the undersampled dataset
X_blurred = apply_gaussian_filter(X_resampled, sigma=1.0, image_shape=(28, 28))

# Step 3: Plot images for each class label after Gaussian filtering
def plot_images_per_class(X, y, n_classes, images_per_class=5,
                          title_suffix="(Gaussian Filter Applied)",
                          image_shape=(28, 28)):
    """Plot the first few images of every class, one grid column per class.

    Args:
        X: DataFrame with one flattened image per row.
        y: Labels aligned positionally with ``X``; classes are looked up as the
            integers ``0 .. n_classes - 1``.
        n_classes: Number of classes (grid columns).
        images_per_class: Maximum number of images per class (grid rows).
        title_suffix: Text appended to the figure title.
        image_shape: ``(height, width)`` each row is reshaped to for display.
            Defaults to 28x28, matching ``apply_gaussian_filter``.
    """
    plt.figure(figsize=(15, images_per_class * 3))  # Increase figure size
    for label in range(n_classes):
        # Positions of the first `images_per_class` rows of this class; the
        # slice simply yields fewer when the class is smaller.
        indices = np.where(y == label)[0][:images_per_class]
        for i, idx in enumerate(indices):
            plt.subplot(images_per_class, n_classes, i * n_classes + label + 1)
            plt.imshow(X.iloc[idx].values.reshape(image_shape), cmap='gray')
            plt.axis('off')
            if i == 0:
                plt.title(f"{label}", fontsize=14)  # Only show class number
    plt.subplots_adjust(hspace=0.5, wspace=0.3)  # Adjust spacing between subplots
    plt.suptitle(f"Sample Images per Class {title_suffix}", fontsize=18)
    plt.show()

# Define number of classes and plot images
n_classes = len(np.unique(y_resampled))  # Number of unique classes in y_resampled
plot_images_per_class(X_blurred, y_resampled, n_classes, images_per_class=5)


PCA

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score


In [None]:
# Step 1: Standardize the blurred pixels (the scaler is fitted on all rows)
scaler = StandardScaler()
scaler.fit(X_blurred)
X_scaled = scaler.transform(X_blurred)


In [None]:
# Step 2: Hold out 20% of the scaled rows for testing (fixed seed)
split_parts = train_test_split(
    X_scaled,
    y_resampled,
    test_size=0.2,
    random_state=42,
)
X_train, X_test, y_train, y_test = split_parts


In [None]:

from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier  # or another classifier of your choice
from sklearn.metrics import classification_report, f1_score, accuracy_score, confusion_matrix
# Standardised copy of the whole blurred set. The classifier cells further
# down split X_scaled themselves, so it stays defined here exactly as before.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_blurred)

# Step 1: Split the *unscaled* data into training and testing sets.
# Same row count, test_size and random_state as the other cells, so the
# partition of rows is unchanged.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X_blurred, y_resampled, test_size=0.2, random_state=42
)

# Step 2: Fit the scaler on the training rows only and reuse it for the test
# rows, so that test-set statistics never influence preprocessing.
split_scaler = StandardScaler()
X_train = split_scaler.fit_transform(X_train_raw)
X_test = split_scaler.transform(X_test_raw)

# Step 3: Perform PCA, again fitted on the training rows only
pca = PCA(n_components=0.95)  # Retains 95% variance; adjust as needed
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)

# Step 4: Initialize and Train Classifier
classifier = RandomForestClassifier(random_state=42)  # Replace with your classifier if needed
classifier.fit(X_train_pca, y_train)

# Step 5: Make Predictions
y_pred = classifier.predict(X_test_pca)

# Step 6: Evaluate the Model
# Overall accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Overall Accuracy:", accuracy)

# F1 Score (weighted and macro)
f1_weighted = f1_score(y_test, y_pred, average='weighted')
f1_macro = f1_score(y_test, y_pred, average='macro')
print("Weighted F1 Score:", f1_weighted)
print("Macro F1 Score:", f1_macro)

# Classification report as a dict so the averaged rows can be read out
report = classification_report(y_test, y_pred, zero_division=0, output_dict=True)

# Extract and display weighted and macro avg from the classification report
weighted_avg = report['weighted avg']
macro_avg = report['macro avg']

print("\nDetailed Classification Report:")
print("Weighted Avg - F1 Score:", weighted_avg['f1-score'])
print("Weighted Avg - Precision:", weighted_avg['precision'])
print("Weighted Avg - Recall:", weighted_avg['recall'])
print("Macro Avg - F1 Score:", macro_avg['f1-score'])
print("Macro Avg - Precision:", macro_avg['precision'])
print("Macro Avg - Recall:", macro_avg['recall'])

# Display the full classification report in a readable format
print("\nFull Classification Report:\n", classification_report(y_test, y_pred, zero_division=0))

# Confusion Matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print("\nConfusion Matrix:\n", conf_matrix)


BAGGING CLASSIFIER 

In [None]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split

# Bagging ensemble of decision trees, scored on a held-out 20% split.

# Base estimator that the ensemble replicates
dt_classifier = DecisionTreeClassifier(random_state=42)

# Hold out 20% of the scaled rows for testing
split_parts = train_test_split(X_scaled, y_resampled, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

# Ten estimators with max_samples and max_features both set to 0.8
bagging_classifier = BaggingClassifier(
    estimator=dt_classifier,
    n_estimators=10,
    max_samples=0.8,
    max_features=0.8,
    random_state=42,
)

# Train, then predict the held-out rows
bagging_classifier.fit(X_train, y_train)
y_pred_bagging = bagging_classifier.predict(X_test)

# Score the predictions
conf_matrix = confusion_matrix(y_test, y_pred_bagging)
report = classification_report(y_test, y_pred_bagging)
accuracy = accuracy_score(y_test, y_pred_bagging)

# Print metrics
print("Bagging Classifier Accuracy:", accuracy)
print("Bagging Classifier Report:\n", report)
print("Confusion Matrix:\n", conf_matrix)


VOTING CLASSIFIER 

In [None]:
from sklearn.ensemble import VotingClassifier, RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Hard-voting ensemble over a decision tree, a random forest and naive Bayes.

# Members of the ensemble
dt_classifier = DecisionTreeClassifier(random_state=42)
rf_classifier = RandomForestClassifier(n_estimators=50, random_state=42)
nb_classifier = GaussianNB()
ensemble_members = [
    ('dt', dt_classifier),
    ('rf', rf_classifier),
    ('nb', nb_classifier),
]

# Hold out 20% of the scaled rows for testing
split_parts = train_test_split(X_scaled, y_resampled, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

# voting='hard' combines predicted labels; 'soft' would need probability outputs
voting_classifier = VotingClassifier(estimators=ensemble_members, voting='hard')

# Train, then predict the held-out rows
voting_classifier.fit(X_train, y_train)
y_pred_voting = voting_classifier.predict(X_test)

# Score the predictions
conf_matrix = confusion_matrix(y_test, y_pred_voting)
report = classification_report(y_test, y_pred_voting)
accuracy = accuracy_score(y_test, y_pred_voting)

# Print metrics
print("Voting Classifier Accuracy:", accuracy)
print("Voting Classifier Report:\n", report)
print("Confusion Matrix:\n", conf_matrix)


NAIVE BAYES CLASSIFIER 

In [None]:
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split

# Gaussian naive Bayes baseline on the scaled, undersampled data.

# Hold out 20% of the rows for testing (needs X_scaled and y_resampled)
split_parts = train_test_split(X_scaled, y_resampled, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

# Train on the training rows, then predict the held-out rows
nb_classifier = GaussianNB()
nb_classifier.fit(X_train, y_train)
y_pred_nb = nb_classifier.predict(X_test)

# Score the predictions
conf_matrix = confusion_matrix(y_test, y_pred_nb)
report = classification_report(y_test, y_pred_nb)
accuracy = accuracy_score(y_test, y_pred_nb)

# Print metrics
print("Naive Bayes Classifier Accuracy:", accuracy)
print("Naive Bayes Classifier Report:\n", report)
print("Confusion Matrix:\n", conf_matrix)


SVM

In [None]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split

# Support vector classifier with default settings on the scaled data.

# Hold out 20% of the rows for testing (needs X_scaled and y_resampled)
split_parts = train_test_split(X_scaled, y_resampled, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

# Train on the training rows, then predict the held-out rows
svm_classifier = SVC(random_state=42)  # You can adjust parameters as needed
svm_classifier.fit(X_train, y_train)
y_pred_svm = svm_classifier.predict(X_test)

# Accuracy and confusion matrix
accuracy = accuracy_score(y_test, y_pred_svm)
confusion = confusion_matrix(y_test, y_pred_svm)

# Precision, recall and F1, each averaged across classes with 'weighted'
precision, recall, f1 = (
    score_fn(y_test, y_pred_svm, average='weighted')
    for score_fn in (precision_score, recall_score, f1_score)
)

# Print results
print("SVM Classifier Accuracy:", accuracy)
print("Confusion Matrix:\n", confusion)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)
print("SVM Classifier Report:\n", classification_report(y_test, y_pred_svm))


KNN

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split

# k-nearest-neighbours classifier (k = 5) on the scaled data.

# Hold out 20% of the rows for testing (needs X_scaled and y_resampled)
split_parts = train_test_split(X_scaled, y_resampled, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

# Train on the training rows, then predict the held-out rows
knn_classifier = KNeighborsClassifier(n_neighbors=5)  # You can adjust the number of neighbors
knn_classifier.fit(X_train, y_train)
y_pred_knn = knn_classifier.predict(X_test)

# Accuracy and confusion matrix
accuracy = accuracy_score(y_test, y_pred_knn)
confusion = confusion_matrix(y_test, y_pred_knn)

# Precision, recall and F1, each averaged across classes with 'weighted'
precision, recall, f1 = (
    score_fn(y_test, y_pred_knn, average='weighted')
    for score_fn in (precision_score, recall_score, f1_score)
)

# Print results
print("KNN Classifier Accuracy:", accuracy)
print("Confusion Matrix:\n", confusion)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)
print("KNN Classifier Report:\n", classification_report(y_test, y_pred_knn))


RANDOM FOREST CLASSIFIER 

In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score

# Random forest (100 trees) on the scaled, undersampled data.

# Step 1: Split the data here, as the other classifier cells do, so the result
# does not depend on which cell happened to define X_train / X_test last.
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_resampled, test_size=0.2, random_state=42)

# Step 2: Initialize the Random Forest Classifier
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

# Step 3: Train the model on the training data
rf_model.fit(X_train, y_train)

# Step 4: Make predictions on the test data
y_pred_rf = rf_model.predict(X_test)

# Step 5: Evaluate the model
accuracy = accuracy_score(y_test, y_pred_rf)
precision = precision_score(y_test, y_pred_rf, average='weighted')  # average='weighted' considers class imbalance
recall = recall_score(y_test, y_pred_rf, average='weighted')
f1 = f1_score(y_test, y_pred_rf, average='weighted')

print("Random Forest Classifier Accuracy:", accuracy)
print("\nClassification Report:\n", classification_report(y_test, y_pred_rf))
print("\nOverall Precision:", precision)
print("Overall Recall:", recall)
print("Overall F1-Score:", f1)


DTC CLASSIFIER 

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, classification_report

# Single decision tree on the scaled, undersampled data.

# Step 1: Split the data here, as the other classifier cells do, so the result
# does not depend on which cell happened to define X_train / X_test last.
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_resampled, test_size=0.2, random_state=42)

# Step 2: Initialize and train the Decision Tree Classifier
dtc_model = DecisionTreeClassifier(random_state=42)
dtc_model.fit(X_train, y_train)

# Step 3: Make predictions and evaluate the model
y_pred_dtc = dtc_model.predict(X_test)

# Calculate and display accuracy
accuracy_dtc = accuracy_score(y_test, y_pred_dtc)
print("Decision Tree Classifier Accuracy:", accuracy_dtc)

# Display classification report
print("\nClassification Report:\n", classification_report(y_test, y_pred_dtc))

# Calculate overall precision, recall, and F1-score (weighted across classes)
precision_dtc = precision_score(y_test, y_pred_dtc, average='weighted')
recall_dtc = recall_score(y_test, y_pred_dtc, average='weighted')
f1_dtc = f1_score(y_test, y_pred_dtc, average='weighted')

print("\nOverall Precision:", precision_dtc)
print("Overall Recall:", recall_dtc)
print("Overall F1-Score:", f1_dtc)


In [None]:
# print(classification_report(y_test,ypred))

# <b><span style='color:#F1C40F'>7 |</span> SHOW PREDICTED LABEL</b>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Load the dataset
data = pd.read_csv('/kaggle/input/dataset/A_Z Handwritten Data.csv')

# The earlier cells read the labels of this same file from column '0'; there
# is no guarantee that a 'label' column exists, and data['label'] would raise
# KeyError. Use 'label' only if it is present, otherwise the first column.
label_col = 'label' if 'label' in data.columns else data.columns[0]
labels = data[label_col]
X = data.drop(columns=[label_col])

# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.2, random_state=42)

# Scale features: fit on the training rows only, then reuse for the test rows
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train SVM model
# NOTE(review): this fits on the full, non-undersampled training split; SVC
# training time grows quickly with the number of rows - confirm this is
# feasible or train on the resampled subset instead.
svm_model = svm.SVC(kernel='linear')
svm_model.fit(X_train_scaled, y_train)

# Function to predict and display a handwritten character
def perdhandwritten(ind):
    """Predict the letter for one test image and display it.

    Args:
        ind: Positional index of the row in ``X_test``; an index outside
            ``X_test`` raises ``IndexError``.

    Reads the notebook-level ``X_test``, ``scaler`` and ``svm_model`` and shows
    the 28x28 image titled with the predicted letter.
    """
    # Select the row as a one-row DataFrame: the scaler was fitted on a
    # DataFrame, so this keeps the column names instead of passing a bare array.
    row = X_test.iloc[[ind]]
    row_scaled = scaler.transform(row)
    y_pred = svm_model.predict(row_scaled)

    plot_data = row.values.reshape(28, 28)

    # Labels are treated as offsets from 'A' (0 -> 'A', 1 -> 'B', ...)
    predicted_letter = chr(int(y_pred[0]) + ord('A'))

    plt.figure(dpi=150)
    plt.title(f'Model Prediction: {predicted_letter}')
    plt.imshow(plot_data, cmap='gray')
    plt.axis('off')
    plt.show()

# Example usage
perdhandwritten(10)  # Example index


In [None]:
# Show predictions for a spread of test rows. X_test is only a 20% split, so
# the larger positions may not exist; skip those instead of stopping the cell
# with an IndexError.
example_indices = [
    3323, 15323, 25323, 38323, 48323, 58323,
    68323, 78323, 88323, 98323, 108323, 118323,
]
for example_index in example_indices:
    if example_index < len(X_test):
        perdhandwritten(example_index)
    else:
        print(f"Skipping index {example_index}: X_test has only {len(X_test)} rows")

# The End...