In [1]:
import os
import zipfile
import urllib.request
import numpy as np
from PIL import Image
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.linear_model import RidgeClassifier
from sklearn.metrics import accuracy_score
import gradio as gr

def download_dataset():
    dataset_url = "https://github.com/Horea94/Fruit-Images-Dataset/archive/master.zip"
    dataset_zip_path = "fruits_dataset.zip"
    extracted_path = "Fruit-Images-Dataset-master"

    if not os.path.exists(extracted_path):
        print("Downloading Fruits-360 dataset...")
        urllib.request.urlretrieve(dataset_url, dataset_zip_path)
        with zipfile.ZipFile(dataset_zip_path, 'r') as zip_ref:
            zip_ref.extractall(".")
        print("✅ Dataset extracted successfully.")
    else:
        print("✅ Dataset already present.")
    return extracted_path

def load_images(folder, image_size=(64, 64), max_per_class=100):  # safer limit
    images, labels = [], []
    training_path = os.path.join(folder, "Training")
    for class_name in os.listdir(training_path):
        class_path = os.path.join(training_path, class_name)
        if not os.path.isdir(class_path):
            continue
        for i, img_name in enumerate(os.listdir(class_path)):
            if i >= max_per_class:
                break
            try:
                img = Image.open(os.path.join(class_path, img_name)).convert('RGB').resize(image_size)
                images.append(np.array(img, dtype=np.float32) / 255.0)
                labels.append(class_name.lower())
            except:
                continue
    return np.array(images, dtype=np.float32), np.array(labels)

def train_linear_model(X_train, X_test, y_train, y_test):
    X_train_flat = X_train.reshape(X_train.shape[0], -1).astype('float32')
    X_test_flat = X_test.reshape(X_test.shape[0], -1).astype('float32')

    pca = PCA(n_components=50)
    X_train_pca = pca.fit_transform(X_train_flat)
    X_test_pca = pca.transform(X_test_flat)

    clf = RidgeClassifier()
    clf.fit(X_train_pca, y_train)

    y_pred = clf.predict(X_test_pca)
    print("✅ RidgeClassifier Accuracy:", accuracy_score(y_test, y_pred))
    return clf, pca

def predict(image, le, clf, pca):
    img = image.resize((64, 64)).convert('RGB')
    img_array = np.array(img, dtype=np.float32).reshape(1, -1) / 255.0
    img_pca = pca.transform(img_array)
    prediction = clf.predict(img_pca)
    return f"Predicted Fruit (Linear Model): {le.inverse_transform(prediction)[0]}"

def main():
    dataset_folder = download_dataset()
    X, y = load_images(dataset_folder)

    le = LabelEncoder()
    y_encoded = le.fit_transform(y)

    X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)
    clf, pca = train_linear_model(X_train, X_test, y_train, y_test)

    gr.Interface(
        fn=lambda img: predict(img, le, clf, pca),
        inputs=gr.Image(type="pil", label="Upload Fruit Image"),
        outputs="text",
        title="🍋 Fruit Classification with RidgeClassifier",
        description="Upload a fruit image to classify it using Ridge (Linear Regression for Classification)"
    ).launch()

if __name__ == "__main__":
    main()


✅ Dataset already present.
✅ RidgeClassifier Accuracy: 0.866793893129771
* Running on local URL:  http://127.0.0.1:7862
* To create a public link, set `share=True` in `launch()`.
