<a href="https://colab.research.google.com/github/TheUnmeshRaj/Advanced-Generative-AI-For-Skin-Diseases/blob/main/TextClassificationModel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [23]:
pip install datasets transformers torch scikit-learn



In [25]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModel
import torch
from sklearn.metrics.pairwise import cosine_similarity

# Dataset of symptom queries and their disease labels, loaded by its
# repository-style name.
dataset = load_dataset("Mostafijur/Skin_disease_classify_data")

# Pretrained checkpoint used for both tokenization and embedding; the
# tokenizer and model are loaded from the same name so they stay matched.
model_name = 'sentence-transformers/paraphrase-MiniLM-L6-v2'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)

def embed_text(text, tokenizer, model):
    """Encode ``text`` into one fixed-size embedding per input sequence.

    The text is tokenized (padded and truncated as needed), run through
    ``model`` with gradient tracking disabled, and the per-token hidden
    states are averaged into a single vector per sequence.

    The average is weighted by the tokenizer's attention mask so padding
    positions do not dilute the embedding when several texts of different
    lengths are embedded together. For a single string (no padding) this is
    the same as a plain mean over tokens.

    Args:
        text: A string, or a list of strings, to embed.
        tokenizer: Callable accepting ``return_tensors``, ``padding`` and
            ``truncation`` and returning a mapping of model inputs.
        model: Callable accepting those inputs as keyword arguments and
            returning an object with a ``last_hidden_state`` tensor.

    Returns:
        A tensor with one row per input sequence (batch, hidden_size).
    """
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        outputs = model(**inputs)

    token_embeddings = outputs.last_hidden_state
    attention_mask = inputs.get("attention_mask")
    if attention_mask is None:
        # No mask available: every position counts equally.
        return token_embeddings.mean(dim=1)

    # Broadcast the (batch, seq) mask over the hidden dimension.
    mask = attention_mask.unsqueeze(-1).to(token_embeddings.dtype)
    summed = (token_embeddings * mask).sum(dim=1)
    # Clamp so an all-padding row cannot cause a division by zero.
    token_counts = mask.sum(dim=1).clamp(min=1e-9)
    return summed / token_counts

# Parallel lists: position i in each list describes the same training record.
queries, diseases, embeddings = [], [], []

for record in dataset['train']:
    # Each record nests its fields under a single top-level key.
    entry = record['Skin_disease_classification']
    queries.append(entry['query'])
    diseases.append(entry['disease'])
    # Embed the query text immediately so embeddings stays aligned with queries.
    embeddings.append(embed_text(entry['query'], tokenizer, model))

def find_similar_disease(input_query, queries, embeddings, tokenizer, model, labels=None):
    """Return the disease label whose stored query is closest to ``input_query``.

    The input is embedded with ``embed_text`` and compared by cosine
    similarity against every stored embedding in one vectorized call. The
    label at the position of the highest similarity is returned; ties go to
    the earliest position.

    Args:
        input_query: Free-text symptom description to look up.
        queries: Stored query texts. Kept for interface compatibility; the
            lookup itself only needs ``embeddings`` and the labels.
        embeddings: Sequence of tensors, one per stored query, as produced
            by ``embed_text``.
        tokenizer: Tokenizer passed through to ``embed_text``.
        model: Model passed through to ``embed_text``.
        labels: Optional sequence of labels aligned with ``embeddings``.
            Defaults to the notebook-level ``diseases`` list.

    Returns:
        The label aligned with the most similar stored embedding.

    Raises:
        ValueError: If ``embeddings`` is empty.
    """
    if labels is None:
        labels = diseases
    if len(embeddings) == 0:
        raise ValueError("embeddings is empty; nothing to compare the query against")

    # Only the first row of each tensor is compared.
    query_vector = embed_text(input_query, tokenizer, model)[:1].detach().numpy()
    reference_matrix = torch.cat([emb[:1] for emb in embeddings], dim=0).detach().numpy()

    # One (1, n) similarity row instead of n separate sklearn calls.
    similarities = cosine_similarity(query_vector, reference_matrix)[0]
    most_similar_idx = int(similarities.argmax())
    return labels[most_similar_idx]



In [None]:
# Ask for a free-text symptom description and report the closest known disease.
# Surrounding whitespace is dropped, and a blank entry is rejected instead of
# being matched against the corpus.
input_query = input("Enter your Symptoms here: ").strip()
if input_query:
    similar_disease = find_similar_disease(input_query, queries, embeddings, tokenizer, model)
    print(f"The most similar disease is: {similar_disease}")
else:
    print("No symptoms entered; please describe your symptoms and run this cell again.")

Enter your Symptons here: i got these small, red bumps on my upper arms that feel rough to the touch.
The most similar disease is: Keratosis Pilaris


In [27]:
# model.save_model("skin_disease")

In [24]:
!pip install transformers huggingface_hub



In [28]:
from huggingface_hub import notebook_login

# Authenticate this notebook session with the Hugging Face Hub; presumably
# required before the push_to_hub calls later in the notebook can upload.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [29]:
# Save the model and the tokenizer into the same local directory. The
# tokenizer call is last, so its return value is what the cell displays.
model.save_pretrained('skin-disease-detection')
tokenizer.save_pretrained('skin-disease-detection')


('skin-disease-detection/tokenizer_config.json',
 'skin-disease-detection/special_tokens_map.json',
 'skin-disease-detection/vocab.txt',
 'skin-disease-detection/added_tokens.json',
 'skin-disease-detection/tokenizer.json')

In [30]:
from huggingface_hub import HfApi

# Target repository name passed to both uploads below.
repo_name = "Unmeshraj/skin-disease-detection"

# Upload the model first, then the tokenizer, to the same repository.
# NOTE(review): HfApi is imported in this cell but never used — confirm it can go.
model.push_to_hub(repo_name)
tokenizer.push_to_hub(repo_name)


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/Unmeshraj/skin-disease-detection/commit/2802dc8f868f4bc0cfc085318efa6b36679285b1', commit_message='Upload tokenizer', commit_description='', oid='2802dc8f868f4bc0cfc085318efa6b36679285b1', pr_url=None, pr_revision=None, pr_num=None)