In [6]:
import pandas as pd

# Hand-curated lookup: disease name -> list of symptom keywords.
# NOTE(review): several keywords look like spelling variants of one symptom
# ("itchy"/"itching", "red"/"redness", "scaly"/"scales", "thick"/"thickened").
# Each variant becomes its own column below. They are left untouched here
# because merging them would change the exported columns -- confirm with
# whoever consumes the CSV before normalising.
skin_diseases = {
    "Eczema": ["itchy", "red", "dry", "thickened"],
    "Psoriasis": ["red", "thick", "scaly", "itching"],
    "Acne": ["pimples", "blackheads", "scars", "swelling"],
    "Rosacea": ["redness", "bumps", "burning"],
    "Contact Dermatitis": ["red", "itchy", "blisters"],
    "Fungal Infections": ["itchy", "scaly", "flaky"],
    "Skin Cancer": ["growth", "moles", "coloration"],
    "Shingles": ["painful", "blisters", "burning"],
    "Vitiligo": ["patches", "loss"],
    "Seborrheic Dermatitis": ["greasy", "scales", "dandruff"],
    "Impetigo": ["blisters", "honey", "crusty"],
    "Lichen Planus": ["itchy", "purple", "flat"],
    "Hives": ["raised", "itchy", "red"],
    "Scabies": ["itchy", "burrows", "rash"],
    "Basal Cell Carcinoma": ["nodules", "shiny", "ulcerated"],
    "Squamous Cell Carcinoma": ["red", "scaly", "warts"],
    "Keratosis": ["thickened", "scaly", "rough"],
    "Milia": ["small", "white", "cysts"],
}

# Every distinct symptom keyword across all diseases.
all_symptoms = {symptom for symptoms in skin_diseases.values() for symptom in symptoms}

# Alphabetical order fixes the column order of the encoded table.
all_symptoms_list = sorted(all_symptoms)

# One 0/1 row per disease (dict order), one entry per symptom column.
# Building all rows up front and handing them to the constructor once replaces
# the element-by-element `.at` writes into a zero-filled frame.
encoded_rows = [
    [int(symptom in present) for symptom in all_symptoms_list]
    for present in map(set, skin_diseases.values())
]

# Naming the index "Disease" lets a single reset_index() turn it into the
# leading "Disease" column, leaving a default 0..n-1 row index that is easy to
# save as CSV. dtype is pinned so the symptom columns stay int64.
df = pd.DataFrame(
    encoded_rows,
    index=pd.Index(list(skin_diseases), name="Disease"),
    columns=all_symptoms_list,
    dtype="int64",
).reset_index()


In [7]:
# Display the encoded table; as the cell's last expression it is rendered by the notebook.
df

Unnamed: 0,Disease,blackheads,blisters,bumps,burning,burrows,coloration,crusty,cysts,dandruff,...,scaly,scars,shiny,small,swelling,thick,thickened,ulcerated,warts,white
0,Eczema,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
1,Psoriasis,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,1,0,0,0,0
2,Acne,1,0,0,0,0,0,0,0,0,...,0,1,0,0,1,0,0,0,0,0
3,Rosacea,0,0,1,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Contact Dermatitis,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,Fungal Infections,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
6,Skin Cancer,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,Shingles,0,1,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,Vitiligo,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,Seborrheic Dermatitis,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0


In [None]:
# Save the encoded table as CSV. The path is defined once so the file that is
# written and the confirmation message cannot drift apart.
output_csv = "skin_diseases_symptoms.csv"

# index=False leaves the DataFrame's row index out of the file.
df.to_csv(output_csv, index=False)

print(f"Dataset created and saved as '{output_csv}'.")