In [1]:
import pandas as pd

# Créer un dataset simulé
data = {
    "symptom1": ["fever", "cough", "headache", "fatigue"],
    "symptom2": ["headache", "chest pain", "blurred vision", "fever"],
    "symptom3": ["nausea", "fever", "nausea", "body ache"],
    "disease": ["Malaria", "Pneumonia", "Migraine", "Dengue"]
}

df = pd.DataFrame(data)
df.to_csv("symptoms_diseases.csv", index=False)
print("Fichier CSV créé : symptoms_diseases.csv")


Fichier CSV créé : symptoms_diseases.csv


In [15]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score

# Charger le fichier CSV
df = pd.read_csv("symptoms_diseases.csv")

# Combiner les colonnes de symptômes en une seule
df["all_symptoms"] = df[["symptom1", "symptom2", "symptom3"]].apply(lambda x: " ".join(x), axis=1)

# Diviser les données en entrées (X) et étiquettes (y)
X = df["all_symptoms"]
y = df["disease"]

# Diviser les données en ensembles d'entraînement et de test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Pipeline avec vectorisation et classification
model = Pipeline([
    ("vectorizer", CountVectorizer()),  # Transforme le texte en vecteurs numériques
    ("classifier", RandomForestClassifier(n_estimators=100, random_state=42))  # Modèle de classification
])

# Entraîner le modèle
model.fit(X_train, y_train)

# Évaluer le modèle
y_pred = model.predict(X_test)
print("Précision :", accuracy_score(y_test, y_pred))

# Sauvegarder le modèle
import joblib
joblib.dump(model, "disease_classifier.pkl")
print("Modèle sauvegardé : disease_classifier.pkl")



Précision : 0.6666666666666666
Modèle sauvegardé : disease_classifier.pkl


In [4]:
pip install scikit-learn





You should consider upgrading via the 'C:\Users\rosie\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.8_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip' command.


In [6]:
print(df.head())  # Vérifiez les premières lignes du dataset
print(df["disease"].value_counts())  # Vérifiez le nombre d'occurrences pour chaque maladie
print(df.isnull().sum())  # Vérifiez les valeurs manquantes



   symptom1        symptom2   symptom3    disease   
0     fever        headache     nausea    Malaria  \
1     cough      chest pain      fever  Pneumonia   
2  headache  blurred vision     nausea   Migraine   
3   fatigue           fever  body ache     Dengue   

                     all_symptoms  
0           fever headache nausea  
1          cough chest pain fever  
2  headache blurred vision nausea  
3         fatigue fever body ache  
disease
Malaria      1
Pneumonia    1
Migraine     1
Dengue       1
Name: count, dtype: int64
symptom1        0
symptom2        0
symptom3        0
disease         0
all_symptoms    0
dtype: int64


In [8]:
vectorizer = CountVectorizer()
X_vect = vectorizer.fit_transform(X)
print(vectorizer.get_feature_names_out())  # Liste des mots utilisés par le modèle
print(X_vect.toarray())  # Représentation numérique des symptômes


['ache' 'blurred' 'body' 'chest' 'cough' 'fatigue' 'fever' 'headache'
 'nausea' 'pain' 'vision']
[[0 0 0 0 0 0 1 1 1 0 0]
 [0 0 0 1 1 0 1 0 0 1 0]
 [0 1 0 0 0 0 0 1 1 0 1]
 [1 0 1 0 0 1 1 0 0 0 0]]


In [14]:
from faker import Faker
import random

fake = Faker()

diseases = ["Malaria", "Pneumonia", "Migraine", "Dengue", "Flu"]
symptoms = ["fever", "cough", "headache", "nausea", "fatigue", "body ache", "chest pain", "blurred vision"]

# Générer des exemples synthétiques
synthetic_data = []
for _ in range(100):
    synthetic_data.append({
        "symptom1": random.choice(symptoms),
        "symptom2": random.choice(symptoms),
        "symptom3": random.choice(symptoms),
        "disease": random.choice(diseases)
    })

synthetic_df = pd.DataFrame(synthetic_data)
synthetic_df.to_csv("synthetic_symptoms_diseases.csv", index=False)


In [13]:
pip install faker


Collecting faker
  Downloading Faker-33.1.0-py3-none-any.whl (1.9 MB)
Installing collected packages: faker
Successfully installed faker-33.1.0
Note: you may need to restart the kernel to use updated packages.


You should consider upgrading via the 'C:\Users\rosie\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.8_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip' command.


In [12]:
python -m pip install --upgrade pip


SyntaxError: invalid syntax (3439513114.py, line 1)

In [16]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

# Exemple de dataset
data = {
    'Symptômes': ['Fièvre, toux, fatigue', 'Mal de tête, nausée, vomissement', 'Douleur thoracique, essoufflement'],
    'Maladie': ['Grippe', 'Migraine', 'Crise cardiaque']
}
df = pd.DataFrame(data)

# Vectorisation des symptômes
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(df['Symptômes'])
y = df['Maladie']

# Diviser en ensemble d'entraînement et de test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [7]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

# Construction du modèle séquentiel
model = Sequential([
    Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
    Dropout(0.2),
    Dense(64, activation='relu'),
    Dense(len(df['Maladie'].unique()), activation='softmax')  # Pour la classification multi-classe
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Entraînement
model.fit(X_train.toarray(), y_train.factorize()[0], epochs=10, batch_size=8, validation_data=(X_test.toarray(), y_test.factorize()[0]))
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score

# Modèle
model = MultinomialNB()
model.fit(X_train, y_train)

# Prédiction
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))


ModuleNotFoundError: No module named 'tensorflow'

In [4]:
pip install scipy scikit-learn

Collecting numpy<2.3,>=1.22.4
  Using cached numpy-2.0.2-cp39-cp39-win_amd64.whl (15.9 MB)
Installing collected packages: numpy
Successfully installed numpy-2.0.2
Note: you may need to restart the kernel to use updated packages.


ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
daal4py 2021.6.0 requires daal==2021.4.0, which is not installed.
numba 0.55.1 requires numpy<1.22,>=1.18, but you have numpy 2.0.2 which is incompatible.
langchain 0.0.163 requires numpy<2,>=1, but you have numpy 2.0.2 which is incompatible.


In [3]:
pip install tensorflow


Collecting tensorflow
  Using cached tensorflow-2.13.1-cp38-cp38-win_amd64.whl (1.9 kB)
  Using cached tensorflow-2.13.0-cp38-cp38-win_amd64.whl (1.9 kB)
Collecting tensorflow-intel==2.13.0
  Downloading tensorflow_intel-2.13.0-cp38-cp38-win_amd64.whl (276.5 MB)
Note: you may need to restart the kernel to use updated packages.


ERROR: Exception:
Traceback (most recent call last):
  File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.8_3.8.2800.0_x64__qbz5n2kfra8p0\lib\site-packages\pip\_vendor\urllib3\response.py", line 438, in _error_catcher
    yield
  File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.8_3.8.2800.0_x64__qbz5n2kfra8p0\lib\site-packages\pip\_vendor\urllib3\response.py", line 519, in read
    data = self._fp.read(amt) if not fp_closed else b""
  File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.8_3.8.2800.0_x64__qbz5n2kfra8p0\lib\site-packages\pip\_vendor\cachecontrol\filewrapper.py", line 62, in read
    data = self.__fp.read(amt)
  File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.8_3.8.2800.0_x64__qbz5n2kfra8p0\lib\http\client.py", line 455, in read
    n = self.readinto(b)
  File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.8_3.8.2800.0_x64__qbz5n2kfra8p0\lib\http\client.py", line 499, in readinto
 

In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# Charger les données
data = pd.read_csv("dataset.csv")  # Remplacez par votre chemin
X = data["Symptomes"]
y = data["Maladie"]

# Vectorisation des textes
vectorizer = TfidfVectorizer()
X_vectorized = vectorizer.fit_transform(X)

# Diviser en train/test
X_train, X_test, y_train, y_test = train_test_split(X_vectorized, y, test_size=0.2, random_state=42)

# Modèle de classification (Logistic Regression)
model = LogisticRegression()
model.fit(X_train, y_train)

# Prédictions
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))


KeyError: 'Symptomes'

In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# Charger les données
data = pd.read_csv("dataset.csv")  # Remplacez par votre chemin
X = data["Symptomes"]
y = data["Maladie"]

# Vectorisation des textes
vectorizer = TfidfVectorizer()
X_vectorized = vectorizer.fit_transform(X)

# Diviser en train/test
X_train, X_test, y_train, y_test = train_test_split(X_vectorized, y, test_size=0.2, random_state=42)

# Modèle de classification (Logistic Regression)
model = LogisticRegression()
model.fit(X_train, y_train)

# Prédictions
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))


In [None]:
from transformers import pipeline

# Charger un modèle BERT pour les questions-réponses
qa_pipeline = pipeline("question-answering", model="bert-large-uncased-whole-word-masking-finetuned-squad")

# Exemple de prédiction
context = "La sinusite est une inflammation des sinus qui peut causer de la fièvre et des maux de tête."
question = "Quels sont les symptômes de la sinusite ?"

result = qa_pipeline(question=question, context=context)
print(result["answer"])


In [None]:
from flask import Flask, request, jsonify

app = Flask(__name__)

@app.route('/predict', methods=['POST'])
def predict():
    user_input = request.json.get('symptoms')
    symptoms_vectorized = vectorizer.transform([user_input])
    prediction = model.predict(symptoms_vectorized)
    return jsonify({"disease": prediction[0]})

if __name__ == "__main__":
    app.run(debug=True)
