<a href="https://colab.research.google.com/github/TechSlinger/TechSlinger.github.io/blob/main/diagnostic_panne_v_final.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install transformers



In [None]:
from transformers import BertTokenizer, BertForSequenceClassification
from torch.utils.data import DataLoader, TensorDataset
import torch
from torch.optim import AdamW
from sklearn.metrics import accuracy_score

# Define your symptoms and potential problems
symptoms = [
    "L'ordinateur ne démarre pas",
    "L'écran est noir",
    "L'ordinateur est lent",
    "L'ordinateur surchauffe",
    "Le réseau ne fonctionne pas",
    "Les périphériques USB ne sont pas reconnus",
    "L'écran clignote",
    "L'ordinateur émet des bip au démarrage",
    "Les applications se ferment de manière inattendue",
    "L'ordinateur affiche des messages d'erreur",
    "Les touches du clavier ne fonctionnent pas",
    "Les ventilateurs font un bruit anormal"
]

potential_problems = [
    "Problème d'alimentation, carte mère défectueuse",
    "Problème d'écran, carte graphique défectueuse",
    "Problème de mémoire, disque dur plein",
    "Problème de ventilation, accumulation de poussière",
    "Problème de connexion, carte réseau défectueuse",
    "Problème de pilotes, ports USB défectueux",
    "Problème de carte graphique, pilotes graphiques défectueux",
    "Problème de RAM, carte mère défectueuse",
    "Problème de mémoire, virus informatique",
    "Problème de système d'exploitation, fichiers système corrompus",
    "Problème de clavier, pilotes de clavier défectueux",
    "Problème de ventilateur, accumulation de poussière"
]

# Load the pre-trained BERT model and tokenizer
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=len(potential_problems))
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Tokenize the input text and convert to PyTorch tensors
input_ids = tokenizer(symptoms, padding=True, truncation=True, return_tensors="pt")['input_ids']
labels = torch.tensor([potential_problems.index(problem) for problem in potential_problems])

# Create a PyTorch dataset
dataset = TensorDataset(input_ids, labels)

# Define a DataLoader for the dataset
dataloader = DataLoader(dataset, batch_size=4)

# Define the optimizer
optimizer = AdamW(model.parameters(), lr=5e-5)

# Fine-tune the model
model.train()
for batch in dataloader:
    optimizer.zero_grad()
    input_ids, labels = batch
    outputs = model(input_ids, labels=labels)
    loss = outputs.loss
    loss.backward()
    optimizer.step()

# Save the fine-tuned model
model.save_pretrained('fine_tuned_model')

# Calculate accuracy
model.eval()
predictions = []
true_labels = []
for batch in dataloader:
    with torch.no_grad():
        input_ids, labels = batch
        outputs = model(input_ids)
        logits = outputs.logits
        predictions.extend(torch.argmax(logits, dim=1).tolist())
        true_labels.extend(labels.tolist())

accuracy = accuracy_score(true_labels, predictions)
print(f"Accuracy: {accuracy}")


In [None]:
# Load the fine-tuned model
model = BertForSequenceClassification.from_pretrained('fine_tuned_model')

# Prepare your test data
test_symptoms = [
    "L'ordinateur ne démarre pas",
    "L'écran est noir",
    "L'ordinateur est lent"
]

# Tokenize the test data
test_input_ids = tokenizer(test_symptoms, padding=True, truncation=True, return_tensors="pt")['input_ids']

# Make predictions
model.eval()
with torch.no_grad():
    outputs = model(test_input_ids)
    logits = outputs.logits
    predictions = torch.argmax(logits, dim=1)

# Convert predictions to potential problems
predicted_problems = [potential_problems[pred.item()] for pred in predictions]

# Print the predicted potential problems
for symptom, problem in zip(test_symptoms, predicted_problems):
    print(f"Symptom: {symptom}\nPredicted Problem: {problem}\n")


Symptom: L'ordinateur ne démarre pas
Predicted Problem: Problème de mémoire, disque dur plein

Symptom: L'écran est noir
Predicted Problem: Problème de connexion, carte réseau défectueuse

Symptom: L'ordinateur est lent
Predicted Problem: Problème de mémoire, disque dur plein



In [2]:
from transformers import BertTokenizer, BertForSequenceClassification
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
import torch
from torch.optim import AdamW
from sklearn.metrics import accuracy_score

# Define your symptoms and potential problems
symptoms = [
    "L'ordinateur ne démarre pas",
    "L'écran est noir",
    "L'ordinateur est lent",
    "L'ordinateur surchauffe",
    "Le réseau ne fonctionne pas",
    "Les périphériques USB ne sont pas reconnus",
    "L'écran clignote",
    "L'ordinateur émet des bip au démarrage",
    "Les applications se ferment de manière inattendue",
    "L'ordinateur affiche des messages d'erreur",
    "Les touches du clavier ne fonctionnent pas",
    "Les ventilateurs font un bruit anormal"
]

potential_problems = [
    "Problème d'alimentation, carte mère défectueuse",
    "Problème d'écran, carte graphique défectueuse",
    "Problème de mémoire, disque dur plein",
    "Problème de ventilation, accumulation de poussière",
    "Problème de connexion, carte réseau défectueuse",
    "Problème de pilotes, ports USB défectueux",
    "Problème de carte graphique, pilotes graphiques défectueux",
    "Problème de RAM, carte mère défectueuse",
    "Problème de mémoire, virus informatique",
    "Problème de système d'exploitation, fichiers système corrompus",
    "Problème de clavier, pilotes de clavier défectueux",
    "Problème de ventilateur, accumulation de poussière"
]

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(symptoms, potential_problems, test_size=0.2, random_state=42)

# Load the pre-trained BERT model and tokenizer
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=len(potential_problems))
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Tokenize the input text and convert to PyTorch tensors
train_input_ids = tokenizer(X_train, padding=True, truncation=True, return_tensors="pt")['input_ids']
train_labels = torch.tensor([potential_problems.index(problem) for problem in y_train])

# Create a PyTorch dataset
train_dataset = TensorDataset(train_input_ids, train_labels)

# Define a DataLoader for the training dataset
train_dataloader = DataLoader(train_dataset, batch_size=4)

# Define the optimizer
optimizer = AdamW(model.parameters(), lr=5e-5)

# Fine-tune the model
model.train()
for batch in train_dataloader:
    optimizer.zero_grad()
    input_ids, labels = batch
    outputs = model(input_ids, labels=labels)
    loss = outputs.loss
    loss.backward()
    optimizer.step()

# Save the fine-tuned model
model.save_pretrained('fine_tuned_model')

# Calculate accuracy on the testing set
test_input_ids = tokenizer(X_test, padding=True, truncation=True, return_tensors="pt")['input_ids']
test_labels = torch.tensor([potential_problems.index(problem) for problem in y_test])

test_dataset = TensorDataset(test_input_ids, test_labels)
test_dataloader = DataLoader(test_dataset, batch_size=4)

model.eval()
predictions = []
true_labels = []
for batch in test_dataloader:
    with torch.no_grad():
        input_ids, labels = batch
        outputs = model(input_ids)
        logits = outputs.logits
        predictions.extend(torch.argmax(logits, dim=1).tolist())
        true_labels.extend(labels.tolist())

accuracy = accuracy_score(true_labels, predictions)
print(f"Accuracy: {accuracy}")

# Allow the user to enter a symptom and predict potential problems
user_symptom = input("Enter a symptom: ")
user_input_ids = tokenizer(user_symptom, padding=True, truncation=True, return_tensors="pt")['input_ids']

model.eval()
with torch.no_grad():
    outputs = model(user_input_ids)
    logits = outputs.logits
    prediction = torch.argmax(logits, dim=1).item()

predicted_problem = potential_problems[prediction]
print(f"Symptom: {user_symptom}\nPredicted Problem: {predicted_problem}")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.


Accuracy: 0.0
Enter a symptom:     "L'ordinateur ne démarre pas"
Symptom:     "L'ordinateur ne démarre pas"
Predicted Problem: Problème de connexion, carte réseau défectueuse
