In [41]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score


etudiants = pd.read_csv("TB_ETUDIANT.csv", encoding='latin-1', delimiter=";")

#creation de dataframe
df = pd.DataFrame(etudiants)

df 

Unnamed: 0,ID_ETUDIANT,Sexe,MoyenneBac,Orientation,NotesMaths,NotesFrançais,NotesPhysChimie,NotesSVT,NotesAnglais,Interets1,Interets2,choixOrientation,Licence
0,ET001,F,15.0,L,16.0,16.0,12.75,8.00,6.0,agriculture,agro-alimentaire,agronomie,oui
1,ET002,F,10.0,S,7.0,15.0,20.00,20.00,13.0,Chimie,sociologie,Biologie,oui
2,ET003,F,10.0,S,14.0,17.0,17.00,17.00,9.0,Architecture,Mathematiques,Genie civil,oui
3,ET004,F,15.0,S,7.0,16.0,12.75,8.00,19.0,Informatique,Multimedia,Informatique,non
4,ET005,F,12.0,S,9.0,13.0,19.00,19.00,15.0,sociologie,Social,Medecine,oui
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1142,ET1145,M,10.0,S,9.0,13.5,14.00,18.00,6.0,Assurance banque,Mathematiques,Hotellerie,oui
1143,ET1146,M,14.0,S,20.0,20.0,20.00,12.00,13.0,Anglais,Mathematiques,informatique,oui
1144,ET1147,M,13.0,S,19.0,19.0,10.00,12.25,9.0,Mathematiques,Genie civil,architecture,oui
1145,ET1148,F,10.0,S,20.0,20.0,17.00,13.50,19.0,Anglais,informatique,informatique,oui


In [42]:
# Séparation des variables indépendantes (X) et de la variable cible (y)
X = df[['MoyenneBac', 'NotesMaths', 'NotesFrançais', 'NotesPhysChimie', 'NotesSVT', 'NotesAnglais',  'Interets1', 'Interets2']]
y = df['choixOrientation']


# Division des données en ensembles d'entraînement et de test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=5)



In [43]:
categorical_cols = ['Interets1', 'Interets2']


preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(), categorical_cols)
    ],
    remainder='passthrough'
)


decision_tree_model = DecisionTreeClassifier()


pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', decision_tree_model)
])


pipeline.fit(X_train, y_train)


y_pred = pipeline.predict(X_test)


accuracy = accuracy_score(y_test, y_pred)
print(f'Precision du modele darbre de decison: {accuracy}')



Precision du modele darbrede decison: 0.1246376811594203


In [44]:
# Nouvel étudiant pour la prédiction
nouvel_etudiant = {
     'MoyenneBac' : 12,
    'NotesMaths': 15,
    'NotesFrançais': 16,
    'NotesPhysChimie': 12,
    'NotesSVT': 10,
    'NotesAnglais': 18,
    'Interets1': 'Chimie',  
    'Interets2': 'Hotellerie' 
}

# Création d'un DataFrame pour le nouvel étudiant
nouvel_etudiant_df = pd.DataFrame([nouvel_etudiant])

# Prédiction pour le nouvel étudiant
prediction_nouvel_etudiant = pipeline.predict(nouvel_etudiant_df)


# Prédiction pour le nouvel étudiant
prediction_nouvel_etudiant = pipeline.predict_proba(nouvel_etudiant_df)

# Get the probabilities of each class for the new student
probabilities = pipeline.predict_proba(nouvel_etudiant_df)

# Get the classes
classes = pipeline.classes_

# Create a dictionary to store the class probabilities
class_probabilities = dict(zip(classes, probabilities[0]))

# Sort the class probabilities in descending order
sorted_probabilities = sorted(class_probabilities.items(), key=lambda x: x[1], reverse=True)

# Select the top 3 to 5 recommended courses
top_recommended_courses = [course for course, _ in sorted_probabilities[:5]]

# Print the recommended courses
print("Recommended courses for the new student:")
for course in top_recommended_courses:
    print(course)




# Affichage de la prédiction pour le nouvel étudiant
# print("Recommandations d'orientation pour le nouvel étudiant :")
# for i, prediction in enumerate(prediction_nouvel_etudiant):
#     print(f"Prédiction {i + 1}: {prediction}")


Recommended courses for the new student:
Anglais
Architecture
Arts
Assurance Banque
Biologie
