<a href="https://colab.research.google.com/github/Dhiyaneshwar-A/CAMPALIN_PROJ/blob/main/Student_Performance.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **STUDENT PERFORMANCE PREDICTION by various models**

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import time
import numpy as np
import warnings
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import Perceptron, LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, accuracy_score

**UNDERSTANDING DATA**

In [None]:
# Silence every warning category so library messages do not clutter the cell outputs.
warnings.filterwarnings(action='ignore')

# Read the student records from the absolute /content path used by this notebook.
DATA_PATH = "/content/AI-Data.csv"
data = pd.read_csv(DATA_PATH)

# Preview the first five rows to check the column names and value formats.
data.head(n=5)

Unnamed: 0,Stud_id,gender,NationalITy,PlaceofBirth,StageID,GradeID,SectionID,Topic,Semester,Relation,raisedhands,VisITedResources,AnnouncementsView,Discussion,ParentAnsweringSurvey,ParentschoolSatisfaction,StudentAbsenceDays,Class
0,1,M,KW,KuwaIT,lowerlevel,G-04,A,IT,F,Father,15,16,2,20,Yes,Good,Under-7,M
1,2,M,KW,KuwaIT,lowerlevel,G-04,A,IT,F,Father,20,20,3,25,Yes,Good,Under-7,M
2,3,M,KW,KuwaIT,lowerlevel,G-04,A,IT,F,Father,10,7,0,30,No,Bad,Above-7,L
3,4,M,KW,KuwaIT,lowerlevel,G-04,A,IT,F,Father,30,25,5,35,No,Bad,Above-7,L
4,5,M,KW,KuwaIT,lowerlevel,G-04,A,IT,F,Father,40,50,12,50,No,Bad,Above-7,M


**SETTING LABELS**

In [None]:
# Encode categorical variables.
# Each text column in `data` is replaced by integer codes, and the fitted encoder is
# stored in `label_encoders` so the codes can be mapped back to the original labels.
# NOTE: `data` is overwritten in place. Re-running this cell without re-running the
# load cell first would re-fit the encoders on the integer codes instead of the text.
label_encoders = {}
categorical_cols = ['gender', 'NationalITy', 'PlaceofBirth', 'StageID', 'GradeID',
                    'SectionID', 'Topic', 'Semester', 'Relation', 'ParentAnsweringSurvey',
                    'ParentschoolSatisfaction', 'StudentAbsenceDays', 'Class']

for col in categorical_cols:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])
    label_encoders[col] = le

# Feature selection: the encoded categorical attributes plus the four numeric
# activity columns. 'Stud_id' and the target are deliberately left out.
features = ['gender', 'NationalITy', 'PlaceofBirth', 'StageID', 'GradeID',
            'SectionID', 'Topic', 'Semester', 'Relation', 'raisedhands',
            'VisITedResources', 'AnnouncementsView', 'Discussion',
            'ParentAnsweringSurvey', 'ParentschoolSatisfaction',
            'StudentAbsenceDays']

target = 'Class'

# Explicit copy: the columns of X are reassigned below, and assigning into a frame
# derived from `data` without copying is the chained-assignment pattern pandas warns about.
X = data[features].copy()
y = data[target]

numerical_cols = ['raisedhands', 'VisITedResources', 'AnnouncementsView', 'Discussion']

# Split BEFORE scaling (70% training, 30% testing) so the scaler never sees test rows.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, shuffle=True)
X_train = X_train.copy()
X_test = X_test.copy()

# Standardize numerical features: learn mean/std from the training rows only, then
# apply that same transformation to the test rows (avoids train/test leakage).
scaler = StandardScaler()
X_train[numerical_cols] = scaler.fit_transform(X_train[numerical_cols])
X_test[numerical_cols] = scaler.transform(X_test[numerical_cols])

# Keep the full matrix X standardized as well, using the training-set statistics.
X[numerical_cols] = scaler.transform(X[numerical_cols])

**INITIALIZING MODELS**

In [None]:
# Candidate classifiers, keyed by the display name used in the printed reports.
# Every estimator is created with random_state=42; the two iterative solvers
# (logistic regression and the MLP) are allowed up to 1000 iterations.
models = dict([
    ('Decision Tree', DecisionTreeClassifier(random_state=42)),
    ('Random Forest', RandomForestClassifier(random_state=42)),
    ('Perceptron', Perceptron(random_state=42)),
    ('Logistic Regression', LogisticRegression(random_state=42, max_iter=1000)),
    ('MLP Classifier', MLPClassifier(random_state=42, max_iter=1000)),
])

# Fit each classifier on the training split and report its accuracy on the test split.
# (classification_report is imported and can be printed here for per-class metrics.)
for name in models:
    model = models[name]
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"{name} Accuracy: {accuracy:.3f}")


Decision Tree Accuracy: 0.674
Random Forest Accuracy: 0.806
Perceptron Accuracy: 0.542
Logistic Regression Accuracy: 0.778
MLP Classifier Accuracy: 0.778


**PREDICT USING THE MODEL** : Predicting student performance based on 5 different models

In [None]:
# Function to predict student performance based on student ID with detailed text output
def predict_student_performance(student_id):
    """Print one student's details and every trained model's predicted performance.

    The student is looked up in the module-level, label-encoded ``data`` frame.
    The record is printed with its categorical codes decoded back to text, followed
    by one prediction line per entry in ``models``.

    Parameters
    ----------
    student_id
        Value matched against the ``'Stud_id'`` column of ``data``.

    Returns
    -------
    None
        Everything is reported through ``print``. If no row has the given ID, a
        "not found" message is printed and nothing else happens.
    """
    matches = data['Stud_id'] == student_id
    if not matches.any():
        print(f"Student ID {student_id} not found in the dataset.")
        return

    # Explicit copy so nothing below writes into (or warns about) a slice of `data`.
    # These rows stay label-encoded, which is the form the models were trained on.
    encoded_rows = data.loc[matches].copy()
    print(f"\nDetails for Student ID {student_id}:\n")

    # Display relevant details
    details_cols = ['gender', 'NationalITy', 'PlaceofBirth', 'StageID', 'GradeID',
                    'SectionID', 'Topic', 'Semester', 'Relation', 'raisedhands',
                    'VisITedResources', 'AnnouncementsView', 'Discussion',
                    'ParentAnsweringSurvey', 'ParentschoolSatisfaction',
                    'StudentAbsenceDays']

    # Decode categorical codes on a separate display copy only; the encoded rows are
    # reused for prediction, so no decode -> re-encode round trip is needed.
    readable = encoded_rows[details_cols].copy()
    for col in categorical_cols:
        if col in readable.columns:
            readable[col] = label_encoders[col].inverse_transform(readable[col])

    print(readable.to_string(index=False))

    # Prepare data for prediction: same feature columns and the same fitted scaler
    # that were used for the training data.
    student_features = encoded_rows[features].copy()
    student_features[numerical_cols] = scaler.transform(student_features[numerical_cols])

    # Class label -> (headline, explanation) shown to the user.
    performance_text = {
        'H': ('GOOD', "STUDENT STUDY WELL"),
        'M': ('AVERAGE', "STUDENT STUDY AVERAGELY"),
        'L': ('POOR', "STUDENT STUDY POORLY"),
    }
    fallback_explanation = "The model's prediction is unclear or unexpected."

    print("\nPredicted Performance Summary:\n")
    for name, model in models.items():
        # If several rows share the ID, only the first row's prediction is reported.
        prediction = model.predict(student_features)[0]
        label = label_encoders['Class'].inverse_transform([prediction])[0]

        # Unknown labels are printed as-is together with the fallback explanation.
        performance, explanation = performance_text.get(label, (label, fallback_explanation))

        print(f"{name} Prediction: {performance} - {explanation}\n")

# Demo call: print the stored details and all model predictions for one student.
# This assumes the 'Stud_id' column holds the unique identifier of each student.
predict_student_performance(student_id=12)



Details for Student ID 12:

gender NationalITy PlaceofBirth      StageID GradeID SectionID Topic Semester Relation  raisedhands  VisITedResources  AnnouncementsView  Discussion ParentAnsweringSurvey ParentschoolSatisfaction StudentAbsenceDays
     M          KW       KuwaIT MiddleSchool    G-07         B  Math        F   Father           19                 6                 19          12                   Yes                     Good            Under-7

Predicted Performance Summary:

Decision Tree Prediction: AVERAGE - STUDENT STUDY AVERAGELY

Random Forest Prediction: AVERAGE - STUDENT STUDY AVERAGELY

Perceptron Prediction: POOR - STUDENT STUDY POORLY

Logistic Regression Prediction: AVERAGE - STUDENT STUDY AVERAGELY

MLP Classifier Prediction: AVERAGE - STUDENT STUDY AVERAGELY

