In [1]:
import pandas as pd

# Provide the correct path to your CSV file
df = pd.read_csv(r'C:\Users\laptop\Desktop\exported_students_data.csv')

# Display the list of column names
print("Columns:", df.columns.tolist())

# Preview the first 3 rows of the dataset
print(df.head(3))


Columns: ['Full Name', 'Academic Performance', 'Attendance Percentage', 'Seat Zone', 'Grades', 'Academic Stress', 'Motivation', 'Depression', 'Sleep Disorder', 'Study Life Balance', 'Family Pressures', 'Parents Marital Status', 'Family Income Level', 'Housing Status', 'Has Private Study Room', 'Daily Food Availability', 'Has School Uniform', 'Has Stationery', 'Receives Private Tutoring', 'Daily Study Hours', 'Works After School', 'Has Electronic Device', 'Device Usage Purpose', 'Has Social Media Accounts', 'Daily Screen Time', 'Social Media Impact On Studies', 'Content Type Watched', 'Plays Video Games', 'Daily Gaming Hours']
           Full Name Academic Performance  Attendance Percentage Seat Zone  \
0     Joshua Douglas              Average                  86.16    Middle   
1  Christine Houston              Average                  96.45      Back   
2        Jill Joseph              Average                  75.05      Back   

                                              Grades 

In [2]:
print("\n--- df.info() ---")
print(df.info())

print("\n--- df.describe() ---")
print(df.describe())

print("\n--- Missing values per column ---")
print(df.isnull().sum())



--- df.info() ---
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 29 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   Full Name                       5000 non-null   object 
 1   Academic Performance            5000 non-null   object 
 2   Attendance Percentage           5000 non-null   float64
 3   Seat Zone                       5000 non-null   object 
 4   Grades                          5000 non-null   object 
 5   Academic Stress                 5000 non-null   object 
 6   Motivation                      5000 non-null   object 
 7   Depression                      5000 non-null   bool   
 8   Sleep Disorder                  5000 non-null   object 
 9   Study Life Balance              5000 non-null   object 
 10  Family Pressures                5000 non-null   object 
 11  Parents Marital Status          5000 non-null   object 
 12  Family Income L

In [3]:
import numpy as np

def parse_grades(grades_str):
    parts = grades_str.split(';')
    scores = []
    for part in parts:
        part = part.strip()
        if part:
            try:
                subject, score = part.split(':')
                scores.append(float(score))
            except:
                continue
    return np.mean(scores) if scores else np.nan

df['Calculated_GPA'] = df['Grades'].apply(parse_grades)


In [4]:
def traditional_category(gpa, attendance):
    if gpa >= 90: 
        category = "Excellent"
    elif gpa >= 80: 
        category = "Very Good"
    elif gpa >= 70: 
        category = "Good"
    elif gpa >= 60: 
        category = "Average"
    else:
        category = "Needs Improvement"
    
    # Adjust category based on attendance percentage
    if attendance is not None:
        if attendance >= 95 and category != "Excellent":
            if category == "Very Good": 
                category = "Excellent"
            elif category == "Good": 
                category = "Very Good"
            elif category == "Average": 
                category = "Good"
            elif category == "Needs Improvement":
                category = "Average"
        elif attendance < 80 and category != "Needs Improvement":
            if category == "Excellent": 
                category = "Very Good"
            elif category == "Very Good": 
                category = "Good"
            elif category == "Good": 
                category = "Average"
            elif category == "Average": 
                category = "Needs Improvement"
    return category

df['Predicted_Performance_Traditional'] = df.apply(
    lambda row: traditional_category(row['Calculated_GPA'], row['Attendance Percentage']),
    axis=1
)


In [5]:
# Ordinal mapping for selected columns
ordinal_maps = {
    'Academic Stress': {'Low': 1, 'Moderate': 2, 'High': 3},
    'Motivation': {'Low': 1, 'Moderate': 2, 'High': 3},
    'Study Life Balance': {'Needs Improvement': 1, 'Moderate': 2, 'Good': 3},
    'Family Pressures': {'None': 0, 'Low': 1, 'Moderate': 2, 'High': 3},
    'Sleep Disorder': {'None': 0, 'Low': 1, 'Moderate': 2, 'High': 3}
}
for col, mapping in ordinal_maps.items():
    df[col] = df[col].map(mapping)

# Convert Boolean columns to integers
bool_cols = [
    'Depression', 'Has Private Study Room', 'Daily Food Availability',
    'Has School Uniform', 'Has Stationery', 'Receives Private Tutoring',
    'Works After School', 'Has Electronic Device', 'Has Social Media Accounts',
    'Plays Video Games'
]
for col in bool_cols:
    df[col] = df[col].astype(int)


In [6]:
# Define nominal features for One-Hot Encoding
nominal_features = [
    'Seat Zone',
    'Parents Marital Status',
    'Housing Status',
    'Device Usage Purpose',
    'Social Media Impact On Studies',
    'Content Type Watched'
]

# Define features (X) and target (y)
X = df.drop(columns=[
    'Full Name',
    'Grades',
    'Academic Performance',
    'Predicted_Performance_Traditional'
])
y = df['Academic Performance']

# Split the data into training and test sets
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=42
)


In [7]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder

# Define numeric columns that require scaling
numeric_cols = [
    'Attendance Percentage', 'Family Income Level', 'Daily Study Hours',
    'Daily Screen Time', 'Daily Gaming Hours', 'Academic Stress',
    'Motivation', 'Depression', 'Sleep Disorder', 'Study Life Balance',
    'Family Pressures', 'Has Private Study Room', 'Daily Food Availability',
    'Has School Uniform', 'Has Stationery', 'Receives Private Tutoring',
    'Works After School', 'Has Electronic Device', 'Has Social Media Accounts',
    'Plays Video Games', 'Calculated_GPA'
]

preprocessor = ColumnTransformer(transformers=[
    ('num', StandardScaler(), numeric_cols),
    ('cat', OneHotEncoder(drop='first', handle_unknown='ignore'), nominal_features)
])


In [8]:
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import LabelEncoder

# Encode the target variable
le = LabelEncoder()
y_train_enc = le.fit_transform(y_train)
y_test_enc  = le.transform(y_test)

# Apply preprocessing to training and test sets
X_train_processed = preprocessor.fit_transform(X_train)
X_test_processed  = preprocessor.transform(X_test)

# Initialize and train the XGBoost classifier
xgb_model = XGBClassifier(use_label_encoder=False, eval_metric='mlogloss', random_state=42)
xgb_model.fit(X_train_processed, y_train_enc)

# Make predictions on the test set
y_pred = xgb_model.predict(X_test_processed)

# Evaluate the model
acc = accuracy_score(y_test_enc, y_pred)
print("Accuracy on test set:", acc)

cm = confusion_matrix(y_test_enc, y_pred)
print("\nConfusion Matrix:")
print(cm)

print("\nClassification Report:")
print(classification_report(y_test_enc, y_pred, target_names=le.classes_))


Parameters: { "use_label_encoder" } are not used.



Accuracy on test set: 0.9866666666666667

Confusion Matrix:
[[1070    9    0]
 [   8  375    0]
 [   3    0   35]]

Classification Report:
                   precision    recall  f1-score   support

          Average       0.99      0.99      0.99      1079
             Good       0.98      0.98      0.98       383
Needs Improvement       1.00      0.92      0.96        38

         accuracy                           0.99      1500
        macro avg       0.99      0.96      0.98      1500
     weighted avg       0.99      0.99      0.99      1500



In [9]:
compare_table = pd.crosstab(
    df['Academic Performance'],           # Actual academic performance
    df['Predicted_Performance_Traditional'] # Traditional prediction based on GPA and attendance
)
print(compare_table)


Predicted_Performance_Traditional  Average  Excellent  Good  \
Academic Performance                                          
Average                               1299          0   627   
Good                                     0        329   358   
Needs Improvement                        1          0     0   

Predicted_Performance_Traditional  Needs Improvement  Very Good  
Academic Performance                                             
Average                                         1668          3  
Good                                               0        589  
Needs Improvement                                126          0  


In [10]:
class Student(models.Model):
    # Basic Information
    full_name = models.CharField(max_length=255)
    enrollment_date = models.DateField()
    date_of_birth = models.DateField()
    gender = models.CharField(max_length=100, choices=GenderChoices.choices)
    # ... (other fields)

    academic_performance = models.CharField(
        max_length=100,
        choices=ACADEMIC_PERFORMANCE_CHOICES,
        null=True, blank=True
    )
    
    # Business Logic: Compute Academic Performance
    def calculate_academic_performance(self):
        # Normalize academic metrics
        norm_attendance = self.attendance_percentage / 100.0
        norm_score = self.get_average_score() / 100.0
        # Encode psychosocial, technological, and socioeconomic factors
        # (using data from related HealthInformation, EconomicSituation, and SocialMediaAndTechnology models)
        # Apply weighted contributions and compute performance index
        perf_index = (/* computed value */)
        
        # Determine performance category based on index thresholds
        if perf_index >= 0.85:
            performance_category = "Excellent"
        elif perf_index >= 0.75:
            performance_category = "Very Good"
        elif perf_index >= 0.65:
            performance_category = "Good"
        elif perf_index >= 0.50:
            performance_category = "Average"
        else:
            performance_category = "Needs Improvement"
        return performance_category, perf_index


SyntaxError: invalid syntax (<ipython-input-10-17a4c652eeba>, line 23)