In [62]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix as sk_confusion_matrix
import joblib

# Load the dataset
df = pd.read_csv('C:\\Users\\Admin\\Desktop\\tttaskkk\\Student_performance_data _.csv')

# Data Cleaning
df = df.drop('StudentID', axis=1)

# Feature Engineering
df['ParentalEducation_StudyTime'] = df['ParentalEducation'] * df['StudyTimeWeekly']
df['StudyAbsenceRatio'] = df['StudyTimeWeekly'] / (df['Absences'] + 1)

# Advanced Feature Engineering
numerical_features = df[['Age', 'StudyTimeWeekly', 'Absences', 'GPA', 'ParentalEducation_StudyTime', 'GradeClass', 'StudyAbsenceRatio']]
poly = PolynomialFeatures(degree=2, interaction_only=False, include_bias=False)
poly_features = poly.fit_transform(numerical_features)
poly_features_df = pd.DataFrame(poly_features, columns=poly.get_feature_names_out(numerical_features.columns))
df = pd.concat([df, poly_features_df], axis=1)

# Save the polynomial features transformer
joblib.dump(poly, 'poly.pkl')

# Data Preparation
df = df.loc[:, ~df.columns.duplicated()]
x = df.drop('GradeClass', axis=1)
y = df['GradeClass']

# Train-Test Split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# Data Scaling
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

# Save the scaler
joblib.dump(scaler, 'scaler.pkl')

# Model Training
model = LogisticRegression(solver='liblinear')
model.fit(x_train_scaled, y_train)

# Save the model
joblib.dump(model, 'logistic_regression_model.pkl')

# Model Evaluation
y_pred = model.predict(x_test_scaled)

# Rename the variable to avoid conflict with the function name
confusion_matrix_result = sk_confusion_matrix(y_test, y_pred)

print("Confusion Matrix:")
print(confusion_matrix_result)

classification_report_result = classification_report(y_test, y_pred)
print("Classification Report:")
print(classification_report_result)

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)


Confusion Matrix:
[[ 20   2   0   0   0]
 [  0  48   1   0   0]
 [  0   2  79   4   0]
 [  0   0   2  83   1]
 [  0   0   0   0 237]]
Classification Report:
              precision    recall  f1-score   support

         0.0       1.00      0.91      0.95        22
         1.0       0.92      0.98      0.95        49
         2.0       0.96      0.93      0.95        85
         3.0       0.95      0.97      0.96        86
         4.0       1.00      1.00      1.00       237

    accuracy                           0.97       479
   macro avg       0.97      0.96      0.96       479
weighted avg       0.98      0.97      0.97       479

Accuracy: 0.9749478079331941


NameError: name 'request' is not defined