In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only input tree and print the full path of every file found,
# in the order os.walk yields them.
input_paths = (
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/student111/Students Performance Prediction.csv


In [2]:
# Importing necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score
import pickle

# Load the dataset
df = pd.read_csv('/kaggle/input/student111/Students Performance Prediction.csv')

# Data Cleaning and Preprocessing

# A stray '6' in the Notes column is recoded as 'No'.
df['Notes'] = df['Notes'].replace('6', 'No')

# Scholarship is text with a trailing '%': strip the sign, treat the literal
# 'None' as 0, and rescale to a 0-1 fraction.
# NOTE(review): recent pandas releases list 'None' among read_csv's default NA
# tokens; if that applies here the replace below is a no-op and those rows
# reach the median imputer as NaN instead of 0 - confirm against the data.
df['Scholarship'] = df['Scholarship'].str.rstrip('%').replace('None', '0').astype(float) / 100

# Collapse Student_Age onto three labels: '18' and '19-22' are kept, any other
# value becomes '23-27'. Missing values are left as NaN so the categorical
# 'most_frequent' imputer can fill them, rather than being silently assigned
# to the oldest bucket.
# NOTE(review): the match is against string labels, so a numerically-typed
# column would map every non-missing row to '23-27' - confirm the column dtype.
student_age = df['Student_Age']
keep_as_is = student_age.isin(['18', '19-22']) | student_age.isna()
df['Student_Age'] = student_age.where(keep_as_is, '23-27')

# Target is the Grade column; predictors are every other column except the
# Student_ID identifier.
y = df['Grade']
X = df.drop(columns=['Grade', 'Student_ID'])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Column groups, by the kind of preprocessing each one receives
numeric_features = ['Scholarship', 'Weekly_Study_Hours']
categorical_features = [
    'Student_Age', 'Sex', 'High_School_Type', 'Additional_Work',
    'Sports_activity', 'Transportation', 'Attendance', 'Reading',
    'Notes', 'Listening_in_Class', 'Project_work',
]

# Numeric columns: fill missing values with the median, then standardize
numeric_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
])

# Categorical columns: fill missing values with the most frequent label, then
# one-hot encode; handle_unknown='ignore' keeps unseen labels from raising
categorical_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

# Route each column group through its own transformer
preprocessor = ColumnTransformer([
    ('num', numeric_transformer, numeric_features),
    ('cat', categorical_transformer, categorical_features),
])

# End-to-end estimator: preprocessing followed by a seeded random forest
model_pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(n_estimators=100, random_state=42)),
])

# Fit the preprocessing + classifier pipeline on the training split
model_pipeline.fit(X_train, y_train)

# Score the held-out split and report plain accuracy to two decimals
y_pred = model_pipeline.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {test_accuracy:.2f}")

# Persist the fitted pipeline (preprocessing included) to the working directory
with open('student_grade_predictor.pkl', 'wb') as f:
    pickle.dump(model_pipeline, f)

# Function to predict new data
def predict_new_data(input_data, model_path='student_grade_predictor.pkl'):
    """
    Predict the grade for one new student record using the saved model.

    Parameters
    ----------
    input_data : dict
        Feature name -> value for a single student. Must contain a numeric
        'Scholarship' entry; a value greater than 1 is treated as a percentage
        and divided by 100, anything else is passed through unchanged (so 1 is
        read as a fraction of 1.0, not as 1%).
    model_path : str or path-like, optional
        Location of the pickled model. Defaults to the file written by the
        training cell, so existing single-argument calls behave as before.

    Returns
    -------
    The output of the loaded model's ``predict`` for the one-row frame; index
    0 holds the prediction for ``input_data``.

    Raises
    ------
    KeyError
        If ``input_data`` has no 'Scholarship' entry.
    FileNotFoundError
        If ``model_path`` does not exist.
    """
    # Wrapping the dict in a list yields a one-row frame; input_data itself is
    # never modified.
    input_df = pd.DataFrame([input_data])

    # Ensure input scholarship is in the right format (fraction, not percent)
    if input_df['Scholarship'].max() > 1:
        input_df['Scholarship'] = input_df['Scholarship'] / 100.0

    # Load model. The context manager closes the file handle promptly instead
    # of leaking it until garbage collection.
    # SECURITY: unpickling can execute arbitrary code - only point model_path
    # at files produced by a trusted training run.
    with open(model_path, 'rb') as model_file:
        model = pickle.load(model_file)

    # Predict
    return model.predict(input_df)

# Example record for a single student, keyed by the feature names used in
# training
input_example = {
    'Student_Age': '19-22', 'Sex': 'Female', 'High_School_Type': 'Private',
    'Scholarship': 75.0,  # given as a percentage; predict_new_data rescales it
    'Additional_Work': 'Yes', 'Sports_activity': 'No',
    'Transportation': 'Private',
    'Weekly_Study_Hours': 20,
    'Attendance': 'Always', 'Reading': 'Yes', 'Notes': 'Yes',
    'Listening_in_Class': 'Yes', 'Project_work': 'Yes',
}

# Run the saved model on the example and report the single predicted label
prediction = predict_new_data(input_example)
predicted_grade = prediction[0]
print(f"Predicted Grade for the input example: {predicted_grade}")


Model Accuracy: 0.24
Predicted Grade for the input example: AA
