In [1]:
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load the dataset
def load_data():
    file_path = "Students _Performance _Prediction.csv"
    df = pd.read_csv(file_path)
    return df

df = load_data()

# Preprocessing
label_encoders = {}
categorical_cols = ["Student_Age", "Sex", "High_School_Type", "Scholarship", "Additional_Work", 
                    "Sports_activity", "Transportation", "Attendance", "Reading", "Notes", 
                    "Listening_in_Class", "Project_work", "Grade"]

for col in categorical_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Split data into train and test
X = df.drop(columns=["Grade", "Student_ID"])
y = df["Grade"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

# Streamlit UI
st.title("Student Performance Analysis")

# Display dataset
if st.checkbox("Show Raw Data"):
    st.write(df)

# Visualization: Grade Distribution
st.subheader("Grade Distribution")
grade_counts = df["Grade"].value_counts()
fig, ax = plt.subplots()
ax.bar(grade_counts.index, grade_counts.values, color='skyblue')
ax.set_xlabel("Grade")
ax.set_ylabel("Count")
st.pyplot(fig)

# Weekly Study Hours vs. Grade
st.subheader("Weekly Study Hours vs. Grade")
avg_study_hours = df.groupby("Grade")["Weekly_Study_Hours"].mean()
fig, ax = plt.subplots()
ax.bar(avg_study_hours.index, avg_study_hours.values, color='lightgreen')
ax.set_xlabel("Grade")
ax.set_ylabel("Avg Weekly Study Hours")
st.pyplot(fig)

# Filter by Student Age
target_age = st.selectbox("Select Age Group", df["Student_Age"].unique())
st.write(df[df["Student_Age"] == target_age])

# Model Evaluation
st.subheader("Model Accuracy")
st.write(f"Accuracy: {accuracy:.2f}")
st.text("Classification Report:")
st.text(report)

st.write("Analysis completed.")

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
2025-03-31 20:27:37.997 
  command:

    streamlit run C:\Users\hp\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\ipykernel_launcher.py [ARGUMENTS]
2025-03-31 20:27:40.275 Session state does not function when running a script without `streamlit run`


In [3]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.tree import plot_tree
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
df=pd.read_csv('Students _Performance _Prediction.csv')
de=pd.DataFrame(df)
print(de)
# Identify categorical columns and apply Label Encoding
categorical_columns = df.select_dtypes(include=['object']).columns  # Identify all categorical columns

# Apply Label Encoding to each categorical column
le = LabelEncoder()

for col in categorical_columns:
    df[col] = le.fit_transform(df[col])

# Select features and target variable
X = df[['Student_ID', 'Student_Age', 'Sex', 'High_School_Type',
           'Scholarship', 'Additional_Work', 'Sports_activity',
           'Transportation', 'Weekly_Study_Hours', 
           'Attendance', 'Reading', 
           'Notes', 'Listening_in_Class', 
           'Project_work']]  # Features
y = df['Grade']  # Target variable

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
# Initialize the Decision Tree Classifier
clf = DecisionTreeClassifier(random_state=42)

# Train the model
clf.fit(X_train, y_train)

# Make predictions on the test set
y_pred = clf.predict(X_test)

# Evaluate the model's accuracy
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)
print(f"Accuracy of the Decision Tree Classifier: {accuracy:.2f}")
print('Confusion Matrix:')
print(conf_matrix)
print('Classification Report:')
print(class_report)

# Visualize the Decision Tree (optional)
plt.figure(figsize=(12, 8))
plot_tree(clf, filled=True, feature_names=X.columns, class_names=np.unique(y).astype(str), rounded=True)
plt.show()

     Student_ID Student_Age     Sex High_School_Type Scholarship  \
0      STUDENT1       19-22    Male            Other         50%   
1      STUDENT2       19-22    Male            Other         50%   
2      STUDENT3       19-22    Male            State         50%   
3      STUDENT4          18  Female          Private         50%   
4      STUDENT5       19-22    Male          Private         50%   
..          ...         ...     ...              ...         ...   
140  STUDENT141       19-22  Female            State         50%   
141  STUDENT142          18  Female            State         75%   
142  STUDENT143          18  Female          Private         75%   
143  STUDENT144       19-22  Female            State         75%   
144  STUDENT145          18  Female          Private        100%   

    Additional_Work Sports_activity Transportation  Weekly_Study_Hours  \
0               Yes              No        Private                   0   
1               Yes              No

  plt.show()


In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import LabelEncoder

# Load and preprocess data
data = pd.read_csv('Students _Performance _Prediction.csv')
df = pd.DataFrame(data)
categorical_columns = data.select_dtypes(include=['object']).columns  
le = LabelEncoder()
for col in categorical_columns:
    data[col] = le.fit_transform(data[col])

X = data[['Student_ID', 'Student_Age', 'Sex', 'High_School_Type',
           'Scholarship', 'Additional_Work', 'Sports_activity',
           'Transportation', 'Weekly_Study_Hours', 
           'Attendance', 'Reading', 
           'Notes', 'Listening_in_Class', 
           'Project_work']]
y = data['Grade']  
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=34)

regressor = RandomForestRegressor(random_state=34)
regressor.fit(X_train, y_train)

# User Input for Prediction
student_id = input("Enter Student ID (STUDENT1 or STUDENT2 or STUDENT3): ")
student_age = int(input("Enter Student Age (19-22 or 18): "))
sex = input("Enter Sex (Male/Female): ")
high_school_type = input("Enter High School Type (Public/Private): ")
scholarship = int(input("Enter Scholarship (50% or 75% or 100%): "))
additional_work = input("Enter Additional Work (Yes or No): ")
sports_activity = input("Enter Sports Activity (Yes or No): ")
transportation = input("Enter Transportation (Private or Bus): ")
weekly_study_hours = float(input("Enter Weekly Study Hours (0 or 2 or 12): "))
attendance = float(input("Enter Attendance Percentage (1 or 2 or 3): "))
reading = input("Enter Reading Score (Yes or No): ")
notes = float(input("Enter Notes Score (1 or 0): "))
listening_in_class = float(input("Enter Listening in Class Score (1 or 0): "))
project_work = float(input("Enter Project Work Score (1 or 0): "))

input_data = {
    'Student_ID': student_id,
    'Student_Age': student_age,
    'Sex': sex,
    'High_School_Type': high_school_type,
    'Scholarship': scholarship,
    'Additional_Work': additional_work,
    'Sports_activity': sports_activity,
    'Transportation': transportation,
    'Weekly_Study_Hours': weekly_study_hours,
    'Attendance': attendance,
    'Reading': reading,
    'Notes': notes,
    'Listening_in_Class': listening_in_class,
    'Project_work': project_work
}

input_df = pd.DataFrame([input_data])
# Exclude 'Grade' from transformation as it is not part of the input data
for col in categorical_columns:
    if col != 'Grade' and col in label_encoders:
        try:
            input_df[col] = label_encoders[col].transform(input_df[col])
        except ValueError:
            # Handle unseen labels by assigning a default value (e.g., the most frequent label or -1)
            input_df[col] = input_df[col].apply(lambda x: label_encoders[col].classes_[0] if x in label_encoders[col].classes_ else -1)
predicted_grade= regressor.predict(input_df)
print(f"The predicted Grade is: {predicted_grade[0]:.2f}")

The predicted Grade is: 3.78


In [26]:
# Student Grade Prediction (Jupyter Notebook Compatible)
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder
import numpy as np
import matplotlib.pyplot as plt

# ----------------------------
# 🎯 Grade Mapping Functions
# ----------------------------
def map_grade_letter_to_numeric(letter):
    mapping = {
        "F": 0.0, "D": 1.0, "C-": 1.75, "C": 2.0, "C+": 2.5,
        "B-": 2.75, "B": 3.0, "B+": 3.5, "A-": 3.75, "A": 4.0, "A+": 4.1, "AA": 4.0
    }
    return mapping.get(str(letter).upper(), 0.0)

def convert_numeric_to_letter(grade):
    if grade < 1.0:
        return "F"
    elif grade < 1.75:
        return "D"
    elif grade < 2.0:
        return "C-"
    elif grade < 2.5:
        return "C"
    elif grade < 2.75:
        return "C+"
    elif grade < 3.0:
        return "B-"
    elif grade < 3.5:
        return "B"
    elif grade < 3.75:
        return "B+"
    elif grade < 4.0:
        return "A-"
    elif grade <= 4.0:
        return "A"
    else:
        return "A+"

# ----------------------------
# 🔁 Age Conversion Helper
# ----------------------------
def convert_age(age_value):
    if '-' in str(age_value):
        parts = age_value.split('-')
        return (float(parts[0]) + float(parts[1])) / 2
    try:
        return float(age_value)
    except:
        return None

# ----------------------------
# 📥 Load and Preprocess Data
# ----------------------------
data = pd.read_csv("Students _Performance _Prediction.csv")

data['Scholarship'] = data['Scholarship'].str.replace('%', '', regex=False).astype(float)
data['Student_Age'] = data['Student_Age'].apply(convert_age)
data['Grade'] = data['Grade'].apply(map_grade_letter_to_numeric)

# Encode categoricals
categorical_columns = data.select_dtypes(include='object').columns
label_encoders = {}
for col in categorical_columns:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])
    label_encoders[col] = le

# Features & Target
features = ['Student_Age', 'Sex', 'High_School_Type', 'Scholarship', 'Additional_Work',
            'Sports_activity', 'Transportation', 'Weekly_Study_Hours', 'Attendance',
            'Reading', 'Notes', 'Listening_in_Class', 'Project_work']
X = data[features]
y = data['Grade']

# Train the model
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)

# ----------------------------
# 🧪 Predict on Test Set
# ----------------------------
y_pred = model.predict(X_test)
results = pd.DataFrame({
    'Actual (Numeric)': y_test,
    'Predicted (Numeric)': y_pred,
    'Predicted (Letter)': [convert_numeric_to_letter(p) for p in y_pred]
})

# Display prediction results
print(results.head())

# Optional: Plotting
plt.figure(figsize=(10,6))
plt.plot(results['Actual (Numeric)'].values, label='Actual', marker='o')
plt.plot(results['Predicted (Numeric)'].values, label='Predicted', marker='x')
plt.title('Actual vs Predicted Grades')
plt.xlabel('Sample Index')
plt.ylabel('Grade')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

     Actual (Numeric)  Predicted (Numeric) Predicted (Letter)
69                0.0                2.600                 C+
140               0.0                1.200                  D
27                4.0                2.040                  C
19                0.0                0.960                  F
42                4.0                0.304                  F


  plt.show()


In [28]:
# model.ipynb

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder

# Grade mapping functions
def convert_numeric_to_letter(score):
    # Grade scale based on the score
    if score < 40:
        return "F"
    elif 40 <= score < 45:
        return "D"
    elif 45 <= score < 50:
        return "C-"
    elif 50 <= score < 60:
        return "C"
    elif 60 <= score < 65:
        return "C+"
    elif 65 <= score < 70:
        return "B-"
    elif 70 <= score < 75:
        return "B"
    elif 75 <= score < 80:
        return "B+"
    elif 80 <= score < 85:
        return "A-"
    elif 85 <= score < 90:
        return "A"
    elif 90 <= score <= 100:
        return "A+"
    else:
        return "Invalid"  # In case score is outside 0-100

# Safe encoding function
def safe_transform(label, encoder):
    if label in encoder.classes_:
        return encoder.transform([label])[0]
    else:
        encoder.classes_ = np.append(encoder.classes_, label)
        return encoder.transform([label])[0]

# Age conversion helper
def convert_age(age_value):
    if '-' in str(age_value):
        parts = age_value.split('-')
        return (float(parts[0]) + float(parts[1])) / 2
    try:
        return float(age_value)
    except:
        return None

# Load and preprocess data
data = pd.read_csv("Students _Performance _Prediction.csv")
data['Scholarship'] = data['Scholarship'].str.replace('%', '', regex=False).astype(float)
data['Student_Age'] = data['Student_Age'].apply(convert_age)

# Encode categorical columns
categorical_columns = data.select_dtypes(include='object').columns
label_encoders = {}
for col in categorical_columns:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])
    label_encoders[col] = le

# Define features and target
features = ['Student_Age', 'Sex', 'High_School_Type', 'Scholarship', 'Additional_Work',
            'Sports_activity', 'Transportation', 'Weekly_Study_Hours', 'Attendance',
            'Reading', 'Notes', 'Listening_in_Class', 'Project_work']
X = data[features]
y = data['Grade']

# Train model
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)

# Evaluate model
y_pred = model.predict(X_test)

# Calculate the accuracy or any other evaluation metrics you prefer
# For simplicity, let's print the first few predictions along with actual values
predictions = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print(predictions.head())

# You can also calculate the performance metrics such as Mean Squared Error, R^2 etc.
from sklearn.metrics import mean_squared_error, r2_score

mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"R^2 Score: {r2}")

     Actual  Predicted
69        4       2.12
140       4       1.75
27        0       1.47
19        2       3.35
42        0       3.25
Mean Squared Error: 8.72335962962963
R^2 Score: -0.5503601237493416


In [29]:
# model.ipynb

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder

# Grade mapping functions
def convert_numeric_to_letter(score):
    # Grade scale based on the score
    if score < 40:
        return "F"
    elif 40 <= score < 45:
        return "D"
    elif 45 <= score < 50:
        return "C-"
    elif 50 <= score < 60:
        return "C"
    elif 60 <= score < 65:
        return "C+"
    elif 65 <= score < 70:
        return "B-"
    elif 70 <= score < 75:
        return "B"
    elif 75 <= score < 80:
        return "B+"
    elif 80 <= score < 85:
        return "A-"
    elif 85 <= score < 90:
        return "A"
    elif 90 <= score <= 100:
        return "A+"
    else:
        return "Invalid"  # In case score is outside 0-100

# Safe encoding function
def safe_transform(label, encoder):
    if label in encoder.classes_:
        return encoder.transform([label])[0]
    else:
        encoder.classes_ = np.append(encoder.classes_, label)
        return encoder.transform([label])[0]

# Age conversion helper
def convert_age(age_value):
    if '-' in str(age_value):
        parts = age_value.split('-')
        return (float(parts[0]) + float(parts[1])) / 2
    try:
        return float(age_value)
    except:
        return None

# Load and preprocess data
data = pd.read_csv("Students _Performance _Prediction.csv")
data['Scholarship'] = data['Scholarship'].str.replace('%', '', regex=False).astype(float)
data['Student_Age'] = data['Student_Age'].apply(convert_age)

# Encode categorical columns
categorical_columns = data.select_dtypes(include='object').columns
label_encoders = {}
for col in categorical_columns:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])
    label_encoders[col] = le

# Define features and target
features = ['Student_Age', 'Sex', 'High_School_Type', 'Scholarship', 'Additional_Work',
            'Sports_activity', 'Transportation', 'Weekly_Study_Hours', 'Attendance',
            'Reading', 'Notes', 'Listening_in_Class', 'Project_work']
X = data[features]
y = data['Grade']

# Train model
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)

# Function for prediction based on user input
def predict_grade(user_input):
    input_dict = {
        'Student_Age': convert_age(user_input['Student_Age']),
        'Sex': safe_transform(user_input['Sex'], label_encoders['Sex']),
        'High_School_Type': safe_transform(user_input['High_School_Type'], label_encoders['High_School_Type']),
        'Scholarship': float(user_input['Scholarship']),
        'Additional_Work': safe_transform(user_input['Additional_Work'], label_encoders['Additional_Work']),
        'Sports_activity': safe_transform(user_input['Sports_activity'], label_encoders['Sports_activity']),
        'Transportation': safe_transform(user_input['Transportation'], label_encoders['Transportation']),
        'Weekly_Study_Hours': float(user_input['Weekly_Study_Hours']),
        'Attendance': safe_transform(user_input['Attendance'], label_encoders['Attendance']),
        'Reading': safe_transform(user_input['Reading'], label_encoders['Reading']),
        'Notes': safe_transform(user_input['Notes'], label_encoders['Notes']),
        'Listening_in_Class': safe_transform(user_input['Listening_in_Class'], label_encoders['Listening_in_Class']),
        'Project_work': safe_transform(user_input['Project_work'], label_encoders['Project_work'])
    }
    
    input_df = pd.DataFrame([input_dict])
    prediction = model.predict(input_df)[0]
    letter_grade = convert_numeric_to_letter(prediction)
    return prediction, letter_grade

# Example of how you can provide user input and get predictions:
user_input = {
    'Student_Age': '20',
    'Sex': 'Male',
    'High_School_Type': 'Private',
    'Scholarship': '75',
    'Additional_Work': 'No',
    'Sports_activity': 'Yes',
    'Transportation': 'Bus',
    'Weekly_Study_Hours': '10',
    'Attendance': 'Always',
    'Reading': 'Yes',
    'Notes': 'Yes',
    'Listening_in_Class': 'Yes',
    'Project_work': 'Yes'
}

# Call the prediction function with user input
predicted_score, predicted_grade = predict_grade(user_input)
print(f"Predicted Score: {predicted_score:.2f}, Predicted Grade: {predicted_grade}")

Predicted Score: 2.58, Predicted Grade: F


In [30]:
# model.ipynb

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder

# Grade mapping functions
def convert_numeric_to_letter(score):
    # Grade scale based on the score
    if score < 40:
        return "F"
    elif 40 <= score < 45:
        return "D"
    elif 45 <= score < 50:
        return "C-"
    elif 50 <= score < 60:
        return "C"
    elif 60 <= score < 65:
        return "C+"
    elif 65 <= score < 70:
        return "B-"
    elif 70 <= score < 75:
        return "B"
    elif 75 <= score < 80:
        return "B+"
    elif 80 <= score < 85:
        return "A-"
    elif 85 <= score < 90:
        return "A"
    elif 90 <= score <= 100:
        return "A+"
    else:
        return "Invalid"  # In case score is outside 0-100

# Safe encoding function
def safe_transform(label, encoder):
    if label in encoder.classes_:
        return encoder.transform([label])[0]
    else:
        encoder.classes_ = np.append(encoder.classes_, label)
        return encoder.transform([label])[0]

# Age conversion helper
def convert_age(age_value):
    if '-' in str(age_value):
        parts = age_value.split('-')
        return (float(parts[0]) + float(parts[1])) / 2
    try:
        return float(age_value)
    except:
        return None

# Load and preprocess data
data = pd.read_csv("Students _Performance _Prediction.csv")
data['Scholarship'] = data['Scholarship'].str.replace('%', '', regex=False).astype(float)
data['Student_Age'] = data['Student_Age'].apply(convert_age)

# Encode categorical columns
categorical_columns = data.select_dtypes(include='object').columns
label_encoders = {}
for col in categorical_columns:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])
    label_encoders[col] = le

# Define features and target
features = ['Student_Age', 'Sex', 'High_School_Type', 'Scholarship', 'Additional_Work',
            'Sports_activity', 'Transportation', 'Weekly_Study_Hours', 'Attendance',
            'Reading', 'Notes', 'Listening_in_Class', 'Project_work']
X = data[features]
y = data['Grade']

# Train model
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)

# Function for prediction based on user input
def predict_grade(user_input):
    input_dict = {
        'Student_Age': convert_age(user_input['Student_Age']),
        'Sex': safe_transform(user_input['Sex'], label_encoders['Sex']),
        'High_School_Type': safe_transform(user_input['High_School_Type'], label_encoders['High_School_Type']),
        'Scholarship': float(user_input['Scholarship']),
        'Additional_Work': safe_transform(user_input['Additional_Work'], label_encoders['Additional_Work']),
        'Sports_activity': safe_transform(user_input['Sports_activity'], label_encoders['Sports_activity']),
        'Transportation': safe_transform(user_input['Transportation'], label_encoders['Transportation']),
        'Weekly_Study_Hours': float(user_input['Weekly_Study_Hours']),
        'Attendance': safe_transform(user_input['Attendance'], label_encoders['Attendance']),
        'Reading': safe_transform(user_input['Reading'], label_encoders['Reading']),
        'Notes': safe_transform(user_input['Notes'], label_encoders['Notes']),
        'Listening_in_Class': safe_transform(user_input['Listening_in_Class'], label_encoders['Listening_in_Class']),
        'Project_work': safe_transform(user_input['Project_work'], label_encoders['Project_work'])
    }
    
    input_df = pd.DataFrame([input_dict])
    prediction = model.predict(input_df)[0]
    letter_grade = convert_numeric_to_letter(prediction)
    return prediction, letter_grade

# Provide your own input values manually
user_input = {
    'Student_Age': '20',  # Age
    'Sex': 'Male',  # Sex
    'High_School_Type': 'Private',  # High School Type
    'Scholarship': '75',  # Scholarship percentage
    'Additional_Work': 'No',  # Additional Work (Yes/No)
    'Sports_activity': 'Yes',  # Sports Activity (Yes/No)
    'Transportation': 'Bus',  # Transportation (Bus/Private)
    'Weekly_Study_Hours': '10',  # Weekly Study Hours
    'Attendance': 'Always',  # Attendance (Always/Never/Sometimes)
    'Reading': 'Yes',  # Reading (Yes/No)
    'Notes': 'Yes',  # Takes Notes (Yes/No)
    'Listening_in_Class': 'Yes',  # Listening in Class (Yes/No)
    'Project_work': 'Yes'  # Project Work (Yes/No)
}

# Call the prediction function with your input
predicted_score, predicted_grade = predict_grade(user_input)
print(f"Predicted Score: {predicted_score:.2f}, Predicted Grade: {predicted_grade}")

Predicted Score: 2.58, Predicted Grade: F


In [31]:
# model.ipynb

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder

# Grade mapping functions
def convert_numeric_to_letter(score):
    # Grade scale based on the score
    if score < 1:
        return "F"
    elif 1 <= score < 1.75:
        return "D"
    elif 1.75 <= score < 2:
        return "C-"
    elif 2 <= score < 2.5:
        return "C"
    elif 2.5 <= score < 2.75:
        return "C+"
    elif 2.75 <= score < 3:
        return "B-"
    elif 3 <= score < 3.5:
        return "B"
    elif 3.5 <= score < 3.75:
        return "B+"
    elif 3.75 <= score < 4:
        return "A-"
    elif 4 <= score < 4.25:
        return "A"
    else:
        return "A+"  # For any score greater than or equal to 4

# Safe encoding function
def safe_transform(label, encoder):
    if label in encoder.classes_:
        return encoder.transform([label])[0]
    else:
        encoder.classes_ = np.append(encoder.classes_, label)
        return encoder.transform([label])[0]

# Age conversion helper
def convert_age(age_value):
    if '-' in str(age_value):
        parts = age_value.split('-')
        return (float(parts[0]) + float(parts[1])) / 2
    try:
        return float(age_value)
    except:
        return None

# Load and preprocess data
data = pd.read_csv("Students _Performance _Prediction.csv")
data['Scholarship'] = data['Scholarship'].str.replace('%', '', regex=False).astype(float)
data['Student_Age'] = data['Student_Age'].apply(convert_age)

# Encode categorical columns
categorical_columns = data.select_dtypes(include='object').columns
label_encoders = {}
for col in categorical_columns:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])
    label_encoders[col] = le

# Define features and target
features = ['Student_Age', 'Sex', 'High_School_Type', 'Scholarship', 'Additional_Work',
            'Sports_activity', 'Transportation', 'Weekly_Study_Hours', 'Attendance',
            'Reading', 'Notes', 'Listening_in_Class', 'Project_work']
X = data[features]
y = data['Grade']

# Train model
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)

# Function for prediction based on user input
def predict_grade(user_input):
    input_dict = {
        'Student_Age': convert_age(user_input['Student_Age']),
        'Sex': safe_transform(user_input['Sex'], label_encoders['Sex']),
        'High_School_Type': safe_transform(user_input['High_School_Type'], label_encoders['High_School_Type']),
        'Scholarship': float(user_input['Scholarship']),
        'Additional_Work': safe_transform(user_input['Additional_Work'], label_encoders['Additional_Work']),
        'Sports_activity': safe_transform(user_input['Sports_activity'], label_encoders['Sports_activity']),
        'Transportation': safe_transform(user_input['Transportation'], label_encoders['Transportation']),
        'Weekly_Study_Hours': float(user_input['Weekly_Study_Hours']),
        'Attendance': safe_transform(user_input['Attendance'], label_encoders['Attendance']),
        'Reading': safe_transform(user_input['Reading'], label_encoders['Reading']),
        'Notes': safe_transform(user_input['Notes'], label_encoders['Notes']),
        'Listening_in_Class': safe_transform(user_input['Listening_in_Class'], label_encoders['Listening_in_Class']),
        'Project_work': safe_transform(user_input['Project_work'], label_encoders['Project_work'])
    }
    
    input_df = pd.DataFrame([input_dict])
    prediction = model.predict(input_df)[0]
    letter_grade = convert_numeric_to_letter(prediction)
    return prediction, letter_grade

# Provide your own input values manually
user_input = {
    'Student_Age': '19-22',  # Age (range)
    'Sex': 'Male',  # Sex
    'High_School_Type': 'Other',  # High School Type
    'Scholarship': '50%',  # Scholarship percentage
    'Additional_Work': 'Yes',  # Additional Work (Yes/No)
    'Sports_activity': 'No',  # Sports Activity (Yes/No)
    'Transportation': 'Private',  # Transportation (Private/Bus)
    'Weekly_Study_Hours': '0',  # Weekly Study Hours
    'Attendance': 'Always',  # Attendance (Always/Never/Sometimes)
    'Reading': 'Yes',  # Reading (Yes/No)
    'Notes': 'Yes',  # Takes Notes (Yes/No)
    'Listening_in_Class': 'No',  # Listening in Class (Yes/No)
    'Project_work': 'No'  # Project Work (Yes/No)
}

# Call the prediction function with your input
predicted_score, predicted_grade = predict_grade(user_input)
print(f"Predicted Score: {predicted_score:.2f}, Predicted Grade: {predicted_grade}")

ValueError: could not convert string to float: '50%'

In [32]:
# model.ipynb

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder

# Grade mapping functions
def convert_numeric_to_letter(score):
    # Grade scale based on the score
    if score < 1:
        return "F"
    elif 1 <= score < 1.75:
        return "D"
    elif 1.75 <= score < 2:
        return "C-"
    elif 2 <= score < 2.5:
        return "C"
    elif 2.5 <= score < 2.75:
        return "C+"
    elif 2.75 <= score < 3:
        return "B-"
    elif 3 <= score < 3.5:
        return "B"
    elif 3.5 <= score < 3.75:
        return "B+"
    elif 3.75 <= score < 4:
        return "A-"
    elif 4 <= score < 4.25:
        return "A"
    else:
        return "A+"  # For any score greater than or equal to 4

# Safe encoding function
def safe_transform(label, encoder):
    if label in encoder.classes_:
        return encoder.transform([label])[0]
    else:
        encoder.classes_ = np.append(encoder.classes_, label)
        return encoder.transform([label])[0]

# Age conversion helper
def convert_age(age_value):
    if '-' in str(age_value):
        parts = age_value.split('-')
        return (float(parts[0]) + float(parts[1])) / 2
    try:
        return float(age_value)
    except:
        return None

# Scholarship conversion function
def convert_scholarship(scholarship_value):
    return float(scholarship_value.replace('%', '').strip()) / 100

# Load and preprocess data
data = pd.read_csv("Students _Performance _Prediction.csv")
data['Scholarship'] = data['Scholarship'].apply(convert_scholarship)
data['Student_Age'] = data['Student_Age'].apply(convert_age)

# Encode categorical columns
categorical_columns = data.select_dtypes(include='object').columns
label_encoders = {}
for col in categorical_columns:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])
    label_encoders[col] = le

# Define features and target
features = ['Student_Age', 'Sex', 'High_School_Type', 'Scholarship', 'Additional_Work',
            'Sports_activity', 'Transportation', 'Weekly_Study_Hours', 'Attendance',
            'Reading', 'Notes', 'Listening_in_Class', 'Project_work']
X = data[features]
y = data['Grade']

# Train model
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)

# Function for prediction based on user input
def predict_grade(user_input):
    input_dict = {
        'Student_Age': convert_age(user_input['Student_Age']),
        'Sex': safe_transform(user_input['Sex'], label_encoders['Sex']),
        'High_School_Type': safe_transform(user_input['High_School_Type'], label_encoders['High_School_Type']),
        'Scholarship': convert_scholarship(user_input['Scholarship']),
        'Additional_Work': safe_transform(user_input['Additional_Work'], label_encoders['Additional_Work']),
        'Sports_activity': safe_transform(user_input['Sports_activity'], label_encoders['Sports_activity']),
        'Transportation': safe_transform(user_input['Transportation'], label_encoders['Transportation']),
        'Weekly_Study_Hours': float(user_input['Weekly_Study_Hours']),
        'Attendance': safe_transform(user_input['Attendance'], label_encoders['Attendance']),
        'Reading': safe_transform(user_input['Reading'], label_encoders['Reading']),
        'Notes': safe_transform(user_input['Notes'], label_encoders['Notes']),
        'Listening_in_Class': safe_transform(user_input['Listening_in_Class'], label_encoders['Listening_in_Class']),
        'Project_work': safe_transform(user_input['Project_work'], label_encoders['Project_work'])
    }
    
    input_df = pd.DataFrame([input_dict])
    prediction = model.predict(input_df)[0]
    letter_grade = convert_numeric_to_letter(prediction)
    return prediction, letter_grade

# Provide your own input values manually
user_input = {
    'Student_Age': '19-22',  # Age (range)
    'Sex': 'Male',  # Sex
    'High_School_Type': 'Other',  # High School Type
    'Scholarship': '50%',  # Scholarship percentage
    'Additional_Work': 'Yes',  # Additional Work (Yes/No)
    'Sports_activity': 'No',  # Sports Activity (Yes/No)
    'Transportation': 'Private',  # Transportation (Private/Bus)
    'Weekly_Study_Hours': '0',  # Weekly Study Hours
    'Attendance': 'Always',  # Attendance (Always/Never/Sometimes)
    'Reading': 'Yes',  # Reading (Yes/No)
    'Notes': 'Yes',  # Takes Notes (Yes/No)
    'Listening_in_Class': 'No',  # Listening in Class (Yes/No)
    'Project_work': 'No'  # Project Work (Yes/No)
}

# Call the prediction function with your input
predicted_score, predicted_grade = predict_grade(user_input)
print(f"Predicted Score: {predicted_score:.2f}, Predicted Grade: {predicted_grade}")

AttributeError: 'float' object has no attribute 'replace'

In [33]:
# model.ipynb

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder

# Grade mapping functions
def convert_numeric_to_letter(score):
    # Grade scale based on the score
    if score < 1:
        return "F"
    elif 1 <= score < 1.75:
        return "D"
    elif 1.75 <= score < 2:
        return "C-"
    elif 2 <= score < 2.5:
        return "C"
    elif 2.5 <= score < 2.75:
        return "C+"
    elif 2.75 <= score < 3:
        return "B-"
    elif 3 <= score < 3.5:
        return "B"
    elif 3.5 <= score < 3.75:
        return "B+"
    elif 3.75 <= score < 4:
        return "A-"
    elif 4 <= score < 4.25:
        return "A"
    else:
        return "A+"  # For any score greater than or equal to 4

# Safe encoding function
def safe_transform(label, encoder):
    if label in encoder.classes_:
        return encoder.transform([label])[0]
    else:
        encoder.classes_ = np.append(encoder.classes_, label)
        return encoder.transform([label])[0]

# Age conversion helper
def convert_age(age_value):
    if '-' in str(age_value):
        parts = age_value.split('-')
        return (float(parts[0]) + float(parts[1])) / 2
    try:
        return float(age_value)
    except:
        return None

# Scholarship conversion function (fixed)
def convert_scholarship(scholarship_value):
    # Check if the value is a string with a '%' symbol
    if isinstance(scholarship_value, str):
        return float(scholarship_value.replace('%', '').strip()) / 100
    # If it's already a float, just return it as is
    return float(scholarship_value) / 100

# Load and preprocess data
data = pd.read_csv("Students _Performance _Prediction.csv")
data['Scholarship'] = data['Scholarship'].apply(convert_scholarship)
data['Student_Age'] = data['Student_Age'].apply(convert_age)

# Encode categorical columns
categorical_columns = data.select_dtypes(include='object').columns
label_encoders = {}
for col in categorical_columns:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])
    label_encoders[col] = le

# Define features and target
features = ['Student_Age', 'Sex', 'High_School_Type', 'Scholarship', 'Additional_Work',
            'Sports_activity', 'Transportation', 'Weekly_Study_Hours', 'Attendance',
            'Reading', 'Notes', 'Listening_in_Class', 'Project_work']
X = data[features]
y = data['Grade']

# Train model
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)

# Function for prediction based on user input
def predict_grade(user_input):
    input_dict = {
        'Student_Age': convert_age(user_input['Student_Age']),
        'Sex': safe_transform(user_input['Sex'], label_encoders['Sex']),
        'High_School_Type': safe_transform(user_input['High_School_Type'], label_encoders['High_School_Type']),
        'Scholarship': convert_scholarship(user_input['Scholarship']),
        'Additional_Work': safe_transform(user_input['Additional_Work'], label_encoders['Additional_Work']),
        'Sports_activity': safe_transform(user_input['Sports_activity'], label_encoders['Sports_activity']),
        'Transportation': safe_transform(user_input['Transportation'], label_encoders['Transportation']),
        'Weekly_Study_Hours': float(user_input['Weekly_Study_Hours']),
        'Attendance': safe_transform(user_input['Attendance'], label_encoders['Attendance']),
        'Reading': safe_transform(user_input['Reading'], label_encoders['Reading']),
        'Notes': safe_transform(user_input['Notes'], label_encoders['Notes']),
        'Listening_in_Class': safe_transform(user_input['Listening_in_Class'], label_encoders['Listening_in_Class']),
        'Project_work': safe_transform(user_input['Project_work'], label_encoders['Project_work'])
    }
    
    input_df = pd.DataFrame([input_dict])
    prediction = model.predict(input_df)[0]
    letter_grade = convert_numeric_to_letter(prediction)
    return prediction, letter_grade

# Provide your own input values manually
user_input = {
    'Student_Age': '19-22',  # Age (range)
    'Sex': 'Male',  # Sex
    'High_School_Type': 'Other',  # High School Type
    'Scholarship': '50%',  # Scholarship percentage
    'Additional_Work': 'Yes',  # Additional Work (Yes/No)
    'Sports_activity': 'No',  # Sports Activity (Yes/No)
    'Transportation': 'Private',  # Transportation (Private/Bus)
    'Weekly_Study_Hours': '0',  # Weekly Study Hours
    'Attendance': 'Always',  # Attendance (Always/Never/Sometimes)
    'Reading': 'Yes',  # Reading (Yes/No)
    'Notes': 'Yes',  # Takes Notes (Yes/No)
    'Listening_in_Class': 'No',  # Listening in Class (Yes/No)
    'Project_work': 'No'  # Project Work (Yes/No)
}

# Call the prediction function with your input
predicted_score, predicted_grade = predict_grade(user_input)
print(f"Predicted Score: {predicted_score:.2f}, Predicted Grade: {predicted_grade}")

Predicted Score: 0.71, Predicted Grade: F


In [34]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder

# Grade mapping function based on your criteria
def convert_numeric_to_letter(score):
    if score < 40:
        return "F"
    elif 40 <= score < 45:
        return "D"
    elif 45 <= score < 50:
        return "C-"
    elif 50 <= score < 60:
        return "C"
    elif 60 <= score < 65:
        return "C+"
    elif 65 <= score < 70:
        return "B-"
    elif 70 <= score < 75:
        return "B"
    elif 75 <= score < 80:
        return "B+"
    elif 80 <= score < 85:
        return "A-"
    elif 85 <= score < 90:
        return "A"
    elif 90 <= score <= 100:
        return "A+"
    else:
        return "Invalid"  # In case score is outside 0-100

# Safe encoding function for categorical data
def safe_transform(label, encoder):
    if label in encoder.classes_:
        return encoder.transform([label])[0]
    else:
        encoder.classes_ = np.append(encoder.classes_, label)
        return encoder.transform([label])[0]

# Age conversion helper function
def convert_age(age_value):
    if '-' in str(age_value):
        parts = age_value.split('-')
        return (float(parts[0]) + float(parts[1])) / 2
    try:
        return float(age_value)
    except:
        return None

# Scholarship conversion function
def convert_scholarship(scholarship_value):
    # Assuming the format is in percentage, e.g., '50%'
    return float(scholarship_value.replace('%', '').strip()) / 100

# Load and preprocess data
data = pd.read_csv("Students _Performance _Prediction.csv")

# Convert columns
data['Scholarship'] = data['Scholarship'].apply(convert_scholarship)
data['Student_Age'] = data['Student_Age'].apply(convert_age)

# Encode categorical columns
categorical_columns = data.select_dtypes(include='object').columns
label_encoders = {}
for col in categorical_columns:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])
    label_encoders[col] = le

# Define features and target variable
features = ['Student_Age', 'Sex', 'High_School_Type', 'Scholarship', 'Additional_Work',
            'Sports_activity', 'Transportation', 'Weekly_Study_Hours', 'Attendance',
            'Reading', 'Notes', 'Listening_in_Class', 'Project_work']
X = data[features]
y = data['Grade']

# Train model
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)

# Predict Grade Function
def predict_grade(user_input):
    input_dict = {
        'Student_Age': convert_age(user_input['Student_Age']),
        'Sex': safe_transform(user_input['Sex'], label_encoders['Sex']),
        'High_School_Type': safe_transform(user_input['High_School_Type'], label_encoders['High_School_Type']),
        'Scholarship': convert_scholarship(user_input['Scholarship']),
        'Additional_Work': safe_transform(user_input['Additional_Work'], label_encoders['Additional_Work']),
        'Sports_activity': safe_transform(user_input['Sports_activity'], label_encoders['Sports_activity']),
        'Transportation': safe_transform(user_input['Transportation'], label_encoders['Transportation']),
        'Weekly_Study_Hours': float(user_input['Weekly_Study_Hours']),
        'Attendance': safe_transform(user_input['Attendance'], label_encoders['Attendance']),
        'Reading': safe_transform(user_input['Reading'], label_encoders['Reading']),
        'Notes': safe_transform(user_input['Notes'], label_encoders['Notes']),
        'Listening_in_Class': safe_transform(user_input['Listening_in_Class'], label_encoders['Listening_in_Class']),
        'Project_work': safe_transform(user_input['Project_work'], label_encoders['Project_work'])
    }
    
    # Convert input to DataFrame and make prediction
    input_df = pd.DataFrame([input_dict])
    prediction = model.predict(input_df)[0]
    
    # Convert the numeric score to letter grade
    letter_grade = convert_numeric_to_letter(prediction)
    
    return prediction, letter_grade

# Example user input for testing
user_input = {
    'Student_Age': '19-22',  # Age range
    'Sex': 'Male',  # Gender
    'High_School_Type': 'Other',  # Type of High School
    'Scholarship': '50%',  # Scholarship in percentage
    'Additional_Work': 'Yes',  # Additional work involvement
    'Sports_activity': 'No',  # Sports activity involvement
    'Transportation': 'Private',  # Transportation mode
    'Weekly_Study_Hours': '0',  # Weekly study hours
    'Attendance': 'Always',  # Attendance level
    'Reading': 'Yes',  # Reading habit
    'Notes': 'Yes',  # Note-taking habit
    'Listening_in_Class': 'No',  # Listening during class
    'Project_work': 'No'  # Project work involvement
}

# Get the prediction
predicted_score, predicted_grade = predict_grade(user_input)
print(f"Predicted Score: {predicted_score:.2f}, Predicted Grade: {predicted_grade}")

AttributeError: 'float' object has no attribute 'replace'

In [35]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder

# Grade mapping function based on your criteria
def convert_numeric_to_letter(score):
    if score < 40:
        return "F"
    elif 40 <= score < 45:
        return "D"
    elif 45 <= score < 50:
        return "C-"
    elif 50 <= score < 60:
        return "C"
    elif 60 <= score < 65:
        return "C+"
    elif 65 <= score < 70:
        return "B-"
    elif 70 <= score < 75:
        return "B"
    elif 75 <= score < 80:
        return "B+"
    elif 80 <= score < 85:
        return "A-"
    elif 85 <= score < 90:
        return "A"
    elif 90 <= score <= 100:
        return "A+"
    else:
        return "Invalid"  # In case score is outside 0-100

# Safe encoding function for categorical data
def safe_transform(label, encoder):
    if label in encoder.classes_:
        return encoder.transform([label])[0]
    else:
        encoder.classes_ = np.append(encoder.classes_, label)
        return encoder.transform([label])[0]

# Age conversion helper function
def convert_age(age_value):
    if '-' in str(age_value):
        parts = age_value.split('-')
        return (float(parts[0]) + float(parts[1])) / 2
    try:
        return float(age_value)
    except:
        return None

# Scholarship conversion function to handle both strings and numbers
def convert_scholarship(scholarship_value):
    if isinstance(scholarship_value, str):  # Check if the value is a string
        return float(scholarship_value.replace('%', '').strip()) / 100
    else:
        return float(scholarship_value)  # If it's already a float, return it as is

# Load and preprocess data
data = pd.read_csv("Students _Performance _Prediction.csv")

# Convert columns
data['Scholarship'] = data['Scholarship'].apply(convert_scholarship)
data['Student_Age'] = data['Student_Age'].apply(convert_age)

# Encode categorical columns
categorical_columns = data.select_dtypes(include='object').columns
label_encoders = {}
for col in categorical_columns:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])
    label_encoders[col] = le

# Define features and target variable
features = ['Student_Age', 'Sex', 'High_School_Type', 'Scholarship', 'Additional_Work',
            'Sports_activity', 'Transportation', 'Weekly_Study_Hours', 'Attendance',
            'Reading', 'Notes', 'Listening_in_Class', 'Project_work']
X = data[features]
y = data['Grade']

# Train model
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)

# Predict Grade Function
def predict_grade(user_input):
    input_dict = {
        'Student_Age': convert_age(user_input['Student_Age']),
        'Sex': safe_transform(user_input['Sex'], label_encoders['Sex']),
        'High_School_Type': safe_transform(user_input['High_School_Type'], label_encoders['High_School_Type']),
        'Scholarship': convert_scholarship(user_input['Scholarship']),
        'Additional_Work': safe_transform(user_input['Additional_Work'], label_encoders['Additional_Work']),
        'Sports_activity': safe_transform(user_input['Sports_activity'], label_encoders['Sports_activity']),
        'Transportation': safe_transform(user_input['Transportation'], label_encoders['Transportation']),
        'Weekly_Study_Hours': float(user_input['Weekly_Study_Hours']),
        'Attendance': safe_transform(user_input['Attendance'], label_encoders['Attendance']),
        'Reading': safe_transform(user_input['Reading'], label_encoders['Reading']),
        'Notes': safe_transform(user_input['Notes'], label_encoders['Notes']),
        'Listening_in_Class': safe_transform(user_input['Listening_in_Class'], label_encoders['Listening_in_Class']),
        'Project_work': safe_transform(user_input['Project_work'], label_encoders['Project_work'])
    }
    
    # Convert input to DataFrame and make prediction
    input_df = pd.DataFrame([input_dict])
    prediction = model.predict(input_df)[0]
    
    # Convert the numeric score to letter grade
    letter_grade = convert_numeric_to_letter(prediction)
    
    return prediction, letter_grade

# Example user input for testing
user_input = {
    'Student_Age': '19-22',  # Age range
    'Sex': 'Male',  # Gender
    'High_School_Type': 'Other',  # Type of High School
    'Scholarship': '50%',  # Scholarship in percentage
    'Additional_Work': 'Yes',  # Additional work involvement
    'Sports_activity': 'No',  # Sports activity involvement
    'Transportation': 'Private',  # Transportation mode
    'Weekly_Study_Hours': '0',  # Weekly study hours
    'Attendance': 'Always',  # Attendance level
    'Reading': 'Yes',  # Reading habit
    'Notes': 'Yes',  # Note-taking habit
    'Listening_in_Class': 'No',  # Listening during class
    'Project_work': 'No'  # Project work involvement
}

# Get the prediction
predicted_score, predicted_grade = predict_grade(user_input)
print(f"Predicted Score: {predicted_score:.2f}, Predicted Grade: {predicted_grade}")


Predicted Score: 0.71, Predicted Grade: F


In [38]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score

# Load the dataset
df = pd.read_csv('Students _Performance _Prediction.csv')

# Data cleaning and preprocessing
# Handle '6' in Notes column
df['Notes'] = df['Notes'].replace('6', 'No')

# Convert Scholarship to numerical
df['Scholarship'] = df['Scholarship'].str.rstrip('%').replace('None', '0').astype(float) / 100

# Convert Student_Age to categorical
df['Student_Age'] = df['Student_Age'].apply(lambda x: '18' if x == '18' else '19-22' if x == '19-22' else '23-27')

# Define features and target
X = df.drop(['Grade', 'Student_ID'], axis=1)
y = df['Grade']

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Preprocessing pipeline
numeric_features = ['Scholarship', 'Weekly_Study_Hours']
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())])

categorical_features = ['Student_Age', 'Sex', 'High_School_Type', 'Additional_Work', 
                        'Sports_activity', 'Transportation', 'Attendance', 'Reading', 
                        'Notes', 'Listening_in_Class', 'Project_work']
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))])

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)])

# Create model pipeline
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(n_estimators=100, random_state=42))
])

# Train the model
model.fit(X_train, y_train)

# Evaluate
y_pred = model.predict(X_test)
print(f"Model Accuracy: {accuracy_score(y_test, y_pred):.2f}")

# Function to get user input and make prediction
def predict_grade():
    print("Please enter the following student details:")
    
    features = {
        'Student_Age': input("Age group (18/19-22/23-27): "),
        'Sex': input("Gender (Male/Female): "),
        'High_School_Type': input("High School Type (State/Private/Other): "),
        'Scholarship': float(input("Scholarship percentage (0-100, without %): ")) / 100,
        'Additional_Work': input("Additional Work (Yes/No): "),
        'Sports_activity': input("Sports Activity (Yes/No): "),
        'Transportation': input("Transportation (Private/Bus): "),
        'Weekly_Study_Hours': float(input("Weekly Study Hours: ")),
        'Attendance': input("Attendance (Always/Never/Sometimes): "),
        'Reading': input("Reading Habit (Yes/No): "),
        'Notes': input("Takes Notes (Yes/No): "),
        'Listening_in_Class': input("Listens in Class (Yes/No): "),
        'Project_work': input("Does Project Work (Yes/No): ")
    }
    
    input_df = pd.DataFrame([features])
    prediction = model.predict(input_df)
    print(f"\nPredicted Grade: {prediction[0]}")

# Example usage
predict_grade()

Model Accuracy: 0.24
Please enter the following student details:

Predicted Grade: BA
