In [1]:
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load the dataset
def load_data():
    file_path = "Students _Performance _Prediction.csv"
    df = pd.read_csv(file_path)
    return df

df = load_data()

# Preprocessing
label_encoders = {}
categorical_cols = ["Student_Age", "Sex", "High_School_Type", "Scholarship", "Additional_Work", 
                    "Sports_activity", "Transportation", "Attendance", "Reading", "Notes", 
                    "Listening_in_Class", "Project_work", "Grade"]

for col in categorical_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Split data into train and test
X = df.drop(columns=["Grade", "Student_ID"])
y = df["Grade"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

# Streamlit UI
st.title("Student Performance Analysis")

# Display dataset
if st.checkbox("Show Raw Data"):
    st.write(df)

# Visualization: Grade Distribution
st.subheader("Grade Distribution")
grade_counts = df["Grade"].value_counts()
fig, ax = plt.subplots()
ax.bar(grade_counts.index, grade_counts.values, color='skyblue')
ax.set_xlabel("Grade")
ax.set_ylabel("Count")
st.pyplot(fig)

# Weekly Study Hours vs. Grade
st.subheader("Weekly Study Hours vs. Grade")
avg_study_hours = df.groupby("Grade")["Weekly_Study_Hours"].mean()
fig, ax = plt.subplots()
ax.bar(avg_study_hours.index, avg_study_hours.values, color='lightgreen')
ax.set_xlabel("Grade")
ax.set_ylabel("Avg Weekly Study Hours")
st.pyplot(fig)

# Filter by Student Age
target_age = st.selectbox("Select Age Group", df["Student_Age"].unique())
st.write(df[df["Student_Age"] == target_age])

# Model Evaluation
st.subheader("Model Accuracy")
st.write(f"Accuracy: {accuracy:.2f}")
st.text("Classification Report:")
st.text(report)

st.write("Analysis completed.")

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
2025-03-31 20:27:37.997 
  command:

    streamlit run C:\Users\hp\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\ipykernel_launcher.py [ARGUMENTS]
2025-03-31 20:27:40.275 Session state does not function when running a script without `streamlit run`


In [3]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.tree import plot_tree
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
df=pd.read_csv('Students _Performance _Prediction.csv')
de=pd.DataFrame(df)
print(de)
# Identify categorical columns and apply Label Encoding
categorical_columns = df.select_dtypes(include=['object']).columns  # Identify all categorical columns

# Apply Label Encoding to each categorical column
le = LabelEncoder()

for col in categorical_columns:
    df[col] = le.fit_transform(df[col])

# Select features and target variable
X = df[['Student_ID', 'Student_Age', 'Sex', 'High_School_Type',
           'Scholarship', 'Additional_Work', 'Sports_activity',
           'Transportation', 'Weekly_Study_Hours', 
           'Attendance', 'Reading', 
           'Notes', 'Listening_in_Class', 
           'Project_work']]  # Features
y = df['Grade']  # Target variable

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
# Initialize the Decision Tree Classifier
clf = DecisionTreeClassifier(random_state=42)

# Train the model
clf.fit(X_train, y_train)

# Make predictions on the test set
y_pred = clf.predict(X_test)

# Evaluate the model's accuracy
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)
print(f"Accuracy of the Decision Tree Classifier: {accuracy:.2f}")
print('Confusion Matrix:')
print(conf_matrix)
print('Classification Report:')
print(class_report)

# Visualize the Decision Tree (optional)
plt.figure(figsize=(12, 8))
plot_tree(clf, filled=True, feature_names=X.columns, class_names=np.unique(y).astype(str), rounded=True)
plt.show()

     Student_ID Student_Age     Sex High_School_Type Scholarship  \
0      STUDENT1       19-22    Male            Other         50%   
1      STUDENT2       19-22    Male            Other         50%   
2      STUDENT3       19-22    Male            State         50%   
3      STUDENT4          18  Female          Private         50%   
4      STUDENT5       19-22    Male          Private         50%   
..          ...         ...     ...              ...         ...   
140  STUDENT141       19-22  Female            State         50%   
141  STUDENT142          18  Female            State         75%   
142  STUDENT143          18  Female          Private         75%   
143  STUDENT144       19-22  Female            State         75%   
144  STUDENT145          18  Female          Private        100%   

    Additional_Work Sports_activity Transportation  Weekly_Study_Hours  \
0               Yes              No        Private                   0   
1               Yes              No

  plt.show()


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import LabelEncoder

# Load and preprocess data
data = pd.read_csv('Students _Performance _Prediction.csv')
df = pd.DataFrame(data)
categorical_columns = data.select_dtypes(include=['object']).columns  
le = LabelEncoder()
for col in categorical_columns:
    data[col] = le.fit_transform(data[col])

X = data[['Student_ID', 'Student_Age', 'Sex', 'High_School_Type',
           'Scholarship', 'Additional_Work', 'Sports_activity',
           'Transportation', 'Weekly_Study_Hours', 
           'Attendance', 'Reading', 
           'Notes', 'Listening_in_Class', 
           'Project_work']]
y = data['Grade']  
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=34)

regressor = RandomForestRegressor(random_state=34)
regressor.fit(X_train, y_train)

# User Input for Prediction
student_id = input("Enter Student ID (STUDENT1 or STUDENT2 or STUDENT3): ")
student_age = int(input("Enter Student Age (19-22 or 18): "))
sex = input("Enter Sex (Male/Female): ")
high_school_type = input("Enter High School Type (Public/Private): ")
scholarship = int(input("Enter Scholarship (50% or 75% or 100%): "))
additional_work = input("Enter Additional Work (Yes or No): ")
sports_activity = input("Enter Sports Activity (Yes or No): ")
transportation = input("Enter Transportation (Private or Bus): ")
weekly_study_hours = float(input("Enter Weekly Study Hours (0 or 2 or 12): "))
attendance = float(input("Enter Attendance Percentage (1 or 2 or 3): "))
reading = input("Enter Reading Score (Yes or No): ")
notes = float(input("Enter Notes Score (1 or 0): "))
listening_in_class = float(input("Enter Listening in Class Score (1 or 0): "))
project_work = float(input("Enter Project Work Score (1 or 0): "))


    input_data = {
    'Student_ID': student_id,
    'Student_Age': student_age,
    'Sex': sex,
    'High_School_Type': high_school_type,
    'Scholarship': scholarship,
    'Additional_Work': additional_work,
    'Sports_activity': sports_activity,
    'Transportation': transportation,
    'Weekly_Study_Hours': weekly_study_hours,
    'Attendance': attendance,
    'Reading': reading,
    'Notes': notes,
    'Listening_in_Class': listening_in_class,
    'Project_work': project_work
}

input_df = pd.DataFrame([input_data])
# Exclude 'Grade' from transformation as it is not part of the input data
for col in categorical_columns:
    if col != 'Grade' and col in label_encoders:
        try:
            input_df[col] = label_encoders[col].transform(input_df[col])
        except ValueError:
            # Handle unseen labels by assigning a default value (e.g., the most frequent label or -1)
            input_df[col] = input_df[col].apply(lambda x: label_encoders[col].classes_[0] if x in label_encoders[col].classes_ else -1)
predicted_grade= regressor.predict(input_df)
defi=LabelEncoder(predicted_grade)
print(f"The predicted Grade is: {defi[0]:.2f}")

KeyboardInterrupt: 

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import LabelEncoder

# Load and preprocess data
data = pd.read_csv('Students _Performance _Prediction.csv')
df = pd.DataFrame(data)

# Encode categorical columns and store label encoders
categorical_columns = data.select_dtypes(include=['object']).columns
label_encoders = {}

for col in categorical_columns:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])
    label_encoders[col] = le

# Define features (X) and target (y)
X = data[['Student_ID', 'Student_Age', 'Sex', 'High_School_Type',
          'Scholarship', 'Additional_Work', 'Sports_activity',
          'Transportation', 'Weekly_Study_Hours', 
          'Attendance', 'Reading', 
          'Notes', 'Listening_in_Class', 
          'Project_work']]
y = data['Grade']

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=34)

# Train model
regressor = RandomForestRegressor(random_state=34)
regressor.fit(X_train, y_train)

# ---- User Input Section ----
student_id = input("Enter Student ID (STUDENT1 or STUDENT2 or STUDENT3): ")
student_age = int(input("Enter Student Age (19-22 or 18): "))
sex = input("Enter Sex (Male/Female): ")
high_school_type = input("Enter High School Type (Public/Private): ")
scholarship = int(input("Enter Scholarship (50 or 75 or 100): "))
additional_work = input("Enter Additional Work (Yes or No): ")
sports_activity = input("Enter Sports Activity (Yes or No): ")
transportation = input("Enter Transportation (Private or Bus): ")
weekly_study_hours = float(input("Enter Weekly Study Hours (e.g. 2 or 12): "))
attendance = float(input("Enter Attendance Percentage (1 or 2 or 3): "))
reading = input("Enter Reading Score (Yes or No): ")
notes = float(input("Enter Notes Score (1 or 0): "))
listening_in_class = float(input("Enter Listening in Class Score (1 or 0): "))
project_work = float(input("Enter Project Work Score (1 or 0): "))

# Organize input into a dictionary
input_data = {
    'Student_ID': student_id,
    'Student_Age': student_age,
    'Sex': sex,
    'High_School_Type': high_school_type,
    'Scholarship': scholarship,
    'Additional_Work': additional_work,
    'Sports_activity': sports_activity,
    'Transportation': transportation,
    'Weekly_Study_Hours': weekly_study_hours,
    'Attendance': attendance,
    'Reading': reading,
    'Notes': notes,
    'Listening_in_Class': listening_in_class,
    'Project_work': project_work
}

# Convert input to DataFrame
input_df = pd.DataFrame([input_data])

# Encode input categorical columns using saved encoders
for col in categorical_columns:
    if col != 'Grade' and col in label_encoders:
        encoder = label_encoders[col]
        try:
            input_df[col] = encoder.transform(input_df[col])
        except ValueError:
            # Handle unseen labels with fallback to first known class
            input_df[col] = input_df[col].apply(
                lambda x: encoder.transform([encoder.classes_[0]])[0]
                if x not in encoder.classes_ else encoder.transform([x])[0]
            )

# Predict the grade
predicted_grade = regressor.predict(input_df)

# Print the prediction
print(f"\n🎯 The predicted Grade is: {predicted_grade[0]:.2f}")



🎯 The predicted Grade is: 2.42


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder

# Load and preprocess data
data = pd.read_csv('Students _Performance _Prediction.csv')
df = pd.DataFrame(data)

# Encode categorical columns and store label encoders
categorical_columns = data.select_dtypes(include=['object']).columns
label_encoders = {}

for col in categorical_columns:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])
    label_encoders[col] = le

# Define features and target
X = data[['Student_ID', 'Student_Age', 'Sex', 'High_School_Type',
          'Scholarship', 'Additional_Work', 'Sports_activity',
          'Transportation', 'Weekly_Study_Hours', 
          'Attendance', 'Reading', 
          'Notes', 'Listening_in_Class', 
          'Project_work']]
y = data['Grade']

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=34)

# Train model
regressor = RandomForestRegressor(random_state=34)
regressor.fit(X_train, y_train)

# ---- User Input Section ----
student_id = input("Enter Student ID (STUDENT1 or STUDENT2 or STUDENT3): ")
student_age = int(input("Enter Student Age (e.g. 18-22): "))
sex = input("Enter Sex (Male/Female): ")
high_school_type = input("Enter High School Type (Public/Private): ")
scholarship = int(input("Enter Scholarship (50, 75, 100): "))
additional_work = input("Enter Additional Work (Yes/No): ")
sports_activity = input("Enter Sports Activity (Yes/No): ")
transportation = input("Enter Transportation (Private/Bus): ")
weekly_study_hours = float(input("Enter Weekly Study Hours (e.g. 2, 12): "))
attendance = float(input("Enter Attendance Score (1, 2, 3): "))
reading = input("Enter Reading Score (Yes/No): ")
notes = float(input("Enter Notes Score (1 or 0): "))
listening_in_class = float(input("Enter Listening in Class Score (1 or 0): "))
project_work = float(input("Enter Project Work Score (1 or 0): "))

# Organize user input
input_data = {
    'Student_ID': student_id,
    'Student_Age': student_age,
    'Sex': sex,
    'High_School_Type': high_school_type,
    'Scholarship': scholarship,
    'Additional_Work': additional_work,
    'Sports_activity': sports_activity,
    'Transportation': transportation,
    'Weekly_Study_Hours': weekly_study_hours,
    'Attendance': attendance,
    'Reading': reading,
    'Notes': notes,
    'Listening_in_Class': listening_in_class,
    'Project_work': project_work
}

# Convert to DataFrame
input_df = pd.DataFrame([input_data])

# Encode input using same encoders
for col in categorical_columns:
    if col != 'Grade' and col in label_encoders:
        encoder = label_encoders[col]
        try:
            input_df[col] = encoder.transform(input_df[col])
        except ValueError:
            input_df[col] = input_df[col].apply(
                lambda x: encoder.transform([encoder.classes_[0]])[0]
                if x not in encoder.classes_ else encoder.transform([x])[0]
            )

# Predict grade
predicted_grade = regressor.predict(input_df)
numeric_grade = predicted_grade[0]

# Convert to letter grade
def convert_to_letter_grade(grade):
    if grade >= 4.0 :
        return "A+"
    elif grade == 4.0:
        return "A"
    elif grade >= 3.75:
        return "A-"
    elif grade >= 3.5:
        return "B+"
    elif grade >= 3.0:
        return "B"
    elif grade >= 2.75:
        return "B-"
    elif grade >= 2.5:
        return "C+"
    elif grade >= 2.0:
        return "C"
    elif grade >= 1.75:
        return "C-"
    elif grade >= 1.0:
        return "D"
    else:
        return "F"

letter_grade = convert_to_letter_grade(numeric_grade)

# Output the result
print(f"\n🎯 The predicted Grade is: {numeric_grade:.2f} ➝ {letter_grade}")


In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

# -------------------------------------------
# 🎯 Mapping Functions
# -------------------------------------------
def map_grade_to_numeric_letter(score):
    if score < 40:
        return 0.0, 'F'
    elif 40 <= score < 45:
        return 1.0, 'D'
    elif 45 <= score < 50:
        return 1.75, 'C-'
    elif 50 <= score < 60:
        return 2.0, 'C'
    elif 60 <= score < 65:
        return 2.5, 'C+'
    elif 65 <= score < 70:
        return 2.75, 'B-'
    elif 70 <= score < 75:
        return 3.0, 'B'
    elif 75 <= score < 80:
        return 3.5, 'B+'
    elif 80 <= score < 85:
        return 3.75, 'A-'
    elif 85 <= score < 90:
        return 4.0, 'A'
    else:
        return 4.1, 'A+'

def convert_numeric_to_letter(grade):
    if grade < 1.0:
        return "F"
    elif grade < 1.75:
        return "D"
    elif grade < 2.0:
        return "C-"
    elif grade < 2.5:
        return "C"
    elif grade < 2.75:
        return "C+"
    elif grade < 3.0:
        return "B-"
    elif grade < 3.5:
        return "B"
    elif grade < 3.75:
        return "B+"
    elif grade < 4.0:
        return "A-"
    elif grade <= 4.0:
        return "A"
    else:
        return "A+"

# -------------------------------------------
# 📥 Load and Preprocess Dataset
# -------------------------------------------
data = pd.read_csv('Students _Performance _Prediction.csv')
df = pd.DataFrame(data)

# Encode categorical columns
categorical_columns = df.select_dtypes(include=['object']).columns
label_encoders = {}

for col in categorical_columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Map original percentage grades to numeric grades
df[['Numeric_Grade', 'Letter_Grade']] = df['Grade'].apply(
    lambda x: pd.Series(map_grade_to_numeric_letter(x))
)

# Features and target
feature_columns = ['Student_Age', 'Sex', 'High_School_Type',
                   'Scholarship', 'Additional_Work', 'Sports_activity',
                   'Transportation', 'Weekly_Study_Hours', 
                   'Attendance', 'Reading', 
                   'Notes', 'Listening_in_Class', 
                   'Project_work']

X = df[feature_columns]
y = df['Numeric_Grade']

# -------------------------------------------
# 🤖 Train Model
# -------------------------------------------
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=34)
regressor = RandomForestRegressor(random_state=34)
regressor.fit(X_train, y_train)

# -------------------------------------------
# 📤 User Input
# -------------------------------------------
def get_valid_input(prompt, input_type, valid_values=None):
    while True:
        user_input = input(prompt).strip()
        if not user_input:
            print("Input cannot be empty. Please try again.")
            continue
        try:
            value = input_type(user_input)
            if valid_values and value not in valid_values:
                print(f"Invalid value. Please enter one of the following: {valid_values}")
                continue
            return value
        except ValueError:
            print(f"Invalid input. Please enter a valid {input_type.__name__}.")

student_age = get_valid_input("Enter Student Age (e.g. 18-22): ", int)
sex = get_valid_input("Enter Sex (Male/Female): ", str, ["Male", "Female"])
high_school_type = get_valid_input("Enter High School Type (Public/Private): ", str, ["Public", "Private"])
scholarship = get_valid_input("Enter Scholarship (50, 75, 100): ", int, [50, 75, 100])
additional_work = get_valid_input("Enter Additional Work (Yes/No): ", str, ["Yes", "No"])
sports_activity = get_valid_input("Enter Sports Activity (Yes/No): ", str, ["Yes", "No"])
transportation = get_valid_input("Enter Transportation (Private/Bus): ", str, ["Private", "Bus"])
weekly_study_hours = get_valid_input("Enter Weekly Study Hours (e.g. 2, 12): ", float)
attendance = get_valid_input("Enter Attendance Score (1, 2, 3): ", float, [1, 2, 3])
reading = get_valid_input("Enter Reading Score (Yes/No): ", str, ["Yes", "No"])
notes = get_valid_input("Enter Notes Score (1 or 0): ", float, [0, 1])
listening_in_class = get_valid_input("Enter Listening in Class Score (1 or 0): ", float, [0, 1])
project_work = get_valid_input("Enter Project Work Score (1 or 0): ", float, [0, 1])

# Prepare input
input_data = {
    'Student_Age': student_age,
    'Sex': sex,
    'High_School_Type': high_school_type,
    'Scholarship': scholarship,
    'Additional_Work': additional_work,
    'Sports_activity': sports_activity,
    'Transportation': transportation,
    'Weekly_Study_Hours': weekly_study_hours,
    'Attendance': attendance,
    'Reading': reading,
    'Notes': notes,
    'Listening_in_Class': listening_in_class,
    'Project_work': project_work
}

input_df = pd.DataFrame([input_data])

# Encode user input
for col in input_df.columns:
    if col in label_encoders:
        encoder = label_encoders[col]
        try:
            input_df[col] = encoder.transform(input_df[col])
        except ValueError:
            input_df[col] = input_df[col].apply(
                lambda x: encoder.transform([encoder.classes_[0]])[0]
                if x not in encoder.classes_ else encoder.transform([x])[0]
            )

# Reorder columns to match training
input_df = input_df[X.columns]

# -------------------------------------------
# 📈 Predict and Output
# -------------------------------------------
predicted_numeric_grade = regressor.predict(input_df)[0]
predicted_letter_grade = convert_numeric_to_letter(predicted_numeric_grade)

print(f"\n🎯 The predicted Grade is: {predicted_numeric_grade:.2f} ➝ {predicted_letter_grade}")

Invalid value. Please enter one of the following: ['Public', 'Private']
Invalid value. Please enter one of the following: ['Private', 'Bus']
Invalid value. Please enter one of the following: ['Private', 'Bus']

🎯 The predicted Grade is: 0.00 ➝ F


In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error, r2_score

# -------------------------------------------
# 🎯 Mapping Functions
# -------------------------------------------
def map_grade_to_numeric_letter(score):
    if score < 40:
        return 0.0, 'F'
    elif 40 <= score < 45:
        return 1.0, 'D'
    elif 45 <= score < 50:
        return 1.75, 'C-'
    elif 50 <= score < 60:
        return 2.0, 'C'
    elif 60 <= score < 65:
        return 2.5, 'C+'
    elif 65 <= score < 70:
        return 2.75, 'B-'
    elif 70 <= score < 75:
        return 3.0, 'B'
    elif 75 <= score < 80:
        return 3.5, 'B+'
    elif 80 <= score < 85:
        return 3.75, 'A-'
    elif 85 <= score < 90:
        return 4.0, 'A'
    else:
        return 4.1, 'A+'

def convert_numeric_to_letter(grade):
    if grade < 1.0:
        return "F"
    elif grade < 1.75:
        return "D"
    elif grade < 2.0:
        return "C-"
    elif grade < 2.5:
        return "C"
    elif grade < 2.75:
        return "C+"
    elif grade < 3.0:
        return "B-"
    elif grade < 3.5:
        return "B"
    elif grade < 3.75:
        return "B+"
    elif grade < 4.0:
        return "A-"
    elif grade <= 4.0:
        return "A"
    else:
        return "A+"

# -------------------------------------------
# 📥 Load and Preprocess Dataset
# -------------------------------------------
df = pd.read_csv('Students _Performance _Prediction.csv')

# Drop rows with missing values
df.dropna(inplace=True)

# Encode categorical columns
label_encoders = {}
for col in df.select_dtypes(include=['object']).columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Ensure 'Grade' exists and convert to numeric/letter
if 'Grade' not in df.columns:
    raise ValueError("Missing 'Grade' column in dataset")

df[['Numeric_Grade', 'Letter_Grade']] = df['Grade'].apply(
    lambda x: pd.Series(map_grade_to_numeric_letter(x))
)

# -------------------------------------------
# 🧠 Features and Target
# -------------------------------------------
feature_columns = ['Student_Age', 'Sex', 'High_School_Type', 'Scholarship',
                   'Additional_Work', 'Sports_activity', 'Transportation',
                   'Weekly_Study_Hours', 'Attendance', 'Reading',
                   'Notes', 'Listening_in_Class', 'Project_work']

# Ensure all features are present
for col in feature_columns:
    if col not in df.columns:
        raise ValueError(f"Missing feature column: {col}")

X = df[feature_columns]
y = df['Numeric_Grade']

# -------------------------------------------
# 🚂 Train Model
# -------------------------------------------
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
regressor = RandomForestRegressor(random_state=42)
regressor.fit(X_train, y_train)

# -------------------------------------------
# 📤 User Input
# -------------------------------------------
def get_valid_input(prompt, input_type, valid_values=None):
    while True:
        user_input = input(prompt).strip()
        if not user_input:
            print("Input cannot be empty. Please try again.")
            continue
        try:
            value = input_type(user_input)
            if valid_values and value not in valid_values:
                print(f"Invalid value. Please enter one of the following: {valid_values}")
                continue
            return value
        except ValueError:
            print(f"Invalid input. Please enter a valid {input_type.__name__}.")

# Collect user inputs
input_data = {
    'Student_Age': get_valid_input("Enter Student Age: ", int),
    'Sex': get_valid_input("Enter Sex (Male/Female): ", str, ["Male", "Female"]),
    'High_School_Type': get_valid_input("Enter High School Type (Public/Private): ", str, ["Public", "Private"]),
    'Scholarship': get_valid_input("Enter Scholarship (50, 75, 100): ", int, [50, 75, 100]),
    'Additional_Work': get_valid_input("Additional Work (Yes/No): ", str, ["Yes", "No"]),
    'Sports_activity': get_valid_input("Sports Activity (Yes/No): ", str, ["Yes", "No"]),
    'Transportation': get_valid_input("Transportation (Private/Bus): ", str, ["Private", "Bus"]),
    'Weekly_Study_Hours': get_valid_input("Weekly Study Hours: ", float),
    'Attendance': get_valid_input("Attendance Score (1, 2, 3): ", float, [1, 2, 3]),
    'Reading': get_valid_input("Reading (Yes/No): ", str, ["Yes", "No"]),
    'Notes': get_valid_input("Notes (1 or 0): ", float, [0, 1]),
    'Listening_in_Class': get_valid_input("Listening in Class (1 or 0): ", float, [0, 1]),
    'Project_work': get_valid_input("Project Work (1 or 0): ", float, [0, 1])
}

# -------------------------------------------
# 🔁 Encode and Predict
# -------------------------------------------
input_df = pd.DataFrame([input_data])

# Encode categorical inputs
for col in input_df.columns:
    if col in label_encoders:
        le = label_encoders[col]
        try:
            input_df[col] = le.transform(input_df[col])
        except ValueError:
            input_df[col] = input_df[col].apply(
                lambda x: le.transform([le.classes_[0]])[0]
                if x not in le.classes_ else le.transform([x])[0]
            )

# Ensure same column order
input_df = input_df[X.columns]

# Predict
predicted_numeric_grade = regressor.predict(input_df)[0]
predicted_letter_grade = convert_numeric_to_letter(predicted_numeric_grade)

# Output
print(f"\n🎯 The predicted Grade is: {predicted_numeric_grade:.2f} ➝ {predicted_letter_grade}")



🎯 The predicted Grade is: 0.00 ➝ F


In [22]:
import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error, r2_score

# -------------------------------------------
# 📂 Load Dataset
# -------------------------------------------
file_path = "Students _Performance _Prediction.csv"  # ✅ Make sure file is correctly named

if not os.path.exists(file_path):
    raise FileNotFoundError(f"🚫 File not found: {file_path}")

df = pd.read_csv(file_path)

# -------------------------------------------
# 🧹 Initial Checks
# -------------------------------------------
print("✅ Data loaded. First 5 rows:")
print(df.head())
print("📊 Data shape:", df.shape)
print("📋 Columns:", df.columns.tolist())

# -------------------------------------------
# 🎯 Mapping Functions
# -------------------------------------------
def map_grade_to_numeric_letter(score):
    if score < 40:
        return 0.0, 'F'
    elif 40 <= score < 45:
        return 1.0, 'D'
    elif 45 <= score < 50:
        return 1.75, 'C-'
    elif 50 <= score < 60:
        return 2.0, 'C'
    elif 60 <= score < 65:
        return 2.5, 'C+'
    elif 65 <= score < 70:
        return 2.75, 'B-'
    elif 70 <= score < 75:
        return 3.0, 'B'
    elif 75 <= score < 80:
        return 3.5, 'B+'
    elif 80 <= score < 85:
        return 3.75, 'A-'
    elif 85 <= score < 90:
        return 4.0, 'A'
    else:
        return 4.1, 'A+'

def convert_numeric_to_letter(grade):
    if grade < 1.0:
        return "F"
    elif grade < 1.75:
        return "D"
    elif grade < 2.0:
        return "C-"
    elif grade < 2.5:
        return "C"
    elif grade < 2.75:
        return "C+"
    elif grade < 3.0:
        return "B-"
    elif grade < 3.5:
        return "B"
    elif grade < 3.75:
        return "B+"
    elif grade < 4.0:
        return "A-"
    elif grade <= 4.0:
        return "A"
    else:
        return "A+"

# -------------------------------------------
# 🧠 Preprocess
# -------------------------------------------
# Check for missing values
print("🕵️ Missing values:")
print(df.isnull().sum())

# Encode categorical columns
label_encoders = {}
categorical_columns = df.select_dtypes(include=['object']).columns

for col in categorical_columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col].astype(str))
    label_encoders[col] = le

# Map grades (assuming they are numeric scores)
df[['Numeric_Grade', 'Letter_Grade']] = df['Grade'].apply(lambda x: pd.Series(map_grade_to_numeric_letter(x)))

# Drop rows with missing data (after mapping)
df.dropna(inplace=True)

# Ensure dataset is not empty
if df.empty:
    raise ValueError("🚫 Dataset is empty after preprocessing. Please check the data format.")

# -------------------------------------------
# 🔧 Features & Target
# -------------------------------------------
feature_columns = ['Student_Age', 'Sex', 'High_School_Type',
                   'Scholarship', 'Additional_Work', 'Sports_activity',
                   'Transportation', 'Weekly_Study_Hours', 
                   'Attendance', 'Reading', 
                   'Notes', 'Listening_in_Class', 
                   'Project_work']

X = df[feature_columns]
y = df['Numeric_Grade']

# -------------------------------------------
# 🚂 Train/Test Split & Model
# -------------------------------------------
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=34)
regressor = RandomForestRegressor(random_state=34)
regressor.fit(X_train, y_train)

# -------------------------------------------
# 🧾 User Input Function
# -------------------------------------------
def get_valid_input(prompt, input_type, valid_values=None):
    while True:
        user_input = input(prompt).strip()
        if not user_input:
            print("⚠️ Input cannot be empty.")
            continue
        try:
            value = input_type(user_input)
            if valid_values and value not in valid_values:
                print(f"⚠️ Invalid value. Expected: {valid_values}")
                continue
            return value
        except ValueError:
            print(f"⚠️ Invalid input type. Expected {input_type.__name__}.")

# -------------------------------------------
# 👤 Collect Student Info
# -------------------------------------------
student_age = get_valid_input("Enter Student Age: ", int)
sex = get_valid_input("Enter Sex (Male/Female): ", str, ["Male", "Female"])
high_school_type = get_valid_input("High School Type (Public/Private): ", str, ["Public", "Private"])
scholarship = get_valid_input("Scholarship (50, 75, 100): ", int, [50, 75, 100])
additional_work = get_valid_input("Additional Work (Yes/No): ", str, ["Yes", "No"])
sports_activity = get_valid_input("Sports Activity (Yes/No): ", str, ["Yes", "No"])
transportation = get_valid_input("Transportation (Private/Bus): ", str, ["Private", "Bus"])
weekly_study_hours = get_valid_input("Weekly Study Hours: ", float)
attendance = get_valid_input("Attendance (1, 2, 3): ", float, [1, 2, 3])
reading = get_valid_input("Reading (Yes/No): ", str, ["Yes", "No"])
notes = get_valid_input("Notes (1 or 0): ", float, [0, 1])
listening_in_class = get_valid_input("Listening in Class (1 or 0): ", float, [0, 1])
project_work = get_valid_input("Project Work (1 or 0): ", float, [0, 1])

input_data = {
    'Student_Age': student_age,
    'Sex': sex,
    'High_School_Type': high_school_type,
    'Scholarship': scholarship,
    'Additional_Work': additional_work,
    'Sports_activity': sports_activity,
    'Transportation': transportation,
    'Weekly_Study_Hours': weekly_study_hours,
    'Attendance': attendance,
    'Reading': reading,
    'Notes': notes,
    'Listening_in_Class': listening_in_class,
    'Project_work': project_work
}

# -------------------------------------------
# 🔁 Encode User Input
# -------------------------------------------
input_df = pd.DataFrame([input_data])

for col in input_df.columns:
    if col in label_encoders:
        encoder = label_encoders[col]
        try:
            input_df[col] = encoder.transform(input_df[col])
        except ValueError:
            input_df[col] = encoder.transform([encoder.classes_[0]])[0]  # Default fallback

input_df = input_df[X.columns]

# -------------------------------------------
# 📈 Prediction
# -------------------------------------------
predicted_numeric_grade = regressor.predict(input_df)[0]
predicted_letter_grade = convert_numeric_to_letter(predicted_numeric_grade)

print(f"\n🎓 Predicted Grade: {predicted_numeric_grade:.2f} ➝ {predicted_letter_grade}")

✅ Data loaded. First 5 rows:
  Student_ID Student_Age     Sex High_School_Type Scholarship Additional_Work  \
0   STUDENT1       19-22    Male            Other         50%             Yes   
1   STUDENT2       19-22    Male            Other         50%             Yes   
2   STUDENT3       19-22    Male            State         50%              No   
3   STUDENT4          18  Female          Private         50%             Yes   
4   STUDENT5       19-22    Male          Private         50%              No   

  Sports_activity Transportation  Weekly_Study_Hours Attendance Reading Notes  \
0              No        Private                   0     Always     Yes   Yes   
1              No        Private                   0     Always     Yes    No   
2              No        Private                   2      Never      No    No   
3              No            Bus                   2     Always      No   Yes   
4              No            Bus                  12     Always     Yes    No  