In [1]:
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load the dataset
def load_data():
    file_path = "Students _Performance _Prediction.csv"
    df = pd.read_csv(file_path)
    return df

df = load_data()

# Preprocessing
label_encoders = {}
categorical_cols = ["Student_Age", "Sex", "High_School_Type", "Scholarship", "Additional_Work", 
                    "Sports_activity", "Transportation", "Attendance", "Reading", "Notes", 
                    "Listening_in_Class", "Project_work", "Grade"]

for col in categorical_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Split data into train and test
X = df.drop(columns=["Grade", "Student_ID"])
y = df["Grade"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

# Streamlit UI
st.title("Student Performance Analysis")

# Display dataset
if st.checkbox("Show Raw Data"):
    st.write(df)

# Visualization: Grade Distribution
st.subheader("Grade Distribution")
grade_counts = df["Grade"].value_counts()
fig, ax = plt.subplots()
ax.bar(grade_counts.index, grade_counts.values, color='skyblue')
ax.set_xlabel("Grade")
ax.set_ylabel("Count")
st.pyplot(fig)

# Weekly Study Hours vs. Grade
st.subheader("Weekly Study Hours vs. Grade")
avg_study_hours = df.groupby("Grade")["Weekly_Study_Hours"].mean()
fig, ax = plt.subplots()
ax.bar(avg_study_hours.index, avg_study_hours.values, color='lightgreen')
ax.set_xlabel("Grade")
ax.set_ylabel("Avg Weekly Study Hours")
st.pyplot(fig)

# Filter by Student Age
target_age = st.selectbox("Select Age Group", df["Student_Age"].unique())
st.write(df[df["Student_Age"] == target_age])

# Model Evaluation
st.subheader("Model Accuracy")
st.write(f"Accuracy: {accuracy:.2f}")
st.text("Classification Report:")
st.text(report)

st.write("Analysis completed.")

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
2025-03-31 20:27:37.997 
  command:

    streamlit run C:\Users\hp\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\ipykernel_launcher.py [ARGUMENTS]
2025-03-31 20:27:40.275 Session state does not function when running a script without `streamlit run`


In [3]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.tree import plot_tree
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
df=pd.read_csv('Students _Performance _Prediction.csv')
de=pd.DataFrame(df)
print(de)
# Identify categorical columns and apply Label Encoding
categorical_columns = df.select_dtypes(include=['object']).columns  # Identify all categorical columns

# Apply Label Encoding to each categorical column
le = LabelEncoder()

for col in categorical_columns:
    df[col] = le.fit_transform(df[col])

# Select features and target variable
X = df[['Student_ID', 'Student_Age', 'Sex', 'High_School_Type',
           'Scholarship', 'Additional_Work', 'Sports_activity',
           'Transportation', 'Weekly_Study_Hours', 
           'Attendance', 'Reading', 
           'Notes', 'Listening_in_Class', 
           'Project_work']]  # Features
y = df['Grade']  # Target variable

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
# Initialize the Decision Tree Classifier
clf = DecisionTreeClassifier(random_state=42)

# Train the model
clf.fit(X_train, y_train)

# Make predictions on the test set
y_pred = clf.predict(X_test)

# Evaluate the model's accuracy
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)
print(f"Accuracy of the Decision Tree Classifier: {accuracy:.2f}")
print('Confusion Matrix:')
print(conf_matrix)
print('Classification Report:')
print(class_report)

# Visualize the Decision Tree (optional)
plt.figure(figsize=(12, 8))
plot_tree(clf, filled=True, feature_names=X.columns, class_names=np.unique(y).astype(str), rounded=True)
plt.show()

     Student_ID Student_Age     Sex High_School_Type Scholarship  \
0      STUDENT1       19-22    Male            Other         50%   
1      STUDENT2       19-22    Male            Other         50%   
2      STUDENT3       19-22    Male            State         50%   
3      STUDENT4          18  Female          Private         50%   
4      STUDENT5       19-22    Male          Private         50%   
..          ...         ...     ...              ...         ...   
140  STUDENT141       19-22  Female            State         50%   
141  STUDENT142          18  Female            State         75%   
142  STUDENT143          18  Female          Private         75%   
143  STUDENT144       19-22  Female            State         75%   
144  STUDENT145          18  Female          Private        100%   

    Additional_Work Sports_activity Transportation  Weekly_Study_Hours  \
0               Yes              No        Private                   0   
1               Yes              No

  plt.show()


In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import LabelEncoder

# Load and preprocess data
data = pd.read_csv('Students _Performance _Prediction.csv')
df = pd.DataFrame(data)
categorical_columns = data.select_dtypes(include=['object']).columns  
le = LabelEncoder()
for col in categorical_columns:
    data[col] = le.fit_transform(data[col])

X = data[['Student_ID', 'Student_Age', 'Sex', 'High_School_Type',
           'Scholarship', 'Additional_Work', 'Sports_activity',
           'Transportation', 'Weekly_Study_Hours', 
           'Attendance', 'Reading', 
           'Notes', 'Listening_in_Class', 
           'Project_work']]
y = data['Grade']  
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=34)

regressor = RandomForestRegressor(random_state=34)
regressor.fit(X_train, y_train)

# User Input for Prediction
student_id = input("Enter Student ID (STUDENT1 or STUDENT2 or STUDENT3): ")
student_age = int(input("Enter Student Age (19-22 or 18): "))
sex = input("Enter Sex (Male/Female): ")
high_school_type = input("Enter High School Type (Public/Private): ")
scholarship = int(input("Enter Scholarship (50% or 75% or 100%): "))
additional_work = input("Enter Additional Work (Yes or No): ")
sports_activity = input("Enter Sports Activity (Yes or No): ")
transportation = input("Enter Transportation (Private or Bus): ")
weekly_study_hours = float(input("Enter Weekly Study Hours (0 or 2 or 12): "))
attendance = float(input("Enter Attendance Percentage (1 or 2 or 3): "))
reading = input("Enter Reading Score (Yes or No): ")
notes = float(input("Enter Notes Score (1 or 0): "))
listening_in_class = float(input("Enter Listening in Class Score (1 or 0): "))
project_work = float(input("Enter Project Work Score (1 or 0): "))

input_data = {
    'Student_ID': student_id,
    'Student_Age': student_age,
    'Sex': sex,
    'High_School_Type': high_school_type,
    'Scholarship': scholarship,
    'Additional_Work': additional_work,
    'Sports_activity': sports_activity,
    'Transportation': transportation,
    'Weekly_Study_Hours': weekly_study_hours,
    'Attendance': attendance,
    'Reading': reading,
    'Notes': notes,
    'Listening_in_Class': listening_in_class,
    'Project_work': project_work
}

input_df = pd.DataFrame([input_data])
# Exclude 'Grade' from transformation as it is not part of the input data
for col in categorical_columns:
    if col != 'Grade' and col in label_encoders:
        try:
            input_df[col] = label_encoders[col].transform(input_df[col])
        except ValueError:
            # Handle unseen labels by assigning a default value (e.g., the most frequent label or -1)
            input_df[col] = input_df[col].apply(lambda x: label_encoders[col].classes_[0] if x in label_encoders[col].classes_ else -1)
predicted_grade= regressor.predict(input_df)
print(f"The predicted Grade is: {predicted_grade[0]:.2f}")

The predicted Grade is: 3.78


In [23]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import LabelEncoder

# -------------------------------------------
# 🎯 Grade Mapping Functions
# -------------------------------------------
def convert_numeric_to_letter(grade):
    if grade < 1.0:
        return "F"
    elif grade < 1.75:
        return "D"
    elif grade < 2.0:
        return "C-"
    elif grade < 2.5:
        return "C"
    elif grade < 2.75:
        return "C+"
    elif grade < 3.0:
        return "B-"
    elif grade < 3.5:
        return "B"
    elif grade < 3.75:
        return "B+"
    elif grade < 4.0:
        return "A-"
    elif grade <= 4.0:
        return "A"
    else:
        return "A+"

# -------------------------------------------
# 📥 Load and Preprocess Data
# -------------------------------------------
data = pd.read_csv('Students _Performance _Prediction.csv')
df = pd.DataFrame(data)

# Encode categorical columns
categorical_columns = df.select_dtypes(include=['object']).columns
label_encoders = {}

for col in categorical_columns:
    if col != 'Grade':  # Skip target
        le = LabelEncoder()
        df[col] = le.fit_transform(df[col])
        label_encoders[col] = le

# Features and target
X = df[['Student_ID', 'Student_Age', 'Sex', 'High_School_Type',
        'Scholarship', 'Additional_Work', 'Sports_activity',
        'Transportation', 'Weekly_Study_Hours', 
        'Attendance', 'Reading', 
        'Notes', 'Listening_in_Class', 
        'Project_work']]
y = df['Grade']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=34)

# Train model
regressor = RandomForestRegressor(random_state=34)
regressor.fit(X_train, y_train)

# -------------------------------------------
# 🧑‍🎓 User Input for Prediction
# -------------------------------------------
student_id = input("Enter Student ID (STUDENT1 or STUDENT2 or STUDENT3): ")
student_age = int(input("Enter Student Age (18–22): "))
sex = input("Enter Sex (Male/Female): ")
high_school_type = input("Enter High School Type (Public/Private): ")
scholarship = int(input("Enter Scholarship (50, 75, 100): "))
additional_work = input("Enter Additional Work (Yes/No): ")
sports_activity = input("Enter Sports Activity (Yes/No): ")
transportation = input("Enter Transportation (Private/Bus): ")
weekly_study_hours = float(input("Enter Weekly Study Hours (e.g. 2, 12): "))
attendance = float(input("Enter Attendance Score (1, 2, 3): "))
reading = input("Enter Reading Score (Yes/No): ")
notes = float(input("Enter Notes Score (1 or 0): "))
listening_in_class = float(input("Enter Listening in Class Score (1 or 0): "))
project_work = float(input("Enter Project Work Score (1 or 0): "))

input_data = {
    'Student_ID': student_id,
    'Student_Age': student_age,
    'Sex': sex,
    'High_School_Type': high_school_type,
    'Scholarship': scholarship,
    'Additional_Work': additional_work,
    'Sports_activity': sports_activity,
    'Transportation': transportation,
    'Weekly_Study_Hours': weekly_study_hours,
    'Attendance': attendance,
    'Reading': reading,
    'Notes': notes,
    'Listening_in_Class': listening_in_class,
    'Project_work': project_work
}

input_df = pd.DataFrame([input_data])

# Encode user input using trained label encoders
for col in input_df.columns:
    if col in label_encoders:
        le = label_encoders[col]
        try:
            input_df[col] = le.transform(input_df[col])
        except ValueError:
            # Handle unseen labels gracefully
            input_df[col] = [0]

# -------------------------------------------
# 📈 Predict and Show Result
# -------------------------------------------
predicted_grade = regressor.predict(input_df)[0]
predicted_letter = convert_numeric_to_letter(predicted_grade)

print(f"\n🎯 Predicted Grade: {predicted_grade:.2f} ➝ {predicted_letter}")

ValueError: could not convert string to float: 'BA'

In [24]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder

# -------------------------------------------
# 🎯 Grade Mapping Functions
# -------------------------------------------
def convert_numeric_to_letter(grade):
    if grade < 1.0:
        return "F"
    elif grade < 1.75:
        return "D"
    elif grade < 2.0:
        return "C-"
    elif grade < 2.5:
        return "C"
    elif grade < 2.75:
        return "C+"
    elif grade < 3.0:
        return "B-"
    elif grade < 3.5:
        return "B"
    elif grade < 3.75:
        return "B+"
    elif grade < 4.0:
        return "A-"
    elif grade <= 4.0:
        return "A"
    else:
        return "A+"

# -------------------------------------------
# 📥 Load and Preprocess Data
# -------------------------------------------
data = pd.read_csv('Students _Performance _Prediction.csv')
df = pd.DataFrame(data)

# Drop non-numeric irrelevant columns like Student_ID
if 'Student_ID' in df.columns:
    df.drop(columns=['Student_ID'], inplace=True)

# Encode categorical columns
categorical_columns = df.select_dtypes(include=['object']).columns
label_encoders = {}

for col in categorical_columns:
    if col != 'Grade':  # Skip target
        le = LabelEncoder()
        df[col] = le.fit_transform(df[col])
        label_encoders[col] = le

# Features and target
X = df.drop(columns=['Grade'])
y = df['Grade']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=34)

# Train model
regressor = RandomForestRegressor(random_state=34)
regressor.fit(X_train, y_train)

# -------------------------------------------
# 🧑‍🎓 User Input for Prediction
# -------------------------------------------
student_age = int(input("Enter Student Age (18–22): "))
sex = input("Enter Sex (Male/Female): ")
high_school_type = input("Enter High School Type (Public/Private): ")
scholarship = int(input("Enter Scholarship (50, 75, 100): "))
additional_work = input("Enter Additional Work (Yes/No): ")
sports_activity = input("Enter Sports Activity (Yes/No): ")
transportation = input("Enter Transportation (Private/Bus): ")
weekly_study_hours = float(input("Enter Weekly Study Hours (e.g. 2, 12): "))
attendance = float(input("Enter Attendance Score (1, 2, 3): "))
reading = input("Enter Reading Score (Yes/No): ")
notes = float(input("Enter Notes Score (1 or 0): "))
listening_in_class = float(input("Enter Listening in Class Score (1 or 0): "))
project_work = float(input("Enter Project Work Score (1 or 0): "))

input_data = {
    'Student_Age': student_age,
    'Sex': sex,
    'High_School_Type': high_school_type,
    'Scholarship': scholarship,
    'Additional_Work': additional_work,
    'Sports_activity': sports_activity,
    'Transportation': transportation,
    'Weekly_Study_Hours': weekly_study_hours,
    'Attendance': attendance,
    'Reading': reading,
    'Notes': notes,
    'Listening_in_Class': listening_in_class,
    'Project_work': project_work
}

input_df = pd.DataFrame([input_data])

# Encode user input
for col in input_df.columns:
    if col in label_encoders:
        le = label_encoders[col]
        try:
            input_df[col] = le.transform(input_df[col])
        except ValueError:
            # Handle unseen label by assigning default known value
            input_df[col] = [0]

# -------------------------------------------
# 📈 Predict and Show Result
# -------------------------------------------
predicted_grade = regressor.predict(input_df)[0]
predicted_letter = convert_numeric_to_letter(predicted_grade)

print(f"\n🎯 Predicted Grade: {predicted_grade:.2f} ➝ {predicted_letter}")

ValueError: could not convert string to float: 'BA'

In [25]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder

# -------------------------------------------
# 🎯 Grade Mapping
# -------------------------------------------
grade_to_numeric = {
    'F': 0.0, 'D': 1.0, 'C-': 1.75, 'C': 2.0, 'C+': 2.5,
    'B-': 2.75, 'B': 3.0, 'B+': 3.5, 'A-': 3.75, 'A': 4.0, 'A+': 4.1,
    'BA': 3.5  # Map 'BA' to 'B+' (adjust as needed)
}
numeric_to_letter = {v: k for k, v in grade_to_numeric.items()}

def convert_numeric_to_letter(pred):
    closest = min(numeric_to_letter.keys(), key=lambda x: abs(x - pred))
    return numeric_to_letter[closest]

# -------------------------------------------
# 📥 Load and Preprocess Data
# -------------------------------------------
data = pd.read_csv('Students _Performance _Prediction.csv')
df = pd.DataFrame(data)

# Drop non-predictive column
if 'Student_ID' in df.columns:
    df.drop(columns=['Student_ID'], inplace=True)

# Encode letter grades to numeric for model training
df['Grade'] = df['Grade'].map(grade_to_numeric)

# Remove rows with invalid grades
df.dropna(subset=['Grade'], inplace=True)

# Encode categorical features
categorical_columns = df.select_dtypes(include=['object']).columns
label_encoders = {}

for col in categorical_columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Split data
X = df.drop(columns=['Grade'])
y = df['Grade']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=34)

# Train model
regressor = RandomForestRegressor(random_state=34)
regressor.fit(X_train, y_train)

# -------------------------------------------
# 🧑‍🎓 User Input for Prediction
# -------------------------------------------
student_age = int(input("Enter Student Age (18–22): "))
sex = input("Enter Sex (Male/Female): ")
high_school_type = input("Enter High School Type (Public/Private): ")
scholarship = int(input("Enter Scholarship (50, 75, 100): "))
additional_work = input("Enter Additional Work (Yes/No): ")
sports_activity = input("Enter Sports Activity (Yes/No): ")
transportation = input("Enter Transportation (Private/Bus): ")
weekly_study_hours = float(input("Enter Weekly Study Hours (e.g. 2, 12): "))
attendance = float(input("Enter Attendance Score (1, 2, 3): "))
reading = input("Enter Reading Score (Yes/No): ")
notes = float(input("Enter Notes Score (1 or 0): "))
listening_in_class = float(input("Enter Listening in Class Score (1 or 0): "))
project_work = float(input("Enter Project Work Score (1 or 0): "))

input_data = {
    'Student_Age': student_age,
    'Sex': sex,
    'High_School_Type': high_school_type,
    'Scholarship': scholarship,
    'Additional_Work': additional_work,
    'Sports_activity': sports_activity,
    'Transportation': transportation,
    'Weekly_Study_Hours': weekly_study_hours,
    'Attendance': attendance,
    'Reading': reading,
    'Notes': notes,
    'Listening_in_Class': listening_in_class,
    'Project_work': project_work
}

input_df = pd.DataFrame([input_data])

# Encode input using the same encoders
for col in input_df.columns:
    if col in label_encoders:
        le = label_encoders[col]
        try:
            input_df[col] = le.transform(input_df[col])
        except ValueError:
            # Handle unseen label
            input_df[col] = [0]

# -------------------------------------------
# 📈 Predict
# -------------------------------------------
predicted_numeric = regressor.predict(input_df)[0]
predicted_letter = convert_numeric_to_letter(predicted_numeric)

print(f"\n🎯 Predicted Grade: {predicted_numeric:.2f} ➝ {predicted_letter}")



🎯 Predicted Grade: 3.50 ➝ BA
