# MARKS PREDICTION

## Importing Libraries

In [1]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

In [None]:
# load dataset
df = pd.read_csv('Students Performance Dataset.csv')

# Dividing column into input and ouput
X = df.drop(columns=['Student_ID', 'First_Name', 'Last_Name', 'Email', 'Grade', 'Extracurricular_Activities', 'Internet_Access_at_Home', 'Parent_Education_Level', 'Family_Income_Level', 'Sleep_Hours_per_Night'], axis=1)
y = df['Total_Score']

#Split data into training and testing
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state = 42)

# Divide data into categorical and numerical
cat_col = ['Gender', 'Department']
num_col = ['Age', 'Attendance (%)', 'Midterm_Score', 'Final_Score', 'Assignments_Avg', 'Quizzes_Avg', 'Participation_Score', 'Projects_Score', 'Study_Hours_per_Week']

#convert text into numerical and numerical into scale value
convert = ColumnTransformer(
    transformers = [
        ('num', StandardScaler(), num_col),
        ('cat', OneHotEncoder(handle_unknown= 'ignore'), cat_col)
    ]
)

# using pipeline for automate the workflow
model = Pipeline(steps=[
    ('transform', convert),
    ('linear_regression', LinearRegression())
])

# Training the model
model.fit(X_train, y_train)

# testing the model
y_pred = model.predict(X_test)

# creating user input for prediction
gender = input("enter you gender(male/female): ")
age = int(input("enter you age: "))
dept = input("Enter the your department name(Mathematics, Business, Engineering, CS(Computer Science)): ")
attendance = float(input("enter you attendance percent: "))
mid_term = float(input("enter your mid_term marks: "))
final_term = float(input("enter your final_term marks: "))
Assign_avg = float(input("enter your assignment marks: "))
Quizzes_avg = float(input("enter your Quiz marks: "))
participation_score = float(input("enter your participation marks: "))
project_score = float(input("enter the your project score: "))
study_hours = float(input("enter you study hours: "))

# converting user input into dataframe.
user_input = pd.DataFrame([{
    'Gender': gender,
    'Age': age,
    'Department': dept,
    'Attendance (%)': attendance,
    'Midterm_Score': mid_term,
    'Final_Score': final_term,
    'Assignments_Avg': Assign_avg,
    'Quizzes_Avg': Quizzes_avg,
    'Participation_Score': participation_score,
    'Projects_Score': project_score,
    'Study_Hours_per_Week': study_hours
}])

# Predicting the price of house based on user input
pred_price = model.predict(user_input)[0]

if 90 <= pred_price <= 100:
    print(f"Based on your record, the obtained marks are {pred_price}, Grade = A")

elif 80 <= pred_price <= 89:
    print(f"Based on your record, the obtained marks are {pred_price}, Grade = B")

elif 70 <= pred_price <= 79:
    print(f"Based on your record, the obtained marks are {pred_price}, Grade = C")

elif 60 <= pred_price <= 69:
    print(f"Based on your record, the obtained marks are {pred_price}, Grade = D")

elif 50 <= pred_price <= 59:
    print(f"Based on your record, the obtained marks are {pred_price}, Grade = E")

elif 0 <= pred_price <= 49:
    print(f"Based on your record, the obtained marks are {pred_price}, Grade = F")

else:
    print("Invalid marks")

print('\n')
print("YOUR RECORDS:")
print("Gender:", gender)
print("Age: ",age)
print("Attendance: ", attendance)
print("Mid_term marks: ",mid_term)
print("Final term marks: ", final_term)
print("Quizzes avg: ",Quizzes_avg)
print("Participation score: ",participation_score)
print("Project score: ",project_score)
print("Studied hours: ",study_hours)

Based on your record, the obtained marks are 86.7, Grade = B
YOUR RECORDS:
Gender: male
Age:  18
Attendance:  78.0
Mid_term marks:  98.0
Final term marks:  96.0
Quizzes avg:  87.0
Participation score:  78.0
Project score:  75.0
Studied hours:  9.0


## Checking accuracy of model


In [6]:
from sklearn.metrics import r2_score

r = r2_score(y_test, y_pred)
print("r^2 score of model: ", r)

r^2 score of model:  1.0
