In [None]:
# Import necessary libraries
import numpy as np
import matplotlib.pyplot as mpt
import pandas as pd

In [None]:
# Read the student dataset from its CSV file into a pandas dataframe,
# then preview the first few rows.
csv_path = 'CSV_Data/student-mat.csv'
data = pd.read_csv(csv_path)
data.head()

In [None]:
# Inspect the dimensions of the dataframe as a (rows, columns) tuple.
data.shape

In [None]:
# Partition the column names by dtype:
#   num_columns -> columns stored as int64 or float64
#   str_columns -> columns stored as generic Python objects (text in this dataset)
numeric_kinds = ('int64', 'float64')
num_columns = [col for col, kind in data.dtypes.items() if kind in numeric_kinds]
str_columns = [col for col, kind in data.dtypes.items() if kind == 'object']

In [None]:
# Display the names of the columns that hold numeric data.
num_columns

In [None]:
# Display the names of the columns that hold string data.
str_columns

In [None]:
# For every string column, list the distinct values it contains.
for column in str_columns:
    distinct_values = data[column].unique()
    print(column, " = ", distinct_values)

In [None]:
# Encode the string columns as integer codes so the regression can use them.
# A separate LabelEncoder is fitted for each column and kept in `encoders`,
# because a single shared encoder is overwritten on every fit and the
# category -> code mapping of earlier columns would be lost.
# `encoders[col].classes_[k]` is the original label that was encoded as k.
from sklearn.preprocessing import LabelEncoder
encoders = {}
for i in list(str_columns):
    le = LabelEncoder()  # fresh encoder per column; `le` ends as the last one fitted
    data[i] = le.fit_transform(data[i])
    encoders[i] = le

In [None]:
# Preview the first 5 rows of the dataframe after the string columns were encoded.
data.head()

In [None]:
# Build the model inputs:
#   x -> every column except the target and the features left out of the model
#   y -> the target column G3
target_column = 'G3'
unused_columns = [
    'address', 'famsize', 'Pstatus', 'paid', 'romantic', 'health', 'reason',
    'guardian', 'traveltime', 'goout', 'Dalc', 'nursery',
]
x = data.drop(columns=[target_column] + unused_columns)
y = data[target_column]

In [None]:
# Display the names of the columns that remain after dropping.
# Printing the whole dataframe would elide columns in its text form,
# so the column names are listed explicitly instead.
print(list(x.columns))

In [None]:
# Preview the first 5 rows of the feature dataframe.
x.head()

In [None]:
# Split features and target into training and test parts
# (x_train, x_test, y_train, y_test). 25% of the rows go to the test set;
# the fixed random_state makes the split reproducible.
from sklearn.model_selection import train_test_split
split_options = {'test_size': 0.25, 'random_state': 0}
x_train, x_test, y_train, y_test = train_test_split(x, y, **split_options)

In [None]:
# Train a linear regression model on the training split.
from sklearn.linear_model import LinearRegression
reg = LinearRegression().fit(x_train, y_train)  # fit() returns the fitted estimator
reg

In [None]:
# Learned parameters of the fitted model:
#   weights -> one coefficient per feature column
#   bais    -> the intercept (the prediction when every feature is 0)
weights, bais = reg.coef_, reg.intercept_
print(weights, bais)

In [None]:
# Predict the grades of the test split and score the predictions with R^2
# (the coefficient of determination), the usual goodness-of-fit measure
# for a regression model.
from sklearn.metrics import r2_score
y_pred = reg.predict(x_test)
test_r2 = r2_score(y_test, y_pred)
print(test_r2)

In [None]:
# Report the test-set R^2 score to the user, scaled to percent.
# R^2 is the coefficient of determination, not a classification accuracy:
# 100 means a perfect fit and the value can be negative for a poor model,
# so the message names the metric instead of calling it "accuracy".
acc = r2_score(y_test, y_pred)*100
print("\n")
print("R^2 score of this model on the test set (scaled to percent): %f" % acc)
print("\n")

In [None]:
# Summarize the prediction error on the test split with three scores:
# mean absolute error, mean squared error, and root mean squared error.
from sklearn import metrics
mae = metrics.mean_absolute_error(y_test, y_pred)
mse = metrics.mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # RMSE is the square root of the MSE computed above
print('The mean absolute error is:', mae)
print('The mean Square error is:', mse)
print('The root mean square error is:', rmse)

In [None]:
# Scatter plot of the predicted grades against the actual test grades.
fig, ax = mpt.subplots()
ax.scatter(y_test, y_pred)
ax.set_xlabel('Test values')
ax.set_ylabel('Predicted values')

In [None]:
# Collect the 20 feature values for one student from the keyboard.
# input() returns text, so every answer is converted to an integer and
# checked against the range stated in its prompt before it is stored.
def _ask_int(prompt, low, high):
    """Print `prompt`, then read answers until one is an integer in [low, high].

    Returns the accepted value as an int.
    """
    print(prompt)
    while True:
        raw = input().strip()
        try:
            value = int(raw)
        except ValueError:
            print("Please enter a whole number.")
            continue
        if low <= value <= high:
            return value
        print("Please enter a value from %d to %d." % (low, high))


# Greet User
print("Hello, Welcome to Student Marks Prediction")
print("This program is designed to predict student marks based on their personal information")
# one line space
print("")
print("Give binary input where mentioned. Enter 1 for yes and 0 for no.")
print("")
# take user inputs
# NOTE(review): the 0/1 codes shown for school and sex assume the dataset stores
# them as 'GP'/'MS' and 'F'/'M' and that they were label-encoded in sorted order
# - confirm against the unique values printed earlier in the notebook.
school = _ask_int("Choose school (binary: 0 - Gabriel Pereira or 1 - Mousinho da Silveira): ", 0, 1)
sex = _ask_int("Choose sex (binary: 0 - female or 1 - male): ", 0, 1)
age = _ask_int("Enter age (numeric: from 15 to 22): ", 15, 22)
# mother father education level- describe
Medu = _ask_int("Enter Mother education level (numeric: 0 - none, 1 - primary education (4th grade), 2 - 5th to 9th grade, 3 - secondary education or 4 - higher education): ", 0, 4)
Fedu = _ask_int("Enter Father education level (numeric: 0 - none, 1 - primary education (4th grade), 2 - 5th to 9th grade, 3 - secondary education or 4 - higher education): ", 0, 4)
Mjob = _ask_int("Choose Mother job (numeric: 0 - at_home, 1 - health, 2 - other, 3 - services, 4 - teacher ): ", 0, 4)
Fjob = _ask_int("Choose Father job (numeric: 0 - at_home, 1 - health, 2 - other, 3 - services, 4 - teacher ): ", 0, 4)
studytime = _ask_int("Enter weekly study time (numeric: 1 - <2 hours, 2 - 2 to 5 hours, 3 - 5 to 10 hours, or 4 - >10 hours): ", 1, 4)
failures = _ask_int("Enter number of past class failures (numeric: n if 1<=n<3, else 4): ", 0, 4)
schoolsup = _ask_int("Do you have extra educational support (binary: yes or no): ", 0, 1)
famsup = _ask_int("Do you have family educational support (binary: yes or no): ", 0, 1)
activities = _ask_int("Do you enroll in extra-curricular activities (binary: yes or no): ", 0, 1)
higher = _ask_int("Do you want to take higher education (binary: yes or no): ", 0, 1)
internet = _ask_int("Do you have internet access (binary: yes or no): ", 0, 1)
famrel = _ask_int("Do you have quality of family relationships (numeric: from 1 - very bad to 5 - excellent): ", 1, 5)
freetime = _ask_int("Do you have free time after school (numeric: from 1 - very low to 5 - very high): ", 1, 5)
Walc = _ask_int("How much is your weekend alcohol consumption (numeric: from 1 - very low to 5 - very high)", 1, 5)
absences = _ask_int("Enter you total days of school absences (numeric: from 0 to 93): ", 0, 93)
G1 = _ask_int("Enter your first year grade (numeric: from 0 to 20): ", 0, 20)
G2 = _ask_int("Enter your second year grade (numeric: from 0 to 20): ", 0, 20)

In [None]:
# Predict the final grade (G3) for the student described by the answers above.
# The answers are keyed by column name and then ordered by the training frame's
# columns, so the feature order cannot drift from what the model was fitted on.
answers = {
    'school': school, 'sex': sex, 'age': age, 'Medu': Medu, 'Fedu': Fedu,
    'Mjob': Mjob, 'Fjob': Fjob, 'studytime': studytime, 'failures': failures,
    'schoolsup': schoolsup, 'famsup': famsup, 'activities': activities,
    'higher': higher, 'internet': internet, 'famrel': famrel,
    'freetime': freetime, 'Walc': Walc, 'absences': absences, 'G1': G1, 'G2': G2,
}
try:
    # input() returns text and the model needs numeric features;
    # float() raises ValueError for anything that is not a number.
    numeric_answers = {name: float(value) for name, value in answers.items()}
except ValueError:
    # Only bad user input is handled here; any other error is left to surface.
    print("Enter valid values. You can only enter numeric values within the range as asked.")
else:
    student = pd.DataFrame([numeric_answers])[list(x.columns)]
    pred = reg.predict(student)
    print("")
    print("The Final Grade will be:", pred)
    print("")