In [1]:
import pandas as pd
import numpy as np
from sklearn import linear_model, model_selection

def predict(csv_path="math-students.csv", test_size=0.1, random_state=None):
  """Fit a linear regression for the final grade and print test predictions.

  Reads a semicolon-separated CSV, keeps the columns G1, G2, G3, studytime,
  failures and absences, and trains ``LinearRegression`` to predict G3 from
  the remaining five columns. The data is split into train and test parts;
  for every test row the features, the expected label and the predicted
  label are printed, followed by the model score.

  Args:
    csv_path: Path of the CSV file to read. Defaults to the file the
      notebook originally hard-coded.
    test_size: Passed to ``train_test_split``; 0.1 holds out 10% of rows.
    random_state: Passed to ``train_test_split``. The default ``None``
      keeps the original unseeded behavior (a different split on every
      call); pass an integer to make the printed results reproducible.

  Returns:
    None. All results are written to standard output.
  """
  # The column whose values must be predicted
  predict_column = "G3"

  # Only these columns take part in the experiment
  selected_columns = ["G1", "G2", "G3", "studytime", "failures", "absences"]

  # Read the whole data from CSV and narrow it down
  data = pd.read_csv(csv_path, sep=";")
  study_data = data[selected_columns]

  # Features are every selected column except the label; the label is taken
  # from the same filtered frame so both arrays share one source
  features = np.array(study_data.drop(columns=[predict_column]))
  labels = np.array(study_data[predict_column])

  # Hold out part of the rows so the model is evaluated on data it has
  # not seen during training
  features_train, features_test, labels_train, labels_test = (
    model_selection.train_test_split(
      features, labels, test_size=test_size, random_state=random_state
    )
  )

  # Train the model
  linear = linear_model.LinearRegression()
  linear.fit(features_train, labels_train)

  # NOTE: for regressors score() is the coefficient of determination (R^2),
  # not a classification accuracy; it can be negative for a poor fit. The
  # "Accuracy" wording below is kept so the printed output is unchanged.
  accuracy = linear.score(features_test, labels_test)

  # Predict the held-out labels from the held-out features
  predictions = linear.predict(features_test)

  # Print details
  for feature_row, expected, predicted in zip(features_test, labels_test, predictions):
    print("features: .........", feature_row)
    print("expected label: ...", expected)
    print("predicted label: ..", predicted)
    print("-")
  print("Accuracy:", (accuracy * 100), "%")

In [7]:
# Run the whole pipeline: prints the features, expected label and predicted
# label for each held-out row, then the model score. The train/test split
# is not seeded here, so the rows shown differ from run to run.
predict()

features: ......... [14 14  1  0  2]
expected label: ... 14
predicted label: .. 14.156953794551882
-
features: ......... [10  9  2  0  4]
expected label: ... 9
predicted label: .. 8.651979858335137
-
features: ......... [8 7 2 0 4]
expected label: ... 6
predicted label: .. 6.408565301628801
-
features: ......... [ 9  9  2  2 11]
expected label: ... 9
predicted label: .. 8.134072111954163
-
features: ......... [15 16  3  0  2]
expected label: ... 18
predicted label: .. 15.955791959541694
-
features: ......... [12 13  2  0  4]
expected label: ... 13
predicted label: .. 12.82371033024407
-
features: ......... [11 10  2  0  0]
expected label: ... 10
predicted label: .. 9.5946337961292
-
features: ......... [13 13  4  0  4]
expected label: ... 12
predicted label: .. 12.694232580031285
-
features: ......... [12 12  2  0 14]
expected label: ... 12
predicted label: .. 12.307185724040535
-
features: ......... [ 6  5  1  1 14]
expected label: ... 5
predicted label: .. 4.419446745998786
-
feature

In [5]:
# Load the complete dataset into a notebook-level variable for inspection;
# predict() loads its own local copy and does not use this one.
data = pd.read_csv("math-students.csv", sep=";")

In [6]:
# Rich-render the loaded frame. The "Unnamed: 0" column in the output
# suggests the CSV carries a saved index column.
# NOTE(review): confirm, and consider index_col=0 when reading the file.
display(data)

Unnamed: 0,school,sex,age,address,famsize,Pstatus,Medu,Fedu,Mjob,Fjob,...,famrel,freetime,goout,Dalc,Walc,health,absences,G1,G2,G3
0,GP,F,18,U,GT3,A,4,4,at_home,teacher,...,4,3,4,1,1,3,6,5,6,6
1,GP,F,17,U,GT3,T,1,1,at_home,other,...,5,3,3,1,1,3,4,5,5,6
2,GP,F,15,U,LE3,T,1,1,at_home,other,...,4,3,2,2,3,3,10,7,8,10
3,GP,F,15,U,GT3,T,4,2,health,services,...,3,2,2,1,1,5,2,15,14,15
4,GP,F,16,U,GT3,T,3,3,other,other,...,4,3,2,1,2,5,4,6,10,10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
390,MS,M,20,U,LE3,A,2,2,services,services,...,5,5,4,4,5,4,11,9,9,9
391,MS,M,17,U,LE3,T,3,1,services,services,...,2,4,5,3,4,2,3,14,16,16
392,MS,M,21,R,GT3,T,1,1,other,other,...,5,5,3,3,3,3,3,10,8,7
393,MS,M,18,R,LE3,T,3,2,services,other,...,4,4,1,3,4,5,0,11,12,10
