### Importing required libraries.

In [71]:
import numpy as np
import pandas as pd


### Importing the dataset

In [41]:
# Read the grade statistics CSV (path is relative to the notebook's working
# directory) and preview the first five rows.
data = pd.read_csv('grades_data.csv')
print(data.head())


                                    Course Name  Grading Strength  \
0                         Engineering Chemistry                58   
1  Basic Electrical and Electronics Engineering                58   
2                                      Calculus                58   
3    Structured and Object-Oriented Programming                64   
4                Data Structures and Algorithms                66   

   Class Average  Standard Deviation   S   A   B   C   D   E  
0      61.369999               11.50  80  67  56  50  44  38  
1      76.940002                8.45  88  81  73  68  64  50  
2      79.930000               11.95  95  86  74  68  62  50  
3      62.170000               17.82  85  71  53  44  35  27  
4      65.900000               14.89  85  73  58  51  44  36  


### Checking for datatypes and null values

In [42]:
# Summarise column dtypes and non-null counts.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 46 entries, 0 to 45
Data columns (total 10 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Course Name         46 non-null     object 
 1   Grading Strength    46 non-null     int64  
 2   Class Average       46 non-null     float64
 3   Standard Deviation  46 non-null     float64
 4   S                   46 non-null     int64  
 5   A                   46 non-null     int64  
 6   B                   46 non-null     int64  
 7   C                   46 non-null     int64  
 8   D                   46 non-null     int64  
 9   E                   46 non-null     int64  
dtypes: float64(2), int64(7), object(1)
memory usage: 3.7+ KB


In [43]:
# Count missing values per column.
data.isna().sum()

Course Name           0
Grading Strength      0
Class Average         0
Standard Deviation    0
S                     0
A                     0
B                     0
C                     0
D                     0
E                     0
dtype: int64

### Manipulating columns according to our needs

In [44]:
# "Course Name" is deliberately kept in the frame: later cells pick columns by
# position (iloc), so dropping it here would shift every index by one.
# Round the two float statistics to two decimals.
for stat_column in ("Class Average", "Standard Deviation"):
    data[stat_column] = data[stat_column].round(2)
data.head()

Unnamed: 0,Course Name,Grading Strength,Class Average,Standard Deviation,S,A,B,C,D,E
0,Engineering Chemistry,58,61.37,11.5,80,67,56,50,44,38
1,Basic Electrical and Electronics Engineering,58,76.94,8.45,88,81,73,68,64,50
2,Calculus,58,79.93,11.95,95,86,74,68,62,50
3,Structured and Object-Oriented Programming,64,62.17,17.82,85,71,53,44,35,27
4,Data Structures and Algorithms,66,65.9,14.89,85,73,58,51,44,36


In [47]:
# Model inputs: the three numeric course statistics at column positions 1-3
# (Grading Strength, Class Average, Standard Deviation).
X = data.iloc[:, 1:4].values

# Sample request: three feature values followed by the positional index of the
# grade-cutoff column to predict (index 4 is column "S").
*int_features, y = [70, 65, 10, 4]
features = np.array(int_features)

# Show the sample as a single-row 2-D array, the shape the scaler/model expect.
print(features.reshape(1, -1))

[[70 65 10]]


In [48]:
# `y` is a positional column index taken from the sample request (4 -> column
# "S"), not the target values themselves; `Y` is the resulting target vector.
Y = data.iloc[:,y].values
print(Y)

[80 88 95 85 85 82 88 91 90 87 95 92 89 93 86 92 96 80 80 80 81 80 81 91
 91 93 90 92 86 90 85 86 90 96 80 87 85 91 93 90 92 80 80 80 84 80]


### Splitting into Training and Test set

In [50]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; random_state is fixed so the split is
# the same on every run.
X_train, X_test, Y_train, Y_test = train_test_split(X,Y,random_state=18, test_size=0.2)

### Feature Scaling (standardizing the data)

In [51]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply that
# same fitted scaler to both splits so the test data never influences the fit.
scaler = StandardScaler()
scaler.fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [52]:
# Inspect the standardized training features.
# NOTE(review): this prints the whole array; X_train[:5] would keep the output short.
print(X_train)

[[-1.16284933e+00 -1.62447762e+00  3.98172010e-01]
 [ 8.74709676e-01  1.01722226e+00 -5.69585219e-01]
 [ 6.89477039e-01  3.56380093e-01  3.55476838e-01]
 [ 5.04244402e-01  8.60355687e-01  1.29192427e+00]
 [ 8.74709676e-01  1.28256041e+00 -3.73187428e-01]
 [-1.34808197e+00 -7.13316450e-01 -1.36656176e+00]
 [-3.20040835e+00 -2.91111730e-01  1.58509779e+00]
 [ 5.04244402e-01 -1.77633782e-01  1.31754138e+00]
 [ 6.89477039e-01 -4.73010206e-01 -2.65026326e-01]
 [ 3.19011764e-01 -4.31290372e-01  7.82428557e-01]
 [-6.07151422e-01  1.02222864e+00  1.01013614e+00]
 [-2.27424516e+00 -1.06710064e+00  8.99128693e-01]
 [-1.34808197e+00 -1.40419690e+00 -6.86285355e-02]
 [-7.92384060e-01 -3.14474837e-01 -1.21285914e+00]
 [ 1.33779127e-01 -1.89982853e+00 -5.72431563e-02]
 [ 8.74709676e-01  7.10164285e-01  3.72554907e-01]
 [ 6.89477039e-01 -1.04039995e+00  6.20186904e-01]
 [ 6.89477039e-01 -4.78016586e-01  7.79582212e-01]
 [ 8.74709676e-01  1.64468857e-01 -2.13792120e-01]
 [ 8.74709676e-01  3.76405613e-

### Random Forest Regressor

In [64]:
from sklearn.ensemble import RandomForestRegressor
# Random forest with 500 trees; random_state is fixed for reproducibility.
regressor = RandomForestRegressor(n_estimators=500, random_state=18)
# Train on the standardized training features.
regressor.fit(X_train, Y_train)

### Predictions

In [65]:
# Reuse the scaler fitted on the training split so the sample is standardized
# the same way as the data the model was trained on; reshape(1, -1) turns the
# 1-D feature vector into a single-row 2-D array.
predicted_value = regressor.predict(scaler.transform(features.reshape(1,-1)))
print(predicted_value)

[81.24]


### Testing scores

In [69]:
from sklearn.metrics import r2_score, mean_absolute_error,mean_squared_error

def evaluate_model(model, X_test, Y_test):
    """Score a fitted regressor on held-out data and print the metrics.

    Parameters
    ----------
    model
        Any object exposing ``predict(X)``.
    X_test
        Features passed unchanged to ``model.predict``.
    Y_test
        Ground-truth targets the predictions are compared against.

    Returns
    -------
    tuple
        ``(r2, mae, mse, rmse)`` in that order.
    """
    predictions = model.predict(X_test)

    r_squared = r2_score(Y_test, predictions)
    abs_error = mean_absolute_error(Y_test, predictions)
    sq_error = mean_squared_error(Y_test, predictions)
    # RMSE is derived from the MSE rather than recomputed from the predictions.
    root_sq_error = np.sqrt(sq_error)

    report = (
        f"R square score: {r_squared}",
        f"Mean absolute error : {abs_error}",
        f"Mean squared error: {sq_error}",
        f"Root mean square error: {root_sq_error}",
    )
    for line in report:
        print(line)

    return r_squared, abs_error, sq_error, root_sq_error


In [None]:
# Score the fitted model on the held-out test split.
r2, mae,mse,rmse = evaluate_model(regressor, X_test, Y_test)

R square score: 0.8291407387862796
Mean absolute error : 1.2611999999999994
Mean squared error: 2.5902264000000015
Root mean square error: 1.6094180314635478


In [73]:
import os
import pickle

# Persist the fitted model so it can be reloaded without retraining.
model_path = "models/random_forest_model.pkl"

# open(..., "wb") does not create missing parent folders, so on a fresh
# checkout this cell would fail with FileNotFoundError. Create the folder
# first; exist_ok=True keeps re-runs of the cell harmless.
model_dir = os.path.dirname(model_path)
if model_dir:
    os.makedirs(model_dir, exist_ok=True)

# NOTE(review): only the regressor is saved. It was trained on features
# standardized by `scaler`, so whoever loads this file needs the same fitted
# scaler to prepare inputs -- confirm how the consumer obtains it.
with open(model_path, "wb") as file:
    pickle.dump(regressor, file)

print(f"Model saved successfully to {model_path}")


Model saved successfully to models/random_forest_model.pkl
