# Bonus Task (Predicting the Quality Rating)

Implemented by Pratham Shah as part of the Project MANAS AI Taskphase '24-'25

## Importing libraries and storing the data:

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

seniors = pd.read_csv("Seniors.csv")

titles = list(seniors)

data = seniors[titles[1:-1]]

accuracies = seniors["Quality Rating"]

juniors = pd.read_csv("Juniors.csv")

juniors_data = juniors[titles[1:-1]]

## Multiple Linear Regression Functions:

In [2]:
def z_score_normalise(X):
    #Z score normalisation algorithm. Returns z score normalised dataset, means and std. deviations of the features.
    # find the mean of each n columns
    means     = np.mean(X, axis=0)                 # mean will have length n
    # find the standard deviation of each n columns
    stdevs  = np.std(X, axis=0)+0.00001                 # sigma will have length n. +0.00001 to prevent divide by 0 error.
    X_norm = (X - means) / stdevs    

    return (X_norm, means, stdevs)

def calculate_cost(X, y, w, b):
    m = X.shape[0] #stores number of training examples (m), to iterate through them
    cost = 0.0
    for i in range(m):                                      #iterates through training examples                            
        value_using_given_params = np.dot(X.iloc[i], w) + b      #stores the predicted value of the data, using provided weights and bias
        cost = cost + (value_using_given_params - y[i])**2  #calculates the 'sigma' part of the cost function 
    cost = cost / (2 * m)                                   #1/2m * sigma part = complete cost value          
    return cost

def calculate_gradient(X, y, w, b):
    m,n = X.shape           #(number of examples, number of features)
    dj_dw = np.zeros((n,))
    dj_db = 0.
    for i in range(m):                             
        err = (np.dot(X.iloc[i], w) + b) - y[i] #calculates error between actual and predicted value
        for j in range(n):                          
            dj_dw[j] = dj_dw[j] + err * X.iloc[i, j]    
        dj_db = dj_db + err                        
    dj_dw = dj_dw / m                                
    dj_db = dj_db / m                                
    return dj_db, dj_dw #returns derivate of cost function wrt bias and weights

def gradient_descent(X, y, w_init, b_init, cost_function, gradient_function, alpha, num_iters):
    # An array to store cost J and w's at each iteration primarily for graphing later
    J_history = []
    w = w_init
    b = b_init
    for i in range(num_iters):
        # Calculate the gradient and update the parameters
        dj_db,dj_dw = gradient_function(X, y, w, b)

        # Update Parameters using w, b, learning rate and gradient
        w = w - alpha * dj_dw            
        b = b - alpha * dj_db          
      
        # Save cost J at each iteration
        J_history.append( cost_function(X, y, w, b))
        if(i%10==0):
            print(f"{i+1} : {cost_function(X, y, w, b)}")
        
    return w, b, J_history #return final w,b and J history for graphing

def multiple_regression(xvals, yvals, initial_coeffs, initial_affine, iterations, learning_rate):
  w_final, b_final, J_hist = gradient_descent(xvals, yvals, initial_coeffs, initial_affine,
                                                      calculate_cost, calculate_gradient, 
                                                      learning_rate, iterations) #gets the final weights, bias and cost function history
  print(w_final)
  return w_final, b_final, J_hist


## Training the Model:

In [5]:

y_train = accuracies

raised_to = [3, 1, 1, 1, 2]

data['Temperature (°C)'] = data['Temperature (°C)'] ** raised_to[0]
data['Pressure (kPa)'] = data['Pressure (kPa)'] ** raised_to[1]
data['Temperature x Pressure'] = data['Temperature x Pressure'] ** raised_to[2]
data['Material Fusion Metric'] = data['Material Fusion Metric'] ** raised_to[3]
data['Material Transformation Metric'] = data['Material Transformation Metric'] ** raised_to[4]

X_train, feature_means, feature_stddevs = z_score_normalise(data)

b_init = 0
w_init = np.zeros(5)
coeffs, affine, hist = multiple_regression(X_train, y_train, np.zeros_like(w_init), 0, 100, 0.2) #stores final weights, bias and cost function history

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Temperature (°C)'] = data['Temperature (°C)'] ** raised_to[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Pressure (kPa)'] = data['Pressure (kPa)'] ** raised_to[1]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Temperature x Pressure'] = data['Temperature x Pressure'] ** raised

1 : 2997.5486969041426
11 : 59.759981803858956
21 : 22.441578902621647
31 : 20.6042537968984
41 : 19.969418426889373
51 : 19.642436037404877
61 : 19.429944349971464
71 : 19.262451586407423
81 : 19.114280200934452
91 : 18.97579118537237
[ -3.3231391   -1.58404528   0.97260918   5.47744391 -12.67075665]


## Working with the Predictions and Storing them in the Juniors.csv file:

In [7]:
juniors_data['Temperature (°C)'] = juniors_data['Temperature (°C)'] ** raised_to[0]
juniors_data['Pressure (kPa)'] = juniors_data['Pressure (kPa)'] ** raised_to[1]
juniors_data['Temperature x Pressure'] = juniors_data['Temperature x Pressure'] ** raised_to[2]
juniors_data['Material Fusion Metric'] = juniors_data['Material Fusion Metric'] ** raised_to[3]
juniors_data['Material Transformation Metric'] = juniors_data['Material Transformation Metric'] ** raised_to[4]

data_to_test, test_means, test_stddevs = z_score_normalise(juniors_data)

predictions = []
for value in data_to_test.iterrows():
    prediction = np.dot(value[1].values, coeffs) + affine
    if(prediction > 100):
        prediction = 100
    predictions.append(prediction)

juniors['Quality Rating'] = predictions

try:
    juniors.drop("Unnamed: 0", inplace = True, axis=1)
except:
    print("Unnamed: 0 doesn't exist anymore :)")
for column in list(juniors):
    if all(x == y for x, y in zip(juniors[column], range(0, len(juniors)))):
        juniors.drop(column, inplace=True, axis=1)
    
juniors.to_csv("Output.csv", index = False)


Unnamed: 0 doesn't exist anymore :)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  juniors_data['Temperature (°C)'] = juniors_data['Temperature (°C)'] ** raised_to[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  juniors_data['Pressure (kPa)'] = juniors_data['Pressure (kPa)'] ** raised_to[1]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  juniors_data['Temperature x Pressure'] = 

## Conclusion:

The multiple linear regression model is predicting the values of the Quality Rating to a high degree of efficiency. I have capped the Quality Rating to 100.

The edited CSV file with the added 'Quality Rating' column has been uploaded.