In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split

In [2]:
df = pd.read_csv('/content/Student_Performance.csv')
df.head()

Unnamed: 0,Hours Studied,Previous Scores,Extracurricular Activities,Sleep Hours,Sample Question Papers Practiced,Performance Index
0,7,99,Yes,9,1,91.0
1,4,82,No,4,2,65.0
2,8,51,Yes,7,2,45.0
3,5,52,Yes,5,2,36.0
4,7,75,No,8,5,66.0


In [3]:
df['Extracurricular Activities'].replace(('Yes', 'No'), (1, 0), inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Extracurricular Activities'].replace(('Yes', 'No'), (1, 0), inplace=True)
  df['Extracurricular Activities'].replace(('Yes', 'No'), (1, 0), inplace=True)


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 6 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   Hours Studied                     10000 non-null  int64  
 1   Previous Scores                   10000 non-null  int64  
 2   Extracurricular Activities        10000 non-null  int64  
 3   Sleep Hours                       10000 non-null  int64  
 4   Sample Question Papers Practiced  10000 non-null  int64  
 5   Performance Index                 10000 non-null  float64
dtypes: float64(1), int64(5)
memory usage: 468.9 KB


In [5]:
X = df.drop('Performance Index', axis=1)
y = df['Performance Index']

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

(7000, 5)
(3000, 5)
(7000,)
(3000,)


In [7]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [14]:
import numpy as np
y_train = np.array(y_train)
y_test = np.array(y_test)

In [18]:
class RidgeRegression:
    def __init__(self, learning_rate=0.01, iterations=1000, lambda_param=1.0):
        self.learning_rate = learning_rate
        self.iterations = iterations
        self.lambda_param = lambda_param  # Regularization strength
        self.weights = None
        self.bias = 0

    def fit(self, X, y):
        n_samples, n_features = len(X), len(X[0])
        self.weights = [0] * n_features  # Initialize weights to zero
        self.bias = 0

        for _ in range(self.iterations):
            y_pred = [self.bias + sum(self.weights[j] * X[i][j] for j in range(n_features)) for i in range(n_samples)]

            # Compute gradients with L2 Regularization
            dw = [(-2 / n_samples) * sum(X[i][j] * (y[i] - y_pred[i]) for i in range(n_samples)) + 2 * self.lambda_param * self.weights[j] for j in range(n_features)]
            db = (-2 / n_samples) * sum(y[i] - y_pred[i] for i in range(n_samples))

            # Update parameters
            self.weights = [self.weights[j] - self.learning_rate * dw[j] for j in range(n_features)]
            self.bias -= self.learning_rate * db

    def predict(self, X):
        return [self.bias + sum(self.weights[j] * X[i][j] for j in range(len(X[0]))) for i in range(len(X))]

    def get_params(self):
        return {'weights': self.weights, 'bias': self.bias}

    def mse(self, y_true, y_pred):
        return np.mean((y_true - y_pred) ** 2)

# Example Usage
if __name__ == "__main__":
    model = RidgeRegression(learning_rate=0.01, iterations=1000, lambda_param=0.1)
    model.fit(X_train_scaled, y_train)


In [19]:
predictions = model.predict(X_test_scaled)

print("Predictions:", predictions)

Predictions: [54.78571162757319, 25.640228842445463, 48.627108490264376, 33.55499183348593, 44.158602675104596, 58.75648444931852, 46.751665299127836, 83.64322204538121, 39.38733810751711, 70.5588467011533, 62.81368636874116, 27.68715118126261, 60.262069066125676, 45.048843600603846, 49.6377857718024, 45.58715483247147, 57.78901788854009, 76.83776253289533, 64.76208292654027, 47.46845318943947, 61.83077482805533, 19.119712189593145, 40.004555270333505, 74.82738253629131, 81.44727674079998, 33.28479674649217, 31.089996804386665, 85.82225251397674, 51.52700130333489, 56.222286014117444, 59.67824107623072, 35.8976693788798, 40.18875113823093, 50.43357537044697, 36.5552792604686, 62.93517532741528, 50.48480832038392, 47.425142052681366, 74.42015007717373, 76.66911206090381, 78.37298636206316, 74.2000425776412, 67.04078453862957, 90.07494936137141, 68.98021131922883, 63.091929948063104, 72.68069629414002, 68.26437561555402, 73.54065964054243, 43.46540565548327, 69.86857111706145, 57.3414033

In [20]:
pd.DataFrame({'Actual': y_test, 'Predicted': predictions})

Unnamed: 0,Actual,Predicted
0,51.0,54.785712
1,20.0,25.640229
2,46.0,48.627108
3,28.0,33.554992
4,41.0,44.158603
...,...,...
2995,32.0,33.997711
2996,40.0,39.156480
2997,33.0,35.432312
2998,69.0,66.822775


In [21]:
model.get_params()

{'weights': [6.735407705363315,
  16.002356919372215,
  0.27599200142184943,
  0.7511637923578401,
  0.5174546444139265],
 'bias': 55.39971419247837}

In [22]:
mse = model.mse(y_test, predictions)
print("Mean Squared Error:", mse)

Mean Squared Error: 7.557025329223965
