In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split

In [2]:
df = pd.read_csv('/content/Student_Performance.csv')
df.head()

Unnamed: 0,Hours Studied,Previous Scores,Extracurricular Activities,Sleep Hours,Sample Question Papers Practiced,Performance Index
0,7,99,Yes,9,1,91.0
1,4,82,No,4,2,65.0
2,8,51,Yes,7,2,45.0
3,5,52,Yes,5,2,36.0
4,7,75,No,8,5,66.0


In [3]:
df['Extracurricular Activities'].replace(('Yes', 'No'), (1, 0), inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Extracurricular Activities'].replace(('Yes', 'No'), (1, 0), inplace=True)
  df['Extracurricular Activities'].replace(('Yes', 'No'), (1, 0), inplace=True)


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 6 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   Hours Studied                     10000 non-null  int64  
 1   Previous Scores                   10000 non-null  int64  
 2   Extracurricular Activities        10000 non-null  int64  
 3   Sleep Hours                       10000 non-null  int64  
 4   Sample Question Papers Practiced  10000 non-null  int64  
 5   Performance Index                 10000 non-null  float64
dtypes: float64(1), int64(5)
memory usage: 468.9 KB


In [5]:
X = df.drop('Performance Index', axis=1)
y = df['Performance Index']

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

(7000, 5)
(3000, 5)
(7000,)
(3000,)


In [7]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [8]:
import numpy as np
y_train = np.array(y_train)
y_test = np.array(y_test)

In [9]:
import random

class LassoRegression:
    def __init__(self, learning_rate=0.01, iterations=1000, lambda_param=0.1):
        self.learning_rate = learning_rate
        self.iterations = iterations
        self.lambda_param = lambda_param
        self.weights = None
        self.bias = 0

    def fit(self, X, y):
        n_samples, n_features = len(X), len(X[0])
        self.weights = [0] * n_features  # Initialize weights to zero

        for _ in range(self.iterations):
            y_pred = [self.bias + sum(w * x for w, x in zip(self.weights, X[i])) for i in range(n_samples)]

            # Compute gradients
            dw = [(-2/n_samples) * sum(X[i][j] * (y[i] - y_pred[i]) for i in range(n_samples)) for j in range(n_features)]
            db = (-2/n_samples) * sum(y[i] - y_pred[i] for i in range(n_samples))

            # Apply L1 Regularization (Soft-thresholding)
            self.weights = [w - self.learning_rate * (dw[j] + self.lambda_param * (1 if w > 0 else -1)) for j, w in enumerate(self.weights)]
            self.bias -= self.learning_rate * db

    def predict(self, X):
        return [self.bias + sum(w * x for w, x in zip(self.weights, X[i])) for i in range(len(X))]

    def mse(self, y_true, y_pred):
        return np.mean((y_true - y_pred) ** 2)

# Example usage:
if __name__ == "__main__":
    # Train Lasso Regression model
    model = LassoRegression(learning_rate=0.01, iterations=1000, lambda_param=0.1)
    model.fit(X_train_scaled, y_train)


In [10]:
# Test on new data
predictions = model.predict(X_test_scaled)
print("Predictions:", predictions)
pd.DataFrame({'Actual': y_test, 'Predicted': predictions})

Predictions: [54.800690623220056, 22.70777740827453, 47.86500796592385, 31.297690487208808, 43.17013671064188, 59.14989400049672, 46.02039567082863, 86.29851983865524, 37.77784577661606, 71.8834592790975, 63.4761843237005, 25.053269512270894, 60.97599096502986, 44.05561278632327, 48.956530321830826, 44.85184113243898, 58.03392736440584, 78.89544758190898, 65.7823947234, 46.64145278904563, 62.49642590523368, 15.565883058906309, 38.425969052046895, 76.85861260071738, 83.8873427751983, 31.282170255122153, 28.860682092138727, 88.61440206058407, 51.069505536691274, 56.27703157276529, 60.17287808556121, 33.79517470917931, 38.62484483714424, 50.07028292383698, 34.833735941342006, 63.64185824871032, 49.84047517140121, 46.60986030724535, 76.146850600476, 78.61640034117175, 80.54081087046136, 75.91183371548007, 68.12724755009918, 93.52759205818619, 70.15046674676066, 63.97078425504617, 74.4540886362614, 69.43300051134372, 75.22180685872247, 42.33724853725925, 71.30246492956803, 57.56844221251229

Unnamed: 0,Actual,Predicted
0,51.0,54.800691
1,20.0,22.707777
2,46.0,47.865008
3,28.0,31.297690
4,41.0,43.170137
...,...,...
2995,32.0,31.747743
2996,40.0,37.582397
2997,33.0,33.460544
2998,69.0,67.785510


In [11]:
mse = model.mse(y_test, predictions)
print("Mean Squared Error:", mse)

Mean Squared Error: 4.100316357874041
