In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split

In [5]:
df = pd.read_csv('/content/Student_Performance.csv')
df.head()

Unnamed: 0,Hours Studied,Previous Scores,Extracurricular Activities,Sleep Hours,Sample Question Papers Practiced,Performance Index
0,7,99,Yes,9,1,91.0
1,4,82,No,4,2,65.0
2,8,51,Yes,7,2,45.0
3,5,52,Yes,5,2,36.0
4,7,75,No,8,5,66.0


In [10]:
df['Extracurricular Activities'].replace(('Yes', 'No'), (1, 0), inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Extracurricular Activities'].replace(('Yes', 'No'), (1, 0), inplace=True)
  df['Extracurricular Activities'].replace(('Yes', 'No'), (1, 0), inplace=True)


In [16]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 6 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   Hours Studied                     10000 non-null  int64  
 1   Previous Scores                   10000 non-null  int64  
 2   Extracurricular Activities        10000 non-null  int64  
 3   Sleep Hours                       10000 non-null  int64  
 4   Sample Question Papers Practiced  10000 non-null  int64  
 5   Performance Index                 10000 non-null  float64
dtypes: float64(1), int64(5)
memory usage: 468.9 KB


In [11]:
X = df.drop('Performance Index', axis=1)
y = df['Performance Index']

In [12]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

(7000, 5)
(3000, 5)
(7000,)
(3000,)


In [17]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [26]:
import numpy as np

class PolynomialRegression:
    def __init__(self, degree=2, learning_rate=0.01, iterations=1000):
        self.degree = degree
        self.learning_rate = learning_rate
        self.iterations = iterations
        self.weights = None
        self.bias = None

    def expand_features(self, X):
        """Manually expands features to polynomial terms."""
        X_expanded = []
        for row in X:
            expanded_row = [1]  # Bias term
            for d in range(1, self.degree + 1):
                expanded_row.extend([x**d for x in row])
            X_expanded.append(expanded_row)
        return X_expanded

    def fit(self, X, y):
        X = self.expand_features(X)  # Expand input features
        m, n = len(X), len(X[0])  # Samples and features

        # Initialize weights and bias
        self.weights = [0] * n
        self.bias = 0

        # Gradient Descent
        for _ in range(self.iterations):
            y_pred = [sum(x * w for x, w in zip(row, self.weights)) + self.bias for row in X]

            # Compute gradients
            error = [yp - yt for yp, yt in zip(y_pred, y)]
            dw = [(1 / m) * sum(error[i] * X[i][j] for i in range(m)) for j in range(n)]
            db = (1 / m) * sum(error)

            # Update weights and bias
            self.weights = [w - self.learning_rate * dw_j for w, dw_j in zip(self.weights, dw)]
            self.bias -= self.learning_rate * db

    def predict(self, X):
        X = self.expand_features(X)
        return [sum(x * w for x, w in zip(row, self.weights)) + self.bias for row in X]

    def mean_squared_error(self, y_true, y_pred):
        """
        Compute Mean Squared Error (MSE)
        """
        return np.mean((y_true - y_pred) ** 2)


In [27]:
# Instantiate and train the model
model = PolynomialRegression(degree=2, learning_rate=0.01, iterations=1000)
model.fit(X_train_scaled, y_train)

In [28]:
# Predict and evaluate
predictions = model.predict(X_test_scaled)
pd.DataFrame({'ytest': y_test, 'ypred': predictions})

Unnamed: 0,ytest,ypred
6252,51.0,53.657942
4684,20.0,23.614447
1731,46.0,46.773288
4742,28.0,30.960480
4521,41.0,43.024799
...,...,...
8014,32.0,32.078220
1074,40.0,36.708050
3063,33.0,32.347572
6487,69.0,67.775135


In [29]:
mse = model.mean_squared_error(y_test, predictions)
print(f"Mean Squared Error: {mse}")

Mean Squared Error: 4.548109938412538
