### Load Data

In [1]:
import pandas as pd

In [2]:
# Load the dataset from a CSV file located in the current working directory
data = pd.read_csv('Prodigy University Dataset.csv')
# Preview the first five rows to check the column names and values
# NOTE(review): the 'Unnamed: 0' column in the preview looks like a row index that was saved with the CSV — confirm; pd.read_csv(..., index_col=0) would avoid loading it as a column
data.head()

Unnamed: 0,sat_sum,hs_gpa,fy_gpa
0,508,3.4,3.18
1,488,4.0,3.33
2,464,3.75,3.25
3,380,3.75,2.42
4,428,4.0,2.63


### Data pre-processing

In [3]:
# Pull the two predictors and the target out of the DataFrame as NumPy arrays,
# the form needed later to construct tensors.
feature_columns = ['sat_sum', 'hs_gpa']
X = data[feature_columns].to_numpy()
# Keep the target as a column vector: one row per record, a single column.
y = data['fy_gpa'].to_numpy().reshape(-1, 1)
# Report both shapes, one per line.
print(X.shape, y.shape, sep='\n')

(1000, 2)
(1000, 1)


In [4]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows as a test set and train on the remaining 80%.
# A fixed random_state makes the shuffle (and therefore the split) repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [5]:
from sklearn.preprocessing import StandardScaler

# Standardize the features (zero mean, unit variance) so the network is easier to train.
# The scaler's mean and standard deviation are learned from the training split only;
# the test split is then transformed with those same statistics. Re-fitting the scaler
# on the test split would put the two splits on different scales and leak test-set
# statistics into the evaluation.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [6]:
# Sanity check: shape of the scaled training features as (rows, columns)
X_train.shape

(800, 2)

In [8]:
import torch
# Turn each NumPy split into a float32 PyTorch tensor.
# The order of the source arrays matches the order of the names being assigned.
X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor = (
    torch.tensor(split, dtype=torch.float32)
    for split in (X_train, y_train, X_test, y_test)
)

### Building the Model

In [9]:
import torch.nn as nn

In [10]:
# Seed PyTorch's global RNG before the layers are created so that their randomly
# initialized weights (and the untrained predictions and loss computed from them
# in the following cells) are identical on every Restart & Run All.
# 42 matches the random_state used for the train/test split.
torch.manual_seed(42)

# Small feed-forward regressor: 2 input features -> 2 hidden neurons -> 1 output
model = nn.Sequential(
    nn.Linear(2, 2),  # hidden layer: 2 input features -> 2 neurons
    nn.Sigmoid(),     # sigmoid activation applied to the hidden layer
    nn.Linear(2, 1),  # output layer: single neuron, no activation (linear output)
)

In [11]:
# Forward propagation: feed the whole training set through the model in one batch.
# No training step has run yet, so the weights are still at their initial values.
preds = model(X_train_tensor)

In [12]:
# Peek at the first five predictions only, rather than displaying the whole tensor
preds[:5]

tensor([[-0.9278],
        [-0.8987],
        [-0.6996],
        [-0.8377],
        [-0.7490]], grad_fn=<SliceBackward0>)

In [13]:
from torch.nn import MSELoss

In [14]:
# Calculating loss: mean squared error between the predictions and the true fy_gpa values
criterion = MSELoss()
loss = criterion(preds, y_train_tensor)
print(loss)
# The loss is very high because the model has not been trained yet: its weights are still at their initial values

tensor(11.0022, grad_fn=<MseLossBackward0>)
