In [1]:
import pandas as pd
import numpy as np
from transformers import AutoTokenizer, AutoModel
import torch
import tensorflow as tf
from tensorflow.keras.layers import LSTM, Dense, Bidirectional
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error, r2_score
from sklearn.model_selection import train_test_split
import clang.cindex
import tempfile
from sklearn.model_selection import KFold



# Run on the GPU when PyTorch reports one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [2]:

# Read the dataset from 'Data_Ast.csv' into `data`; later cells read its
# 'Total_Marks' column as the regression target.
data = pd.read_csv('Data_Ast.csv')

# Preview the first five rows.
data.head(n=5)

Unnamed: 0.1,Unnamed: 0,Question,Correct_Code,Code_with_Error,Total_Marks,AST_full
0,0,Print the factors of a number,#include <stdio.h>\nvoid printFactors(int numb...,#include <stdio.h>\nvoid printFactors(int numb...,7.0,CursorKind.FUNCTION_DECL printFactors\n Curso...
1,1,Print the factors of a number,#include <stdio.h>\nvoid printFactors(int numb...,#include <stdio.h>\nvoid printFactors(int numb...,8.0,CursorKind.FUNCTION_DECL printFactors\n Curso...
2,2,Print the factors of a number,#include <stdio.h>\nvoid printFactors(int numb...,#include <stdio.h>\nvoid printFactors(int numb...,5.0,CursorKind.FUNCTION_DECL printFactors\n Curso...
3,3,Print the factors of a number,#include <stdio.h>\nvoid printFactors(int numb...,#include <stdio.h>\n\nvoid printFactors(int nu...,7.0,CursorKind.FUNCTION_DECL printFactors\n Curso...
4,4,Print the factors of a number,#include <stdio.h>\nvoid printFactors(int numb...,#include <stdio.h>\n\nvoid printFactors(int nu...,5.0,CursorKind.FUNCTION_DECL printFactors\n Curso...


In [5]:
# NOTE(review): in the cells shown, `labels` is first assigned in a later cell,
# so this cell raises NameError on a fresh kernel (Restart & Run All).
len(labels)

1000

In [15]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_percentage_error
from sklearn.model_selection import KFold
import numpy as np
import pandas as pd  # Assuming pandas is used for data storage

# Select the compute device: GPU when PyTorch can see one, otherwise CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the pre-computed code embeddings.
# NOTE: torch.load unpickles the file, so only load checkpoints from a trusted source.
loaded_embeddings = torch.load('embeddings_code.pt', map_location=device)
code_embeddings = loaded_embeddings['code_embeddings']  # Expected shape: [1, 1000, 1, 512, 768]
code_embeddings = code_embeddings.squeeze(0).squeeze(1)  # Drop the singleton dims -> [1000, 512, 768]
code_embeddings = code_embeddings[:, 0, :]  # Keep only position 0 (CLS token) -> [1000, 768]
print(f"Embeddings loaded. Shape: {code_embeddings.shape}")

# Regression targets come from the 'Total_Marks' column of the `data` DataFrame
# (loaded in an earlier cell). Convert through NumPy explicitly instead of
# relying on torch.tensor's implicit handling of a pandas Series.
labels = torch.tensor(data['Total_Marks'].to_numpy(), dtype=torch.float32)
assert len(labels) == code_embeddings.size(0), "Number of labels must match number of embeddings"

# Min-max normalise the labels to [0, 1]. labels_min / labels_max are kept so
# that predictions can be mapped back to the original mark scale later.
labels_min = labels.min()
labels_max = labels.max()
label_range = labels_max - labels_min
if not label_range.item() > 0:
    # A zero (or NaN) range would silently turn every target into NaN/inf.
    raise ValueError(
        "Cannot min-max normalise 'Total_Marks': values are constant or contain NaN"
    )
labels = (labels - labels_min) / label_range

# Custom Dataset for embeddings and labels
class CodeDataset(Dataset):
    """Dataset that pairs each embedding with the label at the same position.

    Args:
        embeddings: Indexable container of input features.
        labels: Indexable container of targets; its length defines the
            dataset size.
    """

    def __init__(self, embeddings, labels):
        self.embeddings, self.labels = embeddings, labels

    def __len__(self):
        # The label container determines how many samples are exposed.
        return len(self.labels)

    def __getitem__(self, idx):
        features = self.embeddings[idx]
        target = self.labels[idx]
        return features, target

# BiLSTM Model for regression with an extra hidden layer
class BiLSTMModel(nn.Module):
    """Bidirectional LSTM regressor with one hidden dense layer.

    Args:
        input_dim: Feature size of each time step.
        hidden_dim: Hidden state size of each LSTM direction.
        num_layers: Number of stacked LSTM layers.

    The forward pass takes ``x`` of shape [batch_size, seq_len, input_dim]
    and returns a tensor of shape [batch_size, 1].
    """

    def __init__(self, input_dim, hidden_dim, num_layers):
        super().__init__()
        # Sub-modules are created in the same order as before so that seeded
        # weight initialisation is unaffected.
        self.bilstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=True,
        )
        # Both directions are concatenated, hence 2 * hidden_dim inputs.
        self.fc1 = nn.Linear(in_features=2 * hidden_dim, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=1)

    def forward(self, x):
        # Per-step outputs of the top LSTM layer: [batch_size, seq_len, 2*hidden_dim]
        sequence_states = self.bilstm(x)[0]
        # Average over the sequence axis -> [batch_size, 2*hidden_dim]
        pooled = torch.mean(sequence_states, dim=1)
        hidden = self.fc1(pooled).relu()
        return self.fc2(hidden)  # [batch_size, 1]

# Hyperparameters
input_dim = code_embeddings.size(-1)  # Width of one embedding vector (768 for the tensor loaded above)
hidden_dim = 256  # Hidden state size per LSTM direction
num_layers = 2   # Number of stacked BiLSTM layers
batch_size = 32  # Mini-batch size for training and evaluation
num_epochs = 100  # Training epochs per fold
learning_rate = 0.001

# Loss function (the model and optimizer are re-created inside every fold)
criterion = nn.MSELoss()

# Constants that map normalised values back to the original mark scale
label_scale = (labels_max - labels_min).item()
label_offset = labels_min.item()

# Seed PyTorch so weight initialisation and batch shuffling are repeatable
# (GPU kernels may still introduce small run-to-run differences).
torch.manual_seed(42)

# 10-Fold Cross-Validation
kf = KFold(n_splits=10, shuffle=True, random_state=42)
predicted_values = np.zeros(len(labels))  # Out-of-fold predictions, filled fold by fold

# KFold only needs the number of samples, so split over plain positions.
for fold, (train_idx, test_idx) in enumerate(kf.split(np.arange(len(labels)))):
    print(f"Fold {fold + 1}")

    # Prepare data
    train_dataset = CodeDataset(code_embeddings[train_idx], labels[train_idx])
    test_dataset = CodeDataset(code_embeddings[test_idx], labels[test_idx])
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # Fresh model and optimizer so no weights leak between folds
    model = BiLSTMModel(input_dim=input_dim, hidden_dim=hidden_dim, num_layers=num_layers).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    # Training loop
    for epoch in range(num_epochs):
        model.train()
        epoch_loss = 0.0  # Sum of the per-batch MSE values (not an average)

        for inputs, target in train_loader:
            # Each sample is a single vector; add a sequence axis of length 1 for the LSTM
            inputs = inputs.to(device).float().unsqueeze(1)
            target = target.to(device).float()

            # Forward pass
            output = model(inputs).squeeze(-1)  # [batch_size]
            loss = criterion(output, target)

            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()

        print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {epoch_loss:.4f}")

    # Evaluate the model on the held-out fold
    model.eval()
    test_preds = []
    test_targets = []

    with torch.no_grad():
        for inputs, target in test_loader:
            inputs = inputs.to(device).float().unsqueeze(1)
            target = target.to(device).float()

            output = model(inputs).squeeze(-1)  # [batch_size]
            test_preds.extend(output.cpu().numpy())
            test_targets.extend(target.cpu().numpy())

    # De-normalize predictions and targets back to the original mark scale
    test_preds = np.array(test_preds) * label_scale + label_offset
    test_targets = np.array(test_targets) * label_scale + label_offset

    # test_loader is not shuffled, so predictions line up with test_idx
    predicted_values[test_idx] = test_preds

    # Per-fold metrics. RMSE is computed as sqrt(MSE) because the `squared`
    # keyword of mean_squared_error was removed in recent scikit-learn releases.
    # NOTE(review): MAPE becomes extremely large if any true mark is 0 -- check the data.
    r2 = r2_score(test_targets, test_preds)
    rmse = np.sqrt(mean_squared_error(test_targets, test_preds))
    mape = mean_absolute_percentage_error(test_targets, test_preds)

    print(f"Fold {fold + 1} R²: {r2:.4f}, RMSE: {rmse:.4f}, MAPE: {mape:.4f}")

# Attach the out-of-fold predictions positionally. KFold yields row positions,
# not index labels, so a label-based `.loc` write would misalign whenever
# `data` does not carry a default 0..n-1 index.
data['Predictions'] = predicted_values

# Save the predicted values back to 'data'
data.to_csv('data_with_predictions.csv', index=False)
print("Predictions saved to 'data_with_predictions.csv'!")

# Final metrics on the entire dataset (all out-of-fold predictions together)
true_values = labels.cpu().numpy() * label_scale + label_offset
r2_final = r2_score(true_values, predicted_values)
rmse_final = np.sqrt(mean_squared_error(true_values, predicted_values))
mape_final = mean_absolute_percentage_error(true_values, predicted_values)

print(f"Final Metrics - R²: {r2_final:.4f}, RMSE: {rmse_final:.4f}, MAPE: {mape_final:.4f}")


Embeddings loaded. Shape: torch.Size([1000, 768])
Fold 1
Epoch 1/100, Loss: 2.5622
Epoch 2/100, Loss: 1.4541
Epoch 3/100, Loss: 1.3922
Epoch 4/100, Loss: 1.3034
Epoch 5/100, Loss: 1.2679
Epoch 6/100, Loss: 1.2522
Epoch 7/100, Loss: 1.2370
Epoch 8/100, Loss: 1.4184
Epoch 9/100, Loss: 1.2566
Epoch 10/100, Loss: 1.1181
Epoch 11/100, Loss: 1.1202
Epoch 12/100, Loss: 1.1388
Epoch 13/100, Loss: 1.3160
Epoch 14/100, Loss: 1.4063
Epoch 15/100, Loss: 1.0895
Epoch 16/100, Loss: 1.1566
Epoch 17/100, Loss: 1.0332
Epoch 18/100, Loss: 1.0534
Epoch 19/100, Loss: 1.0054
Epoch 20/100, Loss: 1.1219
Epoch 21/100, Loss: 1.0215
Epoch 22/100, Loss: 1.0521
Epoch 23/100, Loss: 1.0159
Epoch 24/100, Loss: 1.0387
Epoch 25/100, Loss: 1.0246
Epoch 26/100, Loss: 0.9851
Epoch 27/100, Loss: 0.9898
Epoch 28/100, Loss: 1.0412
Epoch 29/100, Loss: 0.9257
Epoch 30/100, Loss: 0.9510
Epoch 31/100, Loss: 0.9640
Epoch 32/100, Loss: 1.0106
Epoch 33/100, Loss: 0.9373
Epoch 34/100, Loss: 1.0136
Epoch 35/100, Loss: 0.9122
Epoch 3



Epoch 1/100, Loss: 4.3556
Epoch 2/100, Loss: 1.6236
Epoch 3/100, Loss: 1.4929
Epoch 4/100, Loss: 1.4113
Epoch 5/100, Loss: 1.3385
Epoch 6/100, Loss: 1.2602
Epoch 7/100, Loss: 1.1883
Epoch 8/100, Loss: 1.1499
Epoch 9/100, Loss: 1.3984
Epoch 10/100, Loss: 1.2109
Epoch 11/100, Loss: 1.2252
Epoch 12/100, Loss: 1.1356
Epoch 13/100, Loss: 1.1191
Epoch 14/100, Loss: 1.0898
Epoch 15/100, Loss: 1.0563
Epoch 16/100, Loss: 1.0618
Epoch 17/100, Loss: 1.1233
Epoch 18/100, Loss: 1.0999
Epoch 19/100, Loss: 1.2269
Epoch 20/100, Loss: 0.9940
Epoch 21/100, Loss: 1.0210
Epoch 22/100, Loss: 0.9823
Epoch 23/100, Loss: 1.0888
Epoch 24/100, Loss: 0.9877
Epoch 25/100, Loss: 1.0728
Epoch 26/100, Loss: 1.1453
Epoch 27/100, Loss: 0.9986
Epoch 28/100, Loss: 0.9970
Epoch 29/100, Loss: 0.9254
Epoch 30/100, Loss: 1.0287
Epoch 31/100, Loss: 1.0723
Epoch 32/100, Loss: 0.9365
Epoch 33/100, Loss: 0.8911
Epoch 34/100, Loss: 1.0301
Epoch 35/100, Loss: 0.9900
Epoch 36/100, Loss: 0.9038
Epoch 37/100, Loss: 0.8754
Epoch 38/1



Epoch 1/100, Loss: 3.4604
Epoch 2/100, Loss: 1.4909
Epoch 3/100, Loss: 1.4640
Epoch 4/100, Loss: 1.4919
Epoch 5/100, Loss: 1.4773
Epoch 6/100, Loss: 1.3422
Epoch 7/100, Loss: 1.2916
Epoch 8/100, Loss: 1.3462
Epoch 9/100, Loss: 1.1843
Epoch 10/100, Loss: 1.1473
Epoch 11/100, Loss: 1.1577
Epoch 12/100, Loss: 1.1077
Epoch 13/100, Loss: 1.0692
Epoch 14/100, Loss: 1.1672
Epoch 15/100, Loss: 1.0300
Epoch 16/100, Loss: 1.1409
Epoch 17/100, Loss: 1.1230
Epoch 18/100, Loss: 1.0383
Epoch 19/100, Loss: 1.0106
Epoch 20/100, Loss: 1.0565
Epoch 21/100, Loss: 1.0170
Epoch 22/100, Loss: 0.9996
Epoch 23/100, Loss: 1.1556
Epoch 24/100, Loss: 1.0863
Epoch 25/100, Loss: 1.0188
Epoch 26/100, Loss: 1.0742
Epoch 27/100, Loss: 0.9690
Epoch 28/100, Loss: 0.9948
Epoch 29/100, Loss: 0.9961
Epoch 30/100, Loss: 0.9957
Epoch 31/100, Loss: 0.9373
Epoch 32/100, Loss: 0.9338
Epoch 33/100, Loss: 0.9863
Epoch 34/100, Loss: 0.9619
Epoch 35/100, Loss: 0.9746
Epoch 36/100, Loss: 0.9991
Epoch 37/100, Loss: 0.9118
Epoch 38/1



Epoch 1/100, Loss: 3.5870
Epoch 2/100, Loss: 1.5284
Epoch 3/100, Loss: 1.4180
Epoch 4/100, Loss: 1.3689
Epoch 5/100, Loss: 1.3077
Epoch 6/100, Loss: 1.5763
Epoch 7/100, Loss: 1.3813
Epoch 8/100, Loss: 1.3083
Epoch 9/100, Loss: 1.2758
Epoch 10/100, Loss: 1.2206
Epoch 11/100, Loss: 1.1609
Epoch 12/100, Loss: 1.2397
Epoch 13/100, Loss: 1.1112
Epoch 14/100, Loss: 1.1442
Epoch 15/100, Loss: 1.1580
Epoch 16/100, Loss: 1.0596
Epoch 17/100, Loss: 1.1508
Epoch 18/100, Loss: 1.0495
Epoch 19/100, Loss: 1.1518
Epoch 20/100, Loss: 1.0703
Epoch 21/100, Loss: 1.0182
Epoch 22/100, Loss: 1.0036
Epoch 23/100, Loss: 1.0773
Epoch 24/100, Loss: 1.0996
Epoch 25/100, Loss: 1.0743
Epoch 26/100, Loss: 1.0623
Epoch 27/100, Loss: 0.9723
Epoch 28/100, Loss: 1.0518
Epoch 29/100, Loss: 0.9894
Epoch 30/100, Loss: 0.9820
Epoch 31/100, Loss: 1.0234
Epoch 32/100, Loss: 0.9474
Epoch 33/100, Loss: 0.9494
Epoch 34/100, Loss: 0.9444
Epoch 35/100, Loss: 0.9460
Epoch 36/100, Loss: 0.9422
Epoch 37/100, Loss: 1.0248
Epoch 38/1



Epoch 1/100, Loss: 3.5264
Epoch 2/100, Loss: 1.6670
Epoch 3/100, Loss: 1.4926
Epoch 4/100, Loss: 1.3897
Epoch 5/100, Loss: 1.3355
Epoch 6/100, Loss: 1.3937
Epoch 7/100, Loss: 1.4685
Epoch 8/100, Loss: 1.2795
Epoch 9/100, Loss: 1.2450
Epoch 10/100, Loss: 1.3210
Epoch 11/100, Loss: 1.3209
Epoch 12/100, Loss: 1.2725
Epoch 13/100, Loss: 1.1502
Epoch 14/100, Loss: 1.1204
Epoch 15/100, Loss: 1.1854
Epoch 16/100, Loss: 1.1150
Epoch 17/100, Loss: 1.1292
Epoch 18/100, Loss: 1.0801
Epoch 19/100, Loss: 1.0974
Epoch 20/100, Loss: 1.2808
Epoch 21/100, Loss: 1.1772
Epoch 22/100, Loss: 1.1702
Epoch 23/100, Loss: 1.0351
Epoch 24/100, Loss: 0.9950
Epoch 25/100, Loss: 1.0819
Epoch 26/100, Loss: 1.0037
Epoch 27/100, Loss: 0.9685
Epoch 28/100, Loss: 1.0601
Epoch 29/100, Loss: 1.0040
Epoch 30/100, Loss: 0.9939
Epoch 31/100, Loss: 1.0595
Epoch 32/100, Loss: 0.9479
Epoch 33/100, Loss: 1.0737
Epoch 34/100, Loss: 0.9772
Epoch 35/100, Loss: 1.0875
Epoch 36/100, Loss: 0.9832
Epoch 37/100, Loss: 0.9448
Epoch 38/1



Epoch 1/100, Loss: 3.1662
Epoch 2/100, Loss: 1.4883
Epoch 3/100, Loss: 1.4948
Epoch 4/100, Loss: 1.4530
Epoch 5/100, Loss: 1.3699
Epoch 6/100, Loss: 1.2504
Epoch 7/100, Loss: 1.2735
Epoch 8/100, Loss: 1.2793
Epoch 9/100, Loss: 1.1521
Epoch 10/100, Loss: 1.1881
Epoch 11/100, Loss: 1.1233
Epoch 12/100, Loss: 1.1863
Epoch 13/100, Loss: 1.0664
Epoch 14/100, Loss: 1.1166
Epoch 15/100, Loss: 1.1275
Epoch 16/100, Loss: 1.1176
Epoch 17/100, Loss: 1.1488
Epoch 18/100, Loss: 1.1338
Epoch 19/100, Loss: 1.0546
Epoch 20/100, Loss: 1.0300
Epoch 21/100, Loss: 1.0266
Epoch 22/100, Loss: 1.0568
Epoch 23/100, Loss: 1.0761
Epoch 24/100, Loss: 1.0284
Epoch 25/100, Loss: 1.0084
Epoch 26/100, Loss: 0.9563
Epoch 27/100, Loss: 1.0253
Epoch 28/100, Loss: 0.9922
Epoch 29/100, Loss: 0.9736
Epoch 30/100, Loss: 1.0181
Epoch 31/100, Loss: 0.9470
Epoch 32/100, Loss: 0.9047
Epoch 33/100, Loss: 0.9294
Epoch 34/100, Loss: 0.9814
Epoch 35/100, Loss: 0.9899
Epoch 36/100, Loss: 0.9531
Epoch 37/100, Loss: 0.9153
Epoch 38/1



Epoch 1/100, Loss: 3.9215
Epoch 2/100, Loss: 1.5635
Epoch 3/100, Loss: 1.4338
Epoch 4/100, Loss: 1.4204
Epoch 5/100, Loss: 1.4368
Epoch 6/100, Loss: 1.2298
Epoch 7/100, Loss: 1.2755
Epoch 8/100, Loss: 1.2207
Epoch 9/100, Loss: 1.2381
Epoch 10/100, Loss: 1.1429
Epoch 11/100, Loss: 1.1825
Epoch 12/100, Loss: 1.2523
Epoch 13/100, Loss: 1.1184
Epoch 14/100, Loss: 1.1909
Epoch 15/100, Loss: 1.1539
Epoch 16/100, Loss: 1.2757
Epoch 17/100, Loss: 1.0629
Epoch 18/100, Loss: 1.0668
Epoch 19/100, Loss: 1.1567
Epoch 20/100, Loss: 1.0371
Epoch 21/100, Loss: 1.0381
Epoch 22/100, Loss: 0.9888
Epoch 23/100, Loss: 1.0116
Epoch 24/100, Loss: 1.0048
Epoch 25/100, Loss: 1.1174
Epoch 26/100, Loss: 1.0290
Epoch 27/100, Loss: 0.9880
Epoch 28/100, Loss: 0.9356
Epoch 29/100, Loss: 0.9246
Epoch 30/100, Loss: 0.9991
Epoch 31/100, Loss: 0.9452
Epoch 32/100, Loss: 0.9584
Epoch 33/100, Loss: 0.9641
Epoch 34/100, Loss: 0.9665
Epoch 35/100, Loss: 1.0006
Epoch 36/100, Loss: 1.0031
Epoch 37/100, Loss: 0.9945
Epoch 38/1



Epoch 1/100, Loss: 2.9548
Epoch 2/100, Loss: 1.6033
Epoch 3/100, Loss: 1.4864
Epoch 4/100, Loss: 1.3934
Epoch 5/100, Loss: 1.4508
Epoch 6/100, Loss: 1.3498
Epoch 7/100, Loss: 1.2632
Epoch 8/100, Loss: 1.2382
Epoch 9/100, Loss: 1.4664
Epoch 10/100, Loss: 1.2727
Epoch 11/100, Loss: 1.1656
Epoch 12/100, Loss: 1.1440
Epoch 13/100, Loss: 1.1222
Epoch 14/100, Loss: 1.0920
Epoch 15/100, Loss: 1.1076
Epoch 16/100, Loss: 1.0368
Epoch 17/100, Loss: 1.3131
Epoch 18/100, Loss: 1.2230
Epoch 19/100, Loss: 1.0881
Epoch 20/100, Loss: 1.1330
Epoch 21/100, Loss: 1.0515
Epoch 22/100, Loss: 1.0320
Epoch 23/100, Loss: 0.9890
Epoch 24/100, Loss: 0.9611
Epoch 25/100, Loss: 0.9979
Epoch 26/100, Loss: 1.0119
Epoch 27/100, Loss: 1.0509
Epoch 28/100, Loss: 1.1138
Epoch 29/100, Loss: 1.1421
Epoch 30/100, Loss: 1.0934
Epoch 31/100, Loss: 1.0097
Epoch 32/100, Loss: 1.1193
Epoch 33/100, Loss: 0.9206
Epoch 34/100, Loss: 1.1039
Epoch 35/100, Loss: 0.9696
Epoch 36/100, Loss: 0.9270
Epoch 37/100, Loss: 0.9610
Epoch 38/1



Epoch 1/100, Loss: 3.7093
Epoch 2/100, Loss: 1.5765
Epoch 3/100, Loss: 1.4716
Epoch 4/100, Loss: 1.3817
Epoch 5/100, Loss: 1.4244
Epoch 6/100, Loss: 1.3885
Epoch 7/100, Loss: 1.4241
Epoch 8/100, Loss: 1.4334
Epoch 9/100, Loss: 1.2019
Epoch 10/100, Loss: 1.2734
Epoch 11/100, Loss: 1.1489
Epoch 12/100, Loss: 1.1467
Epoch 13/100, Loss: 1.0959
Epoch 14/100, Loss: 1.2027
Epoch 15/100, Loss: 1.2165
Epoch 16/100, Loss: 1.0825
Epoch 17/100, Loss: 1.1068
Epoch 18/100, Loss: 1.1744
Epoch 19/100, Loss: 1.0165
Epoch 20/100, Loss: 1.1741
Epoch 21/100, Loss: 1.0874
Epoch 22/100, Loss: 0.9608
Epoch 23/100, Loss: 1.1058
Epoch 24/100, Loss: 1.0032
Epoch 25/100, Loss: 0.9859
Epoch 26/100, Loss: 0.9698
Epoch 27/100, Loss: 0.9620
Epoch 28/100, Loss: 1.0316
Epoch 29/100, Loss: 1.0103
Epoch 30/100, Loss: 1.0249
Epoch 31/100, Loss: 1.1420
Epoch 32/100, Loss: 1.0895
Epoch 33/100, Loss: 0.9875
Epoch 34/100, Loss: 1.0076
Epoch 35/100, Loss: 0.9432
Epoch 36/100, Loss: 0.9071
Epoch 37/100, Loss: 0.8778
Epoch 38/1



Epoch 1/100, Loss: 3.9328
Epoch 2/100, Loss: 1.6024
Epoch 3/100, Loss: 1.4755
Epoch 4/100, Loss: 1.4064
Epoch 5/100, Loss: 1.3680
Epoch 6/100, Loss: 1.2744
Epoch 7/100, Loss: 1.2126
Epoch 8/100, Loss: 1.2330
Epoch 9/100, Loss: 1.2609
Epoch 10/100, Loss: 1.1657
Epoch 11/100, Loss: 1.1297
Epoch 12/100, Loss: 1.1138
Epoch 13/100, Loss: 1.1531
Epoch 14/100, Loss: 1.0689
Epoch 15/100, Loss: 1.3336
Epoch 16/100, Loss: 1.2155
Epoch 17/100, Loss: 1.0980
Epoch 18/100, Loss: 1.0409
Epoch 19/100, Loss: 0.9840
Epoch 20/100, Loss: 1.0226
Epoch 21/100, Loss: 1.0237
Epoch 22/100, Loss: 1.1067
Epoch 23/100, Loss: 1.0040
Epoch 24/100, Loss: 0.9448
Epoch 25/100, Loss: 1.0110
Epoch 26/100, Loss: 1.0243
Epoch 27/100, Loss: 0.9990
Epoch 28/100, Loss: 0.9476
Epoch 29/100, Loss: 0.9388
Epoch 30/100, Loss: 0.9584
Epoch 31/100, Loss: 0.9811
Epoch 32/100, Loss: 0.9154
Epoch 33/100, Loss: 1.0247
Epoch 34/100, Loss: 0.9590
Epoch 35/100, Loss: 0.9100
Epoch 36/100, Loss: 0.9392
Epoch 37/100, Loss: 0.9602
Epoch 38/1



In [16]:
# Preview the first five rows of `data`.
data.head(n=5)

Unnamed: 0.1,Unnamed: 0,Question,Correct_Code,Code_with_Error,Total_Marks,AST_full,Predictions
0,0,Print the factors of a number,#include <stdio.h>\nvoid printFactors(int numb...,#include <stdio.h>\nvoid printFactors(int numb...,7.0,CursorKind.FUNCTION_DECL printFactors\n Curso...,7.848756
1,1,Print the factors of a number,#include <stdio.h>\nvoid printFactors(int numb...,#include <stdio.h>\nvoid printFactors(int numb...,8.0,CursorKind.FUNCTION_DECL printFactors\n Curso...,6.890309
2,2,Print the factors of a number,#include <stdio.h>\nvoid printFactors(int numb...,#include <stdio.h>\nvoid printFactors(int numb...,5.0,CursorKind.FUNCTION_DECL printFactors\n Curso...,6.75139
3,3,Print the factors of a number,#include <stdio.h>\nvoid printFactors(int numb...,#include <stdio.h>\n\nvoid printFactors(int nu...,7.0,CursorKind.FUNCTION_DECL printFactors\n Curso...,6.147062
4,4,Print the factors of a number,#include <stdio.h>\nvoid printFactors(int numb...,#include <stdio.h>\n\nvoid printFactors(int nu...,5.0,CursorKind.FUNCTION_DECL printFactors\n Curso...,7.496598


In [19]:
# R² between the true marks and the stored predictions.
r2_score(y_true=data['Total_Marks'], y_pred=data['Predictions'])

0.1865222771684788

In [18]:
# Keep only the rows whose mark is strictly positive.
# NOTE(review): this rebinds `data` to a filtered frame, so its row count may no
# longer match the embeddings; re-running the cross-validation cell afterwards
# would trip its length assertion if any row was dropped here.
data = data.loc[data['Total_Marks'].gt(0)]

In [None]:
import torch

# Shard files to merge: batch_0.pt, batch_1.pt, ..., batch_9.pt
file_names = [f'batch_{i}.pt' for i in range(10)]

# Load every shard onto the CPU so the merge also works on a machine without
# the GPU the shards may have been saved from.
all_tensors = [torch.load(file_name, map_location='cpu') for file_name in file_names]

# Concatenate along dim 0 (the batch dimension); all other dimensions must match
ast_embeddings = torch.cat(all_tensors, dim=0)

# Save the combined tensor to a new .pt file
torch.save(ast_embeddings, 'combined_batches.pt')

# Report the shape of the tensor that was actually saved (the previous code
# referenced an undefined name here and raised NameError).
print(f"Combined tensor saved as 'combined_batches.pt' with shape: {ast_embeddings.shape}")
