In [1]:
from google.colab import drive
import pandas as pd

# Mount Google Drive
drive.mount('/content/drive')

# Path to your CSV file
file_path = '/content/drive/My Drive/slt_cv/all_correct.csv'



Mounted at /content/drive


In [2]:
# Load the dataset
dataset = pd.read_csv(file_path)

# Display the first few rows
print(dataset.head())

# Check class balance
print(dataset['fail/pass'].value_counts(normalize=True) * 100)

   OL_Passes  OL_Credits OL_Math OL_English OL_Sinhala_Tamil  AL_Passes  \
0          9           7       S          S                C          0   
1          6           2       A          S                F          0   
2          8           3       S          S                A          0   
3          7           3       A          F                S          3   
4          7           4       C          F                F          2   

   fail/pass  
0          0  
1          0  
2          0  
3          0  
4          0  
fail/pass
0    50.0
1    50.0
Name: proportion, dtype: float64


In [3]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Encoding categorical grades
grade_mapping = {'A': 4, 'B': 3, 'C': 2, 'S': 1, 'F': 0}
dataset['OL_Math'] = dataset['OL_Math'].map(grade_mapping)
dataset['OL_English'] = dataset['OL_English'].map(grade_mapping)
dataset['OL_Sinhala_Tamil'] = dataset['OL_Sinhala_Tamil'].map(grade_mapping)

# Splitting data
X = dataset.drop(columns=['fail/pass'])
y = dataset['fail/pass']

In [4]:

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

# Model training
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)

# Predictions
y_pred = rf_model.predict(X_test)

# Evaluation
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        51
           1       1.00      1.00      1.00        47

    accuracy                           1.00        98
   macro avg       1.00      1.00      1.00        98
weighted avg       1.00      1.00      1.00        98



In [5]:
import pandas as pd
import numpy as np

# Sample manual test data
test_data = pd.DataFrame({
    "OL_Passes": [6, 5, 8, 7, 6],
    "OL_Credits": [3, 2, 4, 3, 2],
    "OL_Math": ["A", "F", "B", "C", "S"],
    "OL_English": ["B", "S", "A", "C", "F"],
    "OL_Sinhala_Tamil": ["C", "S", "A", "B", "F"],
    "AL_Passes": [3, 2, 3, 1, 3],
    "fail/pass": [1, 0, 1, 0, 0]  # Ground truth for testing
})

# Grade Mapping
grade_mapping = {'A': 4, 'B': 3, 'C': 2, 'S': 1, 'F': 0}
test_data['OL_Math'] = test_data['OL_Math'].map(grade_mapping)
test_data['OL_English'] = test_data['OL_English'].map(grade_mapping)
test_data['OL_Sinhala_Tamil'] = test_data['OL_Sinhala_Tamil'].map(grade_mapping)

# Drop the ground truth column temporarily for predictions
X_test = test_data.drop(columns=["fail/pass"])

# Display data
print(X_test)

   OL_Passes  OL_Credits  OL_Math  OL_English  OL_Sinhala_Tamil  AL_Passes
0          6           3        4           3                 2          3
1          5           2        0           1                 1          2
2          8           4        3           4                 4          3
3          7           3        2           2                 3          1
4          6           2        1           0                 0          3


In [7]:
# Make predictions
predictions = rf_model.predict(X_test)

# Add predictions to the test data
test_data['predicted_fail/pass'] = predictions

# Compare predictions with ground truth
print(test_data)

   OL_Passes  OL_Credits  OL_Math  OL_English  OL_Sinhala_Tamil  AL_Passes  \
0          6           3        4           3                 2          3   
1          5           2        0           1                 1          2   
2          8           4        3           4                 4          3   
3          7           3        2           2                 3          1   
4          6           2        1           0                 0          3   

   fail/pass  predicted_fail/pass  
0          1                    0  
1          0                    0  
2          1                    1  
3          0                    0  
4          0                    0  
