In [8]:
pip install kaggle



In [17]:
import zipfile
import os

# Destination directory and source archive for the dataset.
extract_path = "/content/dataset"      # folder to extract into
zip_path = "/content/archive (1).zip"  # path to your zip file

# Create the destination (a no-op when it already exists), then unpack
# every member of the archive into it.
os.makedirs(extract_path, exist_ok=True)
with zipfile.ZipFile(zip_path, mode='r') as archive:
    archive.extractall(path=extract_path)

print("✅ Extraction complete!")


✅ Extraction complete!


In [30]:
import warnings

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras import Input
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.models import Sequential

# Suppress warnings
warnings.filterwarnings('ignore')

# --- Configuration ---
SEQUENCE_LENGTH = 10
# CSV inside the folder the archive was extracted to; this is the path that
# preprocess_data() receives and reads.
DATA_FILE = "/content/dataset/student-por.csv"   # ✅ your dataset path

# --- Preprocessing Functions ---
def create_sequences(df_student, sequence_length, target_col='target'):
    """Build sliding windows over consecutive rows of ``df_student``.

    Window ``i`` holds rows ``i .. i + sequence_length - 1`` (all columns,
    including ``target_col``); its label is the ``target_col`` value of the
    row immediately after the window.

    Args:
        df_student: DataFrame whose rows are windowed in their current order.
        sequence_length: Number of rows per window; must be at least 1.
        target_col: Name of the column the labels are read from.

    Returns:
        ``(sequences, targets)``: a list of 2-D arrays of shape
        ``(sequence_length, n_columns)`` and a list of scalar labels. Both
        lists are empty when the frame has ``sequence_length`` rows or fewer.

    Raises:
        ValueError: If ``sequence_length`` is smaller than 1.
        KeyError: If ``target_col`` is not a column of ``df_student``.
    """
    if sequence_length < 1:
        raise ValueError(
            f"sequence_length must be a positive integer, got {sequence_length}."
        )

    # Convert once up front: slicing the DataFrame with .iloc on every
    # iteration is far slower than slicing a NumPy array.
    values = df_student.to_numpy()
    target_idx = df_student.columns.get_loc(target_col)

    sequences, targets = [], []
    for start in range(len(values) - sequence_length):
        end = start + sequence_length
        sequences.append(values[start:end])
        targets.append(values[end, target_idx])
    return sequences, targets

def preprocess_data(file_path, sequence_length=SEQUENCE_LENGTH):
    """Load the CSV at ``file_path`` and turn it into LSTM-ready train/test arrays.

    Steps: derive a binary ``target`` from ``G3`` (1 when ``G3 >= 10``), drop
    ``G3``, label-encode every object-dtype column, window the rows with
    ``create_sequences`` and split the windows 80/20 with a fixed random state.

    Args:
        file_path: Path of the CSV file to read.
        sequence_length: Number of consecutive rows per window.

    Returns:
        ``(X_train, X_test, y_train, y_test)`` where the ``X`` arrays have
        shape ``(n_windows, sequence_length, n_columns)``.

    Raises:
        ValueError: If the dataset is empty or too short to yield any window.
        KeyError: If the CSV has no ``G3`` column.
    """
    print("📥 Loading dataset...")
    # Read the path the caller asked for rather than a hard-coded location.
    df = pd.read_csv(file_path)

    if df.empty:
        raise ValueError("Dataset is empty!")

    # 🎯 Convert final grade (G3) into Pass/Fail
    df['target'] = (df['G3'] >= 10).astype(int)   # 1 = Pass, 0 = Fail
    df = df.drop(columns=['G3'])

    # Encode categorical columns (each column gets its own encoder)
    for col in df.select_dtypes(include=['object']).columns:
        df[col] = LabelEncoder().fit_transform(df[col])

    # Create sequences over the rows in file order (no student_id column to group by).
    # NOTE(review): each label comes from the row *after* its window, so none of
    # that row's own features are in the input — confirm this framing is intended.
    all_sequences, all_targets = create_sequences(df, sequence_length)

    if not all_sequences:
        raise ValueError("Not enough data to create sequences. Try reducing SEQUENCE_LENGTH.")

    # Convert to numpy: X is (n_windows, sequence_length, n_columns)
    X, y = np.array(all_sequences), np.array(all_targets)

    # Train/test split. train_test_split indexes along the first axis, so the
    # 3-D array can be split directly without flattening and reshaping back.
    # NOTE(review): neighbouring windows overlap, so a random split places
    # shared rows in both train and test.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    print(f"✅ Training sequences: {X_train.shape[0]}")
    print(f"✅ Testing sequences: {X_test.shape[0]}")

    return X_train, X_test, y_train, y_test

def build_lstm_model(input_shape):
    """Build and compile a single-layer LSTM binary classifier.

    Args:
        input_shape: ``(timesteps, features)`` shape of one input sequence,
            without the batch dimension.

    Returns:
        A compiled ``Sequential`` model: LSTM(64) -> Dropout(0.2) ->
        Dense(1, sigmoid), trained with binary cross-entropy and Adam and
        reporting accuracy.
    """
    model = Sequential([
        # Declare the input with an explicit Input layer; passing input_shape=
        # to the first layer of a Sequential model is deprecated in Keras 3.
        Input(shape=input_shape),
        LSTM(64, return_sequences=False),  # only the final hidden state is emitted
        Dropout(0.2),
        Dense(1, activation='sigmoid')  # ✅ binary classification
    ])
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

# --- Main Execution ---
# Seed Python, NumPy and TensorFlow so weight initialisation, dropout and batch
# shuffling are repeatable across runs (42 matches the split's random_state).
tf.keras.utils.set_random_seed(42)

try:
    X_train, X_test, y_train, y_test = preprocess_data(DATA_FILE)

    # The model input is one window: (timesteps, features).
    model = build_lstm_model((X_train.shape[1], X_train.shape[2]))

    print("\n🚀 Training model...")
    # NOTE(review): the test split doubles as validation data here, so the
    # reported test metrics are not from data the training loop never saw.
    model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test), verbose=1)
    print("🎉 Training complete.")

    loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
    print("\n📊 Model Evaluation:")
    print(f"Test Loss: {loss:.4f}")
    print(f"Test Accuracy: {accuracy * 100:.2f}%")

    # Prediction example: the slice keeps the leading batch axis, giving shape
    # (1, timesteps, features) for the last test window.
    sample_sequence = X_test[-1:]
    prediction = model.predict(sample_sequence, verbose=0)[0][0]
    actual_label = y_test[-1]

    print("\n🔮 Prediction Example:")
    print(f"Predicted Probability of Passing: {prediction:.4f}")
    print(f"Prediction: {'Pass ✅' if prediction > 0.5 else 'Fail ❌'}")
    print(f"Actual: {'Pass ✅' if actual_label == 1 else 'Fail ❌'}")

except Exception as e:
    print(f"❌ Error: {e}")
    # Re-raise so the cell visibly fails with a full traceback instead of
    # looking successful after printing a one-line message.
    raise


📥 Loading dataset...
✅ Training sequences: 511
✅ Testing sequences: 128

🚀 Training model...
Epoch 1/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 25ms/step - accuracy: 0.6266 - loss: 0.6344 - val_accuracy: 0.8750 - val_loss: 0.3799
Epoch 2/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.8184 - loss: 0.4942 - val_accuracy: 0.8750 - val_loss: 0.3745
Epoch 3/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.8494 - loss: 0.4146 - val_accuracy: 0.8750 - val_loss: 0.3578
Epoch 4/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.8588 - loss: 0.3914 - val_accuracy: 0.8750 - val_loss: 0.3426
Epoch 5/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.8474 - loss: 0.3892 - val_accuracy: 0.8750 - val_loss: 0.3274
Epoch 6/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.8285