<a href="https://colab.research.google.com/github/SakshamDataWizard/AI_Assistant/blob/main/AI_Assistant_cleaned.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**AI Assistant for Learning and Recommendations**


In [None]:
!pip install scikit-learn pandas numpy matplotlib seaborn joblib

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score
import joblib

print("✅ Setup complete.")


: 

In [None]:
# Toy pass/fail dataset: one row per student with hours studied, attendance
# percentage and a binary `passed` flag.
data = pd.DataFrame(
    dict(
        hours_studied=[1, 2, 3, 2.5, 5, 8, 7, 6],
        attendance_pct=[50, 60, 70, 65, 90, 95, 85, 80],
        passed=[0, 0, 0, 0, 1, 1, 1, 1],
    )
)
data.head()

In [None]:
# Train and evaluate a logistic-regression pass/fail classifier on the toy
# `data` frame, then persist the fitted model with joblib.
X = data[['hours_studied', 'attendance_pct']]
y = data['passed']

# Stratify the split: with only 8 rows, an unstratified 30% hold-out can end up
# containing a single class, which leaves ROC AUC undefined and makes the
# classification report degenerate.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

model = LogisticRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

# ROC AUC is scored on the predicted probability of the positive class (column 1).
y_prob = model.predict_proba(X_test)[:,1]
print("ROC AUC:", roc_auc_score(y_test, y_prob))

# Save relative to the working directory instead of the Colab-only '/content/'
# folder, so the cell also runs outside Colab (on Colab the cwd is /content).
joblib.dump(model, 'logreg_passfail.pkl')
print("✅ Model saved: logreg_passfail.pkl")

In [None]:
# Toy score-prediction dataset: per-row timing, prior score and difficulty
# features, plus the `score` column used as the regression target.
score_data = pd.DataFrame(
    dict(
        time_per_question_sec=[30, 45, 60, 20, 80, 35, 55, 40],
        past_score=[50, 60, 55, 70, 80, 65, 75, 68],
        difficulty=[2, 3, 2, 1, 4, 3, 4, 3],
        score=[55, 63, 58, 72, 85, 67, 78, 70],
    )
)
score_data.head()

In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Fit a random forest and a linear regression on the toy score data, report
# hold-out MAE / RMSE for both, and persist the fitted models with joblib.
X = score_data[['time_per_question_sec', 'past_score', 'difficulty']]
y = score_data['score']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

rf = RandomForestRegressor(random_state=42)
rf.fit(X_train, y_train)
rf_pred = rf.predict(X_test)

lr = LinearRegression()
lr.fit(X_train, y_train)
lr_pred = lr.predict(X_test)

# mean_squared_error returns the MSE; take the square root so the value printed
# under the "RMSE" label really is a root-mean-squared error (same units as score).
print("Random Forest:")
print("MAE:", mean_absolute_error(y_test, rf_pred))
print("RMSE:", np.sqrt(mean_squared_error(y_test, rf_pred)))

print("\n Linear Regression:")
print("MAE:", mean_absolute_error(y_test, lr_pred))
print("RMSE:", np.sqrt(mean_squared_error(y_test, lr_pred)))

# Save relative to the working directory instead of the Colab-only '/content/'
# folder, so the cell also runs outside Colab (on Colab the cwd is /content).
joblib.dump(rf, 'rf_score_predictor.pkl')
joblib.dump(lr, 'lr_score_predictor.pkl')
print("✅ Models saved.")

In [None]:
# Horizontal bar chart of the fitted random forest's feature importances,
# labelled with the column names of the feature frame X.
importances = rf.feature_importances_
features = X.columns

fig, ax = plt.subplots(figsize=(6, 4))
sns.barplot(x=importances, y=features, ax=ax)
ax.set_title('Feature Importance - Random Forest')
ax.set_xlabel('Importance')
ax.set_ylabel('Feature')
plt.show()

In [None]:
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import StandardScaler

# Toy learner-behaviour dataset used as input for the KMeans clustering below.
cluster_data = pd.DataFrame(
    dict(
        time_on_topic=[20, 30, 40, 60, 15, 25, 70, 10],
        avg_score=[60, 65, 70, 85, 55, 62, 90, 50],
        num_attempts=[3, 2, 4, 1, 5, 3, 1, 6],
        response_time=[10, 12, 15, 5, 20, 10, 6, 25],
    )
)
cluster_data.head()

In [None]:
# Standardise the behaviour features, cluster them into 3 groups with KMeans,
# report the silhouette score and persist the fitted KMeans model.

# Scale only the original feature columns. The 'cluster' column is written back
# to cluster_data further down, so excluding it keeps this cell idempotent:
# re-running it must not feed the previous run's labels into the scaler.
feature_columns = [col for col in cluster_data.columns if col != 'cluster']

scaler = StandardScaler()
scaled_features = scaler.fit_transform(cluster_data[feature_columns])

# n_init is pinned explicitly because scikit-learn changed its default between
# releases; fixing it keeps the clustering reproducible across versions.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
cluster_labels = kmeans.fit_predict(scaled_features)

cluster_data['cluster'] = cluster_labels

score = silhouette_score(scaled_features, cluster_labels)
print(f"Silhouette Score: {score:.3f}")
print(cluster_data)

# Save relative to the working directory instead of the Colab-only '/content/'
# folder, so the cell also runs outside Colab (on Colab the cwd is /content).
joblib.dump(kmeans, 'kmeans_learning_style.pkl')
print("✅ KMeans model saved.")

In [None]:
!pip install xgboost

import xgboost as xgb

# Toy engagement dataset: activity counts and days since last activity per row,
# plus the binary `dropout_flag` column used as the classification target.
dropout_data = pd.DataFrame(
    dict(
        video_watch_count=[10, 20, 5, 0, 30, 25, 2, 3],
        forum_posts=[1, 2, 0, 0, 4, 3, 0, 0],
        problem_attempts=[5, 10, 2, 1, 15, 12, 1, 0],
        last_active_days=[2, 1, 7, 14, 1, 2, 10, 20],
        dropout_flag=[0, 0, 1, 1, 0, 0, 1, 1],
    )
)
dropout_data.head()

In [None]:
# Train and evaluate an XGBoost dropout classifier on the toy engagement data,
# then persist the fitted model with joblib.
X = dropout_data.drop('dropout_flag', axis=1)
y = dropout_data['dropout_flag']

# Stratify the split: with only 8 rows, an unstratified 30% hold-out can end up
# containing a single class, which makes accuracy and the report meaningless.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# `use_label_encoder` is dropped: it is deprecated in XGBoost and recent
# releases only report it as an unused parameter. The labels here are already 0/1.
model = xgb.XGBClassifier(eval_metric='logloss')
model.fit(X_train, y_train)

# Predict
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

# Save relative to the working directory instead of the Colab-only '/content/'
# folder, so the cell also runs outside Colab (on Colab the cwd is /content).
joblib.dump(model, 'xgb_dropout_predictor.pkl')
print("✅ XGBoost dropout model saved.")


In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical

# Train a small CNN on MNIST, evaluate it on the held-out test split and save it.

# Add a trailing channel axis and scale pixel values by 1/255; one-hot encode
# the 10 digit classes to match the categorical cross-entropy loss.
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = X_train.reshape(-1, 28, 28, 1) / 255.0
X_test = X_test.reshape(-1, 28, 28, 1) / 255.0
y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)

model = Sequential([
    Conv2D(32, (3,3), activation='relu', input_shape=(28,28,1)),
    MaxPooling2D((2,2)),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(10, activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=3, validation_split=0.2)

# The test split was prepared above but never used; report held-out performance
# so the saved model comes with a number that is not from the training data.
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"Test loss: {test_loss:.4f} - Test accuracy: {test_accuracy:.4f}")

# Save relative to the working directory instead of the Colab-only '/content/'
# folder, so the cell also runs outside Colab (on Colab the cwd is /content).
model.save('mnist_cnn.h5')
print("✅ CNN model saved.")


In [None]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Embedding, LSTM, Dense, Input, Bidirectional

# Train a tiny BiLSTM topic classifier on four example sentences and save it.

# Shared sizes: each one is used both in preprocessing and in the model, so
# they are defined once to keep the two sides from drifting apart.
MAX_LEN = 10      # padded sequence length == model input length
VOCAB_SIZE = 50   # number of Embedding rows; token ids must stay below this
NUM_CLASSES = 4   # one-hot width == size of the softmax layer

texts = [
    "The climate is changing rapidly",
    "Photosynthesis is crucial to plant growth",
    "Newton’s laws describe motion",
    "The French Revolution changed history"
]
labels = [0, 1, 2, 3]  # 4 classes (env, bio, physics, history)

# Cap the tokenizer at VOCAB_SIZE so every emitted token id is a valid index
# into the Embedding layer, even if more texts are added later.
tokenizer = Tokenizer(num_words=VOCAB_SIZE)
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)
X = pad_sequences(sequences, maxlen=MAX_LEN)
y = to_categorical(labels, num_classes=NUM_CLASSES)

input_ = Input(shape=(MAX_LEN,))
x = Embedding(input_dim=VOCAB_SIZE, output_dim=8)(input_)
x = Bidirectional(LSTM(16))(x)
x = Dense(NUM_CLASSES, activation='softmax')(x)

model = Model(input_, x)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(X, y, epochs=10)

# Save relative to the working directory instead of the Colab-only '/content/'
# folder, so the cell also runs outside Colab (on Colab the cwd is /content).
model.save('bilstm_topic_classifier.h5')
print("✅ BiLSTM model saved.")


In [None]:
!pip install transformers

from transformers import pipeline

# Build a Hugging Face summarization pipeline and summarise one short passage.
summarizer = pipeline(task="summarization", model="sshleifer/distilbart-cnn-12-6")

text = """
Artificial Intelligence is transforming the educational landscape. It allows personalized learning experiences, automates grading, and provides real-time feedback to students and educators alike.
"""

# Greedy decoding (do_sample=False) with the summary length bounded to 10–40.
summary = summarizer(
    text,
    min_length=10,
    max_length=40,
    do_sample=False,
)
print("Summary:", summary[0]['summary_text'])


In [None]:
import pandas as pd
import numpy as np

# Generate a synthetic student dataset: three clipped, normally distributed
# features, a noisy weighted final score, and a rule-based dropout label.
np.random.seed(42)

num_students = 300

df = pd.DataFrame({
    "study_hours": np.clip(np.random.normal(loc=4, scale=2, size=num_students), 0, 10),
    "attendance": np.clip(np.random.normal(loc=80, scale=10, size=num_students), 50, 100),
    "previous_scores": np.clip(np.random.normal(loc=70, scale=15, size=num_students), 0, 100)
})

# The weights (0.4 + 0.3 + 0.3) sum to 1 and study_hours * 10 maps 0-10 hours
# onto 0-100, so the weighted sum is already on a 0-100 scale. It must not be
# halved: dividing by 2 pushed almost every score below the 50-point threshold
# used for dropout_risk, which made that label 1 for nearly every student.
df["final_score"] = (
    0.4 * df["study_hours"] * 10 +
    0.3 * df["attendance"] +
    0.3 * df["previous_scores"] +
    np.random.normal(0, 5, num_students)
)
df["final_score"] = df["final_score"].clip(0, 100)

# A student is flagged at risk when attendance is below 65 or the score is below 50.
df["dropout_risk"] = ((df["attendance"] < 65) | (df["final_score"] < 50)).astype(int)

df.head()


In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

# Fit the three models that the Streamlit app loads, all on the same
# standardised feature matrix.
X = df[["study_hours", "attendance", "previous_scores"]]
y_score, y_dropout = df["final_score"], df["dropout_risk"]

# The fitted scaler is kept so that new inputs can be transformed identically.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

score_regressor = LinearRegression().fit(X_scaled, y_score)
dropout_classifier = RandomForestClassifier(random_state=42).fit(X_scaled, y_dropout)
cluster_model = KMeans(n_clusters=3, random_state=42).fit(X_scaled)

# Last expression: show the fitted KMeans estimator as the cell output.
cluster_model


In [None]:
!pip install streamlit scikit-learn pandas matplotlib seaborn joblib


In [None]:
import joblib

# Persist every artefact the Streamlit app loads, keyed by its file name.
persisted_artifacts = {
    'score_regressor.pkl': score_regressor,
    'dropout_classifier.pkl': dropout_classifier,
    'student_cluster_model.pkl': cluster_model,
    'scaler.pkl': scaler,
}
for filename, artifact in persisted_artifacts.items():
    saved_paths = joblib.dump(artifact, filename)

# Last expression: the return value of the final dump, shown as the cell output.
saved_paths


In [None]:
%%writefile AI_Assistant_app.py
import streamlit as st
import pandas as pd
import joblib
import numpy as np

# Configure the page before any other Streamlit call (including the cached
# loader below).
st.set_page_config(page_title="AI Learning Assistant", layout="wide")


@st.cache_resource
def load_artifacts():
    """Load the fitted models and the scaler from disk.

    Streamlit re-executes this script on every interaction; caching the loaded
    objects avoids unpickling all four files on each rerun.

    Returns:
        Tuple of (score regressor, dropout classifier, cluster model, scaler),
        each loaded with joblib from the current working directory.
    """
    return (
        joblib.load('score_regressor.pkl'),
        joblib.load('dropout_classifier.pkl'),
        joblib.load('student_cluster_model.pkl'),
        joblib.load('scaler.pkl'),
    )


score_model, dropout_model, cluster_model, scaler = load_artifacts()

# Tip shown for each cluster id; any id not listed falls back to DEFAULT_TIP.
# NOTE(review): KMeans cluster ids are arbitrary, and nothing in this file ties
# them to these learning styles — confirm the mapping against the trained model.
CLUSTER_TIPS = {
    0: "💡 Tip: Visual learner – use diagrams and charts.",
    1: "🎧 Tip: Auditory learner – benefit from lectures or podcasts.",
}
DEFAULT_TIP = "📝 Tip: Kinesthetic learner – hands-on activities work best."

st.title("🎓 AI-Powered Personalized Learning Assistant")

with st.form("student_form"):
    st.subheader("Enter Student Details")

    study_hours = st.number_input("Daily Study Hours", min_value=0.0, max_value=24.0, step=0.5)
    attendance = st.slider("Attendance (%)", 0, 100, 75)
    previous_scores = st.number_input("Average Previous Scores (0-100)", min_value=0.0, max_value=100.0)

    submitted = st.form_submit_button("Predict")

if submitted:
    # Column names and order must match the frame the scaler was fitted on.
    input_data = pd.DataFrame([[study_hours, attendance, previous_scores]],
                              columns=["study_hours", "attendance", "previous_scores"])

    scaled_data = scaler.transform(input_data)

    # The form accepts inputs well outside the training range (e.g. 24 study
    # hours), so the linear model can extrapolate beyond a valid score; clamp
    # the prediction to the 0-100 range.
    score_pred = float(np.clip(score_model.predict(scaled_data)[0], 0, 100))
    dropout_risk = dropout_model.predict(scaled_data)[0]
    cluster = cluster_model.predict(scaled_data)[0]

    st.markdown(f"### 📊 Predicted Score: **{score_pred:.2f}**")
    st.markdown(f"### ⚠️ Dropout Risk: {'Yes' if dropout_risk else 'No'}")
    st.markdown(f"### 🧠 Learning Style Cluster: **Cluster {cluster}**")

    st.info(CLUSTER_TIPS.get(int(cluster), DEFAULT_TIP))


In [None]:
!pip install streamlit pandas

In [None]:
# Start the Streamlit app in the background (`&`) and, in the foreground, run
# localtunnel against port 8501.
# NOTE(review): no port is passed to `streamlit run`, so this presumably relies
# on Streamlit serving on 8501 by default — confirm.
! streamlit run AI_Assistant_app.py & npx localtunnel --port 8501