In [154]:
import pandas as pd;
import numpy as np;

relation=pd.read_csv("prerequisite_annotations.csv")
id_map=pd.read_csv("topics_to_resources.csv")


In [155]:
relation.head()

Unnamed: 0,source_topic_id,target_topic_id,prereq_relation,annotator_id
0,2,3,-1,56
1,2,4,-1,3
2,2,5,-1,3
3,2,6,-1,3
4,2,7,-1,15


In [156]:
id_map.head()

Unnamed: 0,200_topic_id,200_topic_name,resource_id,url
0,100,markov_decision_processes,7487,https://www.autonlab.org/_media/tutorials/mdp0...
1,100,markov_decision_processes,6889,http://www.pomdp.org/tutorial/mdp.html
2,100,markov_decision_processes,63835,https://danieltakeshi.github.io/2015-08-02-mar...
3,100,markov_decision_processes,7079,http://www.deeplearningindaba.com/uploads/1/0/...
4,100,markov_decision_processes,933,https://pdfs.semanticscholar.org/2389/9124a85e...


In [157]:
id_to_name=dict(zip(id_map['200_topic_id'],id_map['200_topic_name']))
print(id_to_name)
relation.drop(['annotator_id'],axis=1,inplace=True)
relation.head()

{100: 'markov_decision_processes', 91: 'clustering', 81: 'kernel_function', 135: 'linear_programming', 128: 'differential_calculus', 34: 'tree_adjoining_grammar', 151: 'statistical_machine_translation', 73: 'multilingual_word', 105: 'matrix_factorization', 193: 'pointer_networks', 103: 'speech_recognition', 38: 'language_modeling', 16: 'coreference_and_coreference_resolution', 155: 'conditional_probability', 106: 'spectral_methods', 23: 'reinforcement_learning', 60: 'morphological_disambiguation', 41: 'sentence_boundary_recognition', 44: 'latent_dirichlet_allocation', 172: 'normalization', 15: 'abstract_meaning_representation', 75: 'linear_regression', 78: 'dual_problems', 125: 'beam_search', 19: 'context_sensitive_grammar', 29: 'semantic_parsing', 165: 'n_gram', 7: 'bleu', 178: 'entropy', 180: 'activation_functions', 149: 'gated_recurrent_units', 46: 'language_identification', 148: 'log_linear_models', 150: 'gibbs_sampling', 174: 'data_preprocessing', 162: 'image_retrieval', 114: 'gra

Unnamed: 0,source_topic_id,target_topic_id,prereq_relation
0,2,3,-1
1,2,4,-1
2,2,5,-1
3,2,6,-1
4,2,7,-1


In [158]:
relation=relation[relation['prereq_relation']==1]
import networkx as nx

G=nx.DiGraph()
for _, row in relation.iterrows():
    src=id_to_name.get(row["source_topic_id"])
    dst=id_to_name.get(row["target_topic_id"])
    if src and dst:
        G.add_edge(src, dst)#src->dst

In [159]:

def generate_study_plan(target_topic):
    if target_topic not in G:
        return "Topic not found in trained data."
    
    ancestors=nx.ancestors(G,target_topic)
    subgraph=G.subgraph(ancestors)
    try:
        return list(nx.topological_sort(subgraph))
    except nx.NetworkXUnfeasible:
        return "Cycle detected in prerequisite graph"

In [160]:
target="markov_decision_processes"
plan=generate_study_plan(target)

print("Main Topic Prerequisites (Recommended) Study Plan:")
if isinstance(plan, list):
    for i, topic in enumerate(plan, 1):
        print(f"{i}-> {topic}")
else:
    print(plan)

Main Topic Prerequisites (Recommended) Study Plan:
1-> matrix_factorization
2-> matrix_multiplication


In [161]:

tree_of_ancrelations={}
def build_pre_tree(G, topic, visited=None):
    if visited is None:
        visited = set()
    if topic in visited:
        return None,0
    visited.add(topic)
    tree_of_prerelations = {}
    count_pre=0
    for parent in sorted(G.predecessors(topic)):
        if parent not in visited:
            subtree,subcount= build_pre_tree(G, parent, visited)
            if subtree is not None:
                tree_of_prerelations[parent]=subtree
                count_pre+=1+subcount
            else:
                tree_of_prerelations[parent]={}
    return tree_of_prerelations,count_pre

def build_de_tree(G, topic, visited=None):
    if visited is None:
        visited = set()
    if topic in visited:
        return {},0
    visited.add(topic)
    count_de=0

    for child in sorted(G.successors(topic)):
        subtree,subcount2 = build_de_tree(G, child, visited)
        if subtree is not None:
            tree_of_ancrelations[child]=subtree
            count_de+=1+subcount2
    return tree_of_ancrelations,count_de



In [162]:
print(build_pre_tree(G, target))
print(build_de_tree(G, target))

({'matrix_factorization': {}, 'matrix_multiplication': {}}, 2)
({'bootstrapping': {...}, 'gibbs_sampling': {...}, 'particle_filter': {...}, 'markov_chain_monte_carlo': {...}, 'q_learning': {...}, 'policy_gradient_methods': {}, 'dqn': {...}, 'game_playing_in_ai': {...}, 'reinforcement_learning': {}}, 17)


In [163]:
def print_tree(tree_of_relations, is_subtopic=False,visited=None):
    if visited is None:
        visited = set()
    for topic, subtopics in tree_of_relations.items():
        if topic in visited:
            continue
        visited.add(topic)
        print(f"--> {topic}")
        if subtopics: 
            print_tree(subtopics, is_subtopic=True,visited=visited)


In [164]:
def extract_count(G):
    data=[]
    for node in G.nodes():
        prereq=build_pre_tree(G,node)[1]
        deps=build_de_tree(G,node)[1]
        total_degree=prereq+deps
        data.append({"topic": node,
            "prerequisites_count": prereq,
            "dependents_count": deps,
            "total_degree": total_degree})
    return pd.DataFrame(data)
features_Add=extract_count(G)
print(features_Add.head())

                             topic  prerequisites_count  dependents_count  \
0              anaphora_resolution                   65                14   
1                  event_detection                   65                14   
2        word_sense_disambiguation                   67                 3   
3                   dialog_systems                   81                 2   
4  generative_adversarial_networks                    3                 1   

   total_degree  
0            79  
1            79  
2            70  
3            83  
4             4  


Approximating a general relation for study hours needed.

In [165]:
features_Add["study_hours"] = (
    features_Add["prerequisites_count"] * 2 +
    features_Add["dependents_count"] * 3 +
    3
)
print(features_Add.head())


                             topic  prerequisites_count  dependents_count  \
0              anaphora_resolution                   65                14   
1                  event_detection                   65                14   
2        word_sense_disambiguation                   67                 3   
3                   dialog_systems                   81                 2   
4  generative_adversarial_networks                    3                 1   

   total_degree  study_hours  
0            79          175  
1            79          175  
2            70          146  
3            83          171  
4             4           12  


In [166]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
model=LinearRegression()

x = features_Add[['prerequisites_count', 'dependents_count', 'total_degree']]
y = features_Add['study_hours']

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

model.fit(x_train, y_train)
y_pred = model.predict(x_test)
from sklearn.metrics import mean_squared_error, r2_score
accuracy=model.score(x_test,y_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"Mean Squared Error: {mse}")
print(f"R-squared: {r2}")
print(accuracy)


Mean Squared Error: 3.9494496522313336e-26
R-squared: 1.0
1.0


In [167]:
print("Study Tree:\n")
print("PREREQUISITES")
tree_pre=build_pre_tree(G,target)
tree_de=build_de_tree(G,target)   
print_tree(tree_pre)
print()
print("DEPENDENCIES")
print_tree(tree_de)

Study Tree:

PREREQUISITES


AttributeError: 'tuple' object has no attribute 'items'

Linear Regression to predict study hours

In [None]:
import streamlit as st
import pandas as pd
from sklearn.linear_model import LinearRegression

st.title("Study Plan Generator")
mode = st.selectbox("Select Mode",["Training", "Prediction"])

if mode == "Training":
    st.subheader("Training Mode")
    new_topic = st.text_input("Enter topic name")
    prereq = st.number_input("Prerequisites count", min_value=0, step=1)
    depend = st.number_input("Dependents count", min_value=0, step=1)
    total = prereq + depend
    newrow= pd.DataFrame({
        'topic': [new_topic],
        'prerequisites_count': [prereq],
        'dependents_count': [depend],
        'total_degree': [total],
        'study_hours': [0]
    })
    df = pd.concat([features_Add, newrow], ignore_index=True)
    X = df[["prerequisites_count", "dependents_count", "total_degree"]]
    y = df["study_hours"]
    model.fit(X, y)
    st.success(f"✅ Model retrained with new topic: {new_topic}")


elif mode == "Prediction":
    st.subheader("Prediction Mode")
    topic = st.selectbox("Choose a topic:", features_Add["topic"])
    row = features_Add[features_Add["topic"] == topic].iloc[0]
    st.write("### Topic Details")
    st.write(f"- **Prerequisites count:** {row['prerequisites_count']}")
    st.write(f"- **Dependents count:** {row['dependents_count']}")
    st.write(f"- **Total degree:** {row['total_degree']}")
    if st.button("Predict Study Hours"):
            features = [[row["prerequisites_count"], row["dependents_count"], row["total_degree"]]]
            pred = model.predict(features)[0]
            st.write(f"Predicted Study Hours: {pred}")




In [168]:
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import OrdinalEncoder,StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC

df=pd.read_csv("Student_Lifestyle_Dataset.csv")
encoder=OrdinalEncoder(categories=[["Low", "Moderate", "High"]])
df['Stress_Level']=encoder.fit_transform(df[['Stress_Level']])

X=df.drop(columns=['Student_ID','Stress_Level','GPA'])
Y=df['Stress_Level']

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=42,stratify=Y)

svm_model=Pipeline([('scaler',StandardScaler()),('svm',SVC(kernel='rbf', C=1.0, gamma='scale', probability=True, random_state=42))])
svm_model.fit(X_train,Y_train)
Y_pred=svm_model.predict(X_test)
accuracy=svm_model.score(X_test,Y_test)
print(accuracy)
print("Test Accuracy:", accuracy)

input = pd.DataFrame({
    "Study_Hours_Per_Day": [0],
    "Extracurricular_Hours_Per_Day": [1],
    "Sleep_Hours_Per_Day": [7],
    "Social_Hours_Per_Day": [2],
    "Physical_Activity_Hours_Per_Day": [1]
})

prediction = svm_model.predict(input)
probability = svm_model.predict_proba(input)

print("Prediction:", prediction)
print("Probability of Good Outcome:", probability)

0.9616666666666667
Test Accuracy: 0.9616666666666667
Prediction: [2.]
Probability of Good Outcome: [[0.13871197 0.02374552 0.8375425 ]]


In [127]:
print(Y.value_counts(normalize=True))


Stress_Level
0.0    0.5145
2.0    0.3370
1.0    0.1485
Name: proportion, dtype: float64


In [128]:
from sklearn.linear_model import LogisticRegression
log_model = LogisticRegression(max_iter=1000)
log_model.fit(X_train, Y_train)
print("Logistic Regression Accuracy:", log_model.score(X_test, Y_test))


Logistic Regression Accuracy: 0.8416666666666667


In [172]:
from sklearn.metrics import accuracy_score
df=pd.read_csv("Student_Lifestyle_Dataset.csv")
df['Outcome efficiency']=((df["GPA"] >=2.0)&(df["Stress_Level"].isin(["Low", "Moderate"]))).astype(int)
from sklearn.ensemble import RandomForestClassifier
X=df.drop(columns=['Student_ID','GPA','Outcome efficiency','Stress_Level'])
Y=df['Outcome efficiency']
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=42,stratify=Y)
rf_model=RandomForestClassifier()
rf_model.fit(X_train, Y_train)
accuracy=rf_model.score(X_test,Y_test)
y_pred = rf_model.predict(X_test)
acc=accuracy_score(Y_test, Y_pred)
print(accuracy)


input = pd.DataFrame({
    "Study_Hours_Per_Day": [0],
    "Extracurricular_Hours_Per_Day": [1],
    "Sleep_Hours_Per_Day": [7],
    "Social_Hours_Per_Day": [2],
    "Physical_Activity_Hours_Per_Day": [1]
})

prediction = rf_model.predict(input)
probability = rf_model.predict_proba(input)

print("Prediction:", prediction)
print("Probability of Good Outcome:", probability)

features=rf_model.feature_importances_
print("Feature Importances:", features)
print(df)
importance_df = pd.DataFrame({
    'Feature': X.columns,
    'Importance': features
}).sort_values(by='Importance', ascending=False)

print(importance_df)

import pandas as pd
corr = df.corr(numeric_only=True)
print(corr['Outcome efficiency'].sort_values(ascending=False))



1.0
Prediction: [1]
Probability of Good Outcome: [[0.05 0.95]]
Feature Importances: [0.60496481 0.0134748  0.30100158 0.02225329 0.05830551]
      Student_ID  Study_Hours_Per_Day  Extracurricular_Hours_Per_Day  \
0              1                  6.9                            3.8   
1              2                  5.3                            3.5   
2              3                  5.1                            3.9   
3              4                  6.5                            2.1   
4              5                  8.1                            0.6   
...          ...                  ...                            ...   
1995        1996                  6.5                            0.2   
1996        1997                  6.3                            2.8   
1997        1998                  6.2                            0.0   
1998        1999                  8.1                            0.7   
1999        2000                  9.0                            1.