<a href="https://colab.research.google.com/github/Tejasri1557/AIML-2025/blob/main/2303A51557_09.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Assessing Mathematical Learning in Higher Education
Q1: Identify the math topic that is hardest to learn.
Q2: List the top 5 math sub-topics for students.
Q3: Identify the question level most often solved by students.
Q4: Name the country with the most solved math problems.
Q5: List the top 10 most-searched keywords related to math topics.


In [25]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Simulated data: Topics, number of students, average grade, and difficulty score
data = {
    'Topic': ['Abstract Algebra', 'Calculus', 'Differential Equations', 'Topology', 'Real Analysis', 'Linear Algebra', 'Probability and Statistics'],
    'Num_Students': [300, 500, 350, 100, 450, 400, 600],
    'Avg_Grade': [65, 85, 70, 60, 75, 80, 78],  # Simulating average student performance
    'Difficulty_Score': [9, 7, 8, 10, 9, 6, 5]  # Difficulty from 1 (easy) to 10 (hard)
}

# Convert data into DataFrame
df = pd.DataFrame(data)

# Features (independent variables) and target (dependent variable)
X = df[['Num_Students', 'Avg_Grade']]  # Features
y = df['Difficulty_Score']  # Target variable (Difficulty Score)

# Train-test split with a fixed seed so the fit is reproducible.
# Note: the held-out rows (X_test / y_test) are not evaluated in this cell.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a regression model mapping (Num_Students, Avg_Grade) -> Difficulty_Score
model = LinearRegression()
model.fit(X_train, y_train)

# Predict the difficulty for a topic (example: 500 students, avg grade of 80).
# The model was fitted on a DataFrame, so the query is wrapped in a DataFrame
# carrying the same column names; passing a bare nested list makes scikit-learn
# warn that "X does not have valid feature names" and skips the name check.
new_topic = pd.DataFrame([[500, 80]], columns=X.columns)
predicted_difficulty = model.predict(new_topic)
print(f"Predicted difficulty score: {predicted_difficulty[0]}")


Predicted difficulty score: 6.143495753240948




In [26]:
from sklearn.cluster import KMeans

# Simulated data: Sub-topics and student interest (number of students studying each sub-topic)
sub_topics_data = {
    'Sub_Topic': ['Calculus', 'Linear Algebra', 'Probability and Statistics', 'Abstract Algebra', 'Differential Equations', 'Geometry', 'Matrix Operations', 'Number Theory'],
    'Interest_Score': [85, 90, 88, 70, 75, 65, 80, 78]
}

# Convert data into DataFrame
sub_topics_df = pd.DataFrame(sub_topics_data)

# Use KMeans to group sub-topics into clusters by interest score.
# n_init is pinned explicitly: its default changed between scikit-learn
# releases (with a FutureWarning in between), so leaving it implicit makes the
# cluster labels depend on the installed version.
kmeans = KMeans(n_clusters=5, n_init=10, random_state=42)
sub_topics_df['Cluster'] = kmeans.fit_predict(sub_topics_df[['Interest_Score']])

# Display top 5 sub-topics based on their interest scores.
# The ranking uses Interest_Score directly; the Cluster column is stored on the
# frame but does not influence which rows are selected here.
top_5_sub_topics = sub_topics_df.sort_values(by='Interest_Score', ascending=False).head(5)
print("Top 5 math sub-topics based on interest:")
print(top_5_sub_topics[['Sub_Topic', 'Interest_Score']])


Top 5 math sub-topics based on interest:
                    Sub_Topic  Interest_Score
1              Linear Algebra              90
2  Probability and Statistics              88
0                    Calculus              85
6           Matrix Operations              80
7               Number Theory              78


In [27]:
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier

# Simulated data: Number of problems solved, time spent, and question level (target)
data_q3 = {
    'Num_Problems_Solved': [20, 50, 80, 30, 60, 90, 15],
    'Time_Spent_Minutes': [15, 30, 45, 25, 35, 50, 20],
    'Level': ['Basic', 'Intermediate', 'Advanced', 'Basic', 'Intermediate', 'Advanced', 'Basic']
}

# Convert data into DataFrame
df_q3 = pd.DataFrame(data_q3)

# Encode target labels (levels) as integers; the encoder is kept so the
# prediction can be mapped back to its original label below.
encoder = LabelEncoder()
df_q3['Level_Encoded'] = encoder.fit_transform(df_q3['Level'])

# Features and target
X_q3 = df_q3[['Num_Problems_Solved', 'Time_Spent_Minutes']]
y_q3 = df_q3['Level_Encoded']

# Train-test split with a fixed seed so the fit is reproducible.
# Note: the held-out rows (X_test / y_test) are not evaluated in this cell.
X_train, X_test, y_train, y_test = train_test_split(X_q3, y_q3, test_size=0.2, random_state=42)

# Train a RandomForest model
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)

# Predict question level for a new student (example: solved 70 problems, spent 40 minutes).
# The forest was fitted on a DataFrame, so the query is wrapped in a DataFrame
# carrying the same column names; a bare nested list makes scikit-learn warn
# that "X does not have valid feature names" and skips the name check.
new_student = pd.DataFrame([[70, 40]], columns=X_q3.columns)
predicted_level = rf_model.predict(new_student)
predicted_level_str = encoder.inverse_transform(predicted_level)
print(f"Predicted question level: {predicted_level_str[0]}")


Predicted question level: Intermediate




In [28]:
# Simulated tally of math-competition problems solved, one entry per country
countries_data = dict(
    Country=['China', 'United States', 'Russia', 'South Korea', 'India'],
    Math_Problems_Solved=[1500, 1300, 1200, 1100, 900],  # Simulated problems solved
)

# Tabulate the tallies
df_countries = pd.DataFrame(countries_data)

# Find the row label holding the largest solved count, then report that row's country
best_row_label = df_countries['Math_Problems_Solved'].idxmax()
top_country = df_countries.loc[best_row_label]
print(f"Country with most solved math problems: {top_country['Country']}")


Country with most solved math problems: China


In [29]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation

# Simulated list of math-related search queries
search_queries = [
    "Learn Algebra",
    "Calculus basics",
    "Introduction to Linear Algebra",
    "Probability theory and statistics",
    "Differential equations for beginners",
    "Matrix operations",
    "Understanding number theory",
    "Geometry problems",
    "Real analysis for beginners",
    "Advanced calculus",
]

# Turn the queries into a bag-of-words count matrix, dropping English stop words
vectorizer = CountVectorizer(stop_words='english')
X = vectorizer.fit_transform(search_queries)

# Fit a single-topic LDA model; its only component holds one weight per vocabulary term
lda = LatentDirichletAllocation(n_components=1, random_state=42)
lda.fit(X)

# Rank the vocabulary by weight in that topic (heaviest first) and keep the first 10
top_keywords = np.array(vectorizer.get_feature_names_out())
keywords_scores = lda.components_[0]
descending_order = keywords_scores.argsort()[::-1]
top_10_keywords = top_keywords[descending_order[:10]]
print("Top 10 keywords related to math topics:")
for rank, term in enumerate(top_10_keywords, start=1):
    print(f"{rank}. {term}")


Top 10 keywords related to math topics:
1. algebra
2. beginners
3. calculus
4. theory
5. understanding
6. introduction
7. analysis
8. basics
9. differential
10. equations
