In [6]:
import pandas as pd
import numpy as np
import warnings
from datasets import load_dataset

warnings.filterwarnings('ignore')

# Load dataset from Hugging Face using datasets library
print("Loading dataset... (this may take a moment)")
dataset = load_dataset("derek-thomas/ScienceQA")

# Combine all splits
df = pd.concat([
    dataset['train'].to_pandas(),
    dataset['test'].to_pandas(),
    dataset['validation'].to_pandas()
])

df = df[['question', 'grade']]

df = df.groupby('grade').filter(lambda x: len(x) >= 2000)

df.loc[np.random.choice(df.index, size=int(0.10 * len(df)), replace=False), 'question'] = np.nan
df.loc[np.random.choice(df.index, size=int(0.05 * len(df)), replace=False), 'grade'] = np.nan
df['question'] = df['question'].str.replace(' ', '  ', regex=False)
df = pd.concat([df, df.sample(frac=0.05)], ignore_index=True)
df = df.sample(frac=1).reset_index(drop=True)

df.head()

Loading dataset... (this may take a moment)


Generating train split: 100%|██████████| 12726/12726 [00:01<00:00, 7700.49 examples/s] 
Generating validation split: 100%|██████████| 4241/4241 [00:00<00:00, 11441.20 examples/s]
Generating test split: 100%|██████████| 4241/4241 [00:00<00:00, 12710.68 examples/s]


Unnamed: 0,question,grade
0,,grade5
1,Which word would you find on a dictiona...,grade6
2,Which property do these three objects ha...,grade3
3,Which sentence states a fact?,grade6
4,Complete the sentence.\nPollen helps a pl...,grade4


In [7]:
import re
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
import gradio as gr
import joblib

In [8]:
print("Dataset Shape:", df.shape)
print("\nFirst few rows:")
print(df.head())
print("\nDataset Info:")
print(df.info())
print("\nMissing Values:")
print(df.isnull().sum())
print("\nGrade Distribution:")
print(df['grade'].value_counts())

Dataset Shape: (18277, 2)

First few rows:
                                            question   grade
0                                                NaN  grade5
1  Which  word  would  you  find  on  a  dictiona...  grade6
2  Which  property  do  these  three  objects  ha...  grade3
3                  Which  sentence  states  a  fact?  grade6
4  Complete  the  sentence.\nPollen  helps  a  pl...  grade4

Dataset Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18277 entries, 0 to 18276
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   question  14836 non-null  object
 1   grade     16552 non-null  object
dtypes: object(2)
memory usage: 285.7+ KB
None

Missing Values:
question    3441
grade       1725
dtype: int64

Grade Distribution:
grade
grade4    3394
grade5    2913
grade3    2878
grade7    2618
grade8    2408
grade6    2341
Name: count, dtype: int64


In [9]:
df_clean = df.dropna().drop_duplicates()
df_clean['question'] = df_clean['question'].str.replace('  ', ' ', regex=False)

In [10]:
def preprocess_text(text):
    text = text.lower()
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    text = re.sub(r'\s+', ' ', text).strip()
    return text

df_clean['question_processed'] = df_clean['question'].apply(preprocess_text)

In [11]:
X = df_clean['question_processed']
y = df_clean['grade']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [12]:
tfidf = TfidfVectorizer(max_features=5000, min_df=2, max_df=0.8, ngram_range=(1, 2))
X_train_tfidf = tfidf.fit_transform(X_train)
X_test_tfidf = tfidf.transform(X_test)

In [13]:
model = LogisticRegression(max_iter=1000, random_state=42, multi_class='multinomial')
model.fit(X_train_tfidf, y_train)

0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,42
,solver,'lbfgs'
,max_iter,1000


In [14]:
y_pred = model.predict(X_test_tfidf)
print(f"Accuracy: {accuracy_score(y_test, y_pred):.4f}")
print("\n", classification_report(y_test, y_pred))

Accuracy: 0.3833

               precision    recall  f1-score   support

      grade3       0.45      0.49      0.46       169
      grade4       0.37      0.42      0.39       221
      grade5       0.30      0.24      0.27       176
      grade6       0.32      0.23      0.27       204
      grade7       0.34      0.42      0.38       233
      grade8       0.48      0.48      0.48       239

    accuracy                           0.38      1242
   macro avg       0.38      0.38      0.38      1242
weighted avg       0.38      0.38      0.38      1242



In [15]:
def predict_grade(question_text):
    processed_text = preprocess_text(question_text)
    text_tfidf = tfidf.transform([processed_text])
    prediction = model.predict(text_tfidf)[0]
    probabilities = model.predict_proba(text_tfidf)[0]
    grade_probs = {grade: float(prob) for grade, prob in zip(model.classes_, probabilities)}
    return prediction, grade_probs

In [19]:
def gradio_predict(question_text):
    if not question_text.strip():
        return "Please enter a question!", {}
    predicted_grade, probabilities = predict_grade(question_text)
    result = f"**Predicted Grade Level:** {predicted_grade.upper()}"
    return result, probabilities

interface = gr.Interface(
    fn=gradio_predict,
    inputs=gr.Textbox(lines=5, placeholder="Enter a science question here...", label="Question"),
    outputs=[
        gr.Textbox(label="Prediction"),
        gr.Label(label="Confidence Scores", num_top_classes=6)
    ],
    title="YourName",  # REPLACE WITH YOUR FIRST NAME
    description="Enter a science question to predict its grade level (grade3-grade8).",
    examples=[
        ["What is photosynthesis?"],
        ["Explain the water cycle."],
        ["What are the phases of mitosis?"]
    ]
)

interface.launch(share=True)


* Running on local URL:  http://127.0.0.1:7863

Could not create share link. Please check your internet connection or our status page: https://status.gradio.app.


