# AI Study Pal Capstone: Summarizer Model Training
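This notebook trains a simple Keras feed-forward network that maps a bag-of-words encoding of a text to a bag-of-words encoding of its summary, as a toy text summarizer for the AI Study Pal project.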

In [None]:
# 1. Import Required Libraries
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
import joblib

In [None]:
# 2. Load and Explore Synthetic Dataset
# The synthetic corpus is written as (long text, short summary) pairs so that
# every text sits directly next to the summary it belongs to.
_sample_pairs = [
    (
        "Machine learning is a subset of artificial intelligence that enables systems to learn from data and improve over time without being explicitly programmed.",
        "Machine learning lets systems learn from data.",
    ),
    (
        "Supervised learning uses labeled data to train models to predict outcomes for new, unseen data.",
        "Supervised learning uses labeled data for prediction.",
    ),
    (
        "Unsupervised learning finds patterns and relationships in data without labeled outcomes.",
        "Unsupervised learning finds patterns in unlabeled data.",
    ),
    (
        "Neural networks are computational models inspired by the human brain, used for tasks like classification and regression.",
        "Neural networks mimic the brain for classification.",
    ),
    (
        "Logistic regression is a statistical method for binary classification problems.",
        "Logistic regression solves binary classification.",
    ),
]

# Parallel lists in sample order: texts[i] is summarized by summaries[i].
texts = [long_text for long_text, _ in _sample_pairs]
summaries = [short_summary for _, short_summary in _sample_pairs]

# One row per sample, with columns 'text' and 'summary'.
df = pd.DataFrame({'text': texts, 'summary': summaries})
df.head()

In [None]:
# 3. Data Preprocessing
# Bag-of-words encoder. Its vocabulary is learned from the long texts only and
# capped at 50 terms; the summaries are then encoded with that same vocabulary,
# so inputs (X) and targets (Y) share one column layout.
vectorizer = CountVectorizer(max_features=50)
vectorizer.fit(df['text'])
X = vectorizer.transform(df['text']).toarray()
Y = vectorizer.transform(df['summary']).toarray()

# Hold out 20% of the rows, with a fixed seed so the split is reproducible.
# NOTE(review): this split is taken before the length feature is appended and
# the later cells visible in this notebook fit on the full data instead --
# confirm whether the held-out rows are meant to be used.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# 4. Feature Engineering
# Extend the bag-of-words matrix with one extra trailing column holding the
# character length of each text.
df['length'] = df['text'].map(len)
X_len = df['length'].to_numpy().reshape(-1, 1)  # column vector, one row per text
X_full = np.concatenate((X, X_len), axis=1)

In [None]:
# 5. Model Selection and Training
# Feed-forward network: two ReLU hidden layers (dropout after the first) and
# one sigmoid output unit per column of Y.
layer_stack = [
    Dense(64, activation='relu', input_shape=(X_full.shape[1],)),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dense(Y.shape[1], activation='sigmoid'),
]
model = Sequential(layer_stack)

# Adam with a learning rate of 0.01; binary cross-entropy is applied to each
# output unit independently.
model.compile(
    optimizer=Adam(0.01),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Fit silently on every row of X_full / Y.
model.fit(X_full, Y, epochs=100, batch_size=2, verbose=0)

In [None]:
# 6. Model Evaluation
# Scores the model on X_full / Y; evaluate() returns the loss followed by the
# compiled metric.
train_scores = model.evaluate(X_full, Y, verbose=0)
loss, acc = train_scores
print(f"Train Accuracy: {acc:.2f}")

In [None]:
# 7. Hyperparameter Tuning
# (Demo: Try different learning rates)
# Every learning rate is tried on its own freshly initialised copy of the
# architecture. Re-compiling and re-fitting `model` itself would carry the
# weights learned in earlier runs into the next one, so the printed scores
# would not be comparable, and `model` would end up in whatever state the last
# learning rate left it in.
from tensorflow.keras.models import clone_model

for lr in [0.001, 0.01, 0.1]:
    # clone_model copies the layer configuration but creates new weights.
    trial_model = clone_model(model)
    trial_model.compile(optimizer=Adam(lr), loss='binary_crossentropy', metrics=['accuracy'])
    trial_model.fit(X_full, Y, epochs=20, batch_size=2, verbose=0)
    loss, acc = trial_model.evaluate(X_full, Y, verbose=0)
    print(f"LR={lr}: Train Accuracy={acc:.2f}")

In [None]:
# 8. Model Deployment Example
from tensorflow.keras.models import load_model

# Write the trained network and the fitted vectorizer to disk.
model.save('summarizer_model.h5')
joblib.dump(vectorizer, 'summarizer_vectorizer.pkl')

# Read both artifacts back and use only the reloaded copies below.
vectorizer_loaded = joblib.load('summarizer_vectorizer.pkl')
model_loaded = load_model('summarizer_model.h5')

sample_text = ["Explain supervised learning in AI."]

# Rebuild the feature layout used for fitting: bag-of-words counts first,
# then the character length as the trailing column.
X_sample = vectorizer_loaded.transform(sample_text).toarray()
sample_lengths = np.array(list(map(len, sample_text))).reshape(-1, 1)
X_sample_full = np.concatenate((X_sample, sample_lengths), axis=1)
pred_summary_vec = model_loaded.predict(X_sample_full)

# Convert vector back to words (demo): keep every vocabulary term whose
# predicted score is above 0.1, in vocabulary order.
words = vectorizer_loaded.get_feature_names_out()
kept_words = [word for word, score in zip(words, pred_summary_vec[0]) if score > 0.1]
summary = ' '.join(kept_words)
print('Predicted summary:', summary)