In [1]:
import os
import nltk

# 1️⃣ Force local NLTK path
nltk_data_dir = os.path.join(os.getcwd(), 'nltk_data')
os.makedirs(nltk_data_dir, exist_ok=True)
nltk.data.path.insert(0, nltk_data_dir)

# 2️⃣ Download required packages to this folder (if missing)
for pkg, subfolder in [('punkt', 'tokenizers'), ('stopwords', 'corpora')]:
    try:
        nltk.data.find(f"{subfolder}/{pkg}")
        print(f"✅ '{pkg}' already exists.")
    except LookupError:
        print(f"⬇️ Downloading '{pkg}'...")
        nltk.download(pkg, download_dir=nltk_data_dir)

# 3️⃣ Now import tokenizer & stopwords
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords

stop_words = set(stopwords.words('english'))


✅ 'punkt' already exists.
✅ 'stopwords' already exists.


In [3]:
test = "Mathematics improves problem-solving and analytical thinking."
words = word_tokenize(test.lower())
print(words)


LookupError: 
**********************************************************************
  Resource [93mpunkt_tab[0m not found.
  Please use the NLTK Downloader to obtain the resource:

  [31m>>> import nltk
  >>> nltk.download('punkt_tab')
  [0m
  For more information see: https://www.nltk.org/data.html

  Attempted to load [93mtokenizers/punkt_tab/english/[0m

  Searched in:
    - 'j:\\akhi\\phase4_nlp\\nltk_data'
    - 'C:\\Users\\Aditya/nltk_data'
    - 'j:\\akhi\\venv\\nltk_data'
    - 'j:\\akhi\\venv\\share\\nltk_data'
    - 'j:\\akhi\\venv\\lib\\nltk_data'
    - 'C:\\Users\\Aditya\\AppData\\Roaming\\nltk_data'
    - 'C:\\nltk_data'
    - 'D:\\nltk_data'
    - 'E:\\nltk_data'
**********************************************************************


In [7]:
# =============================================
# AI Study Pal - Phase 4: NLP Study Tips (Fixed)
# =============================================

import os
import pandas as pd
from collections import Counter
import nltk

# Force local nltk_data path
nltk_data_dir = os.path.join(os.getcwd(), 'nltk_data')
nltk.data.path.clear()
nltk.data.path.append(nltk_data_dir)

from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords

# ✅ Verify punkt and stopwords are found
print("✅ punkt found at:", nltk.find('tokenizers/punkt'))
print("✅ stopwords found at:", nltk.find('corpora/stopwords'))

# Dataset
data = {
    "subject": ["Math", "Science", "History", "English", "Computer Science"],
    "text": [
        "Mathematics improves problem-solving and analytical thinking.",
        "Science enhances curiosity and innovation through research.",
        "History teaches lessons from ancient and modern times.",
        "English helps express thoughts clearly through reading and writing.",
        "Computer Science deals with computation, algorithms, and programming."
    ]
}

df = pd.DataFrame(data)
print("\n🔹 Original Dataset:")
print(df)

# Stopwords
stop_words = set(stopwords.words('english'))

# NLP logic
study_tips = []
for text in df['text']:
    words = word_tokenize(text.lower())  # ✅ No more LookupError
    keywords = [w for w in words if w.isalpha() and w not in stop_words]
    top_keywords = [kw for kw, _ in Counter(keywords).most_common(3)]
    study_tips.append(f"Focus on: {', '.join(top_keywords)} daily.")

df['study_tips'] = study_tips

print("\n🔹 Generated Study Tips:")
print(df)

# Save output
os.makedirs('data', exist_ok=True)
df.to_csv('data/study_tips.csv', index=False)
print("\n✅ Saved to 'data/study_tips.csv'")


✅ punkt found at: j:\akhi\phase4_nlp\nltk_data\tokenizers\punkt
✅ stopwords found at: j:\akhi\phase4_nlp\nltk_data\corpora\stopwords

🔹 Original Dataset:
            subject                                               text
0              Math  Mathematics improves problem-solving and analy...
1           Science  Science enhances curiosity and innovation thro...
2           History  History teaches lessons from ancient and moder...
3           English  English helps express thoughts clearly through...
4  Computer Science  Computer Science deals with computation, algor...


LookupError: 
**********************************************************************
  Resource [93mpunkt_tab[0m not found.
  Please use the NLTK Downloader to obtain the resource:

  [31m>>> import nltk
  >>> nltk.download('punkt_tab')
  [0m
  For more information see: https://www.nltk.org/data.html

  Attempted to load [93mtokenizers/punkt_tab/english/[0m

  Searched in:
    - 'j:\\akhi\\phase4_nlp\\nltk_data'
**********************************************************************
