In [1]:
import json
from openai import OpenAI

In [50]:
def load_course_info(file_path):
    """
    Loads the course information from a JSON file.

    Args:
        file_path: Path to the JSON file (anything accepted by ``open``).

    Returns:
        The parsed JSON content, as returned by ``json.load``.

    Raises:
        OSError: If the file cannot be opened (e.g. FileNotFoundError).
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    # Decode explicitly as UTF-8 rather than with the platform's locale
    # encoding, so non-ASCII course text loads the same way on every machine.
    with open(file_path, 'r', encoding='utf-8') as file:
        course_info = json.load(file)
    return course_info

In [64]:
# for traditional NLP, please modify this function for your json data structure
def format_course_info_for_gpt(course_info):
    """
    Condenses the course information into one plain-text string for a GPT prompt.

    Every value of ``course_info`` is read through the keys 'Concepts' (a list of
    dicts, each with a 'name'), 'code' -> 'Key_functions' (a list of strings),
    'Teaching Week' and 'Title'. One sentence is produced per entry, and the
    sentences are joined with single spaces in the mapping's iteration order.
    A missing key raises KeyError.
    """
    def describe(entry):
        # Collect the two comma-separated lists first, then fill in the sentence.
        concept_names = []
        for concept in entry['Concepts']:
            concept_names.append(concept['name'])
        concepts_text = ', '.join(concept_names)
        functions_text = ', '.join(entry['code']['Key_functions'])
        return f"{entry['Teaching Week']}: {entry['Title']} covers concepts {concepts_text} and key functions {functions_text}."

    # The mapping's keys are not part of the summary; only the values are used.
    return " ".join(describe(entry) for entry in course_info.values())

In [65]:
# Relative path of the course JSON; it is resolved against the current working directory.
file_path = "info.json"
# Parse the JSON file into a Python object.
course_info = load_course_info(file_path)
# Flatten the parsed course data into the single text summary that is later
# passed to query_gpt_with_code_info as GPT's context.
course_info_summary = format_course_info_for_gpt(course_info)

In [66]:
course_info_summary # Display the knowledge that GPT needs to be given

"week 1: Introduction to Natural Language Processing covers concepts Overview of NLP, Historical Background, Approaches to NLP, Preprocessing Techniques and key functions re.sub(pattern, replacement, string), PorterStemmer(), word_tokenize(), nltk.pos_tag(), nltk.word_tokenize(), set(stopwords.words('english')). week 2: Linguistic Analysis and Information Extraction covers concepts Introduction to Information Extraction, Named Entity Recognition (NER), Part-Of-Speech Tagging, Dependency Parsing and key functions NER(), spacy.load(), nlp.pipe(), spacy.load(), nlp(). week 3: Term Weighting Scheme and Topic Modelling covers concepts Term Weighting Schemes, Topic Modeling, Dimensionality Reduction and key functions TfidfVectorizer(), BM25Okapi(), gensim.models.LdaMulticore(), LsiModel(), NMF(), PCA(), TruncatedSVD(). week 4: Traditional Machine Learning Methods and NLP Applications covers concepts Text Classification, Clustering and key functions accuracy_score, f1_score, confusion_matrix,

In [74]:
def query_gpt_with_code_info(query, code_info, model="gpt-4-turbo"):
    """
    Submits a query along with the formatted course information to GPT for processing,
    using the client.chat.completions.create interface.

    The course summary is placed at the start of the system message, followed by
    instructions describing how quiz questions should be shaped. The query is
    sent unchanged as the user message.

    Args:
        query: The user's request text.
        code_info: Formatted course summary that prefixes the system message
            (e.g. the output of format_course_info_for_gpt).
        model: Name of the chat model to call. Defaults to "gpt-4-turbo".

    Returns:
        One of three strings:
        - the whitespace-stripped text of the first returned choice;
        - "No response was returned by GPT." when the completion has no
          choices or the first choice carries no text content;
        - "Error querying GPT with course info: <exception>" when the call
          raised.

    Note:
        Uses the module-level ``client`` object, which must be defined before
        this function is called.
    """
    # Adjacent string literals are joined with no separator, so every fragment
    # has to end with its own punctuation and space; otherwise the sentences
    # fuse together (e.g. "...code segmentif you are asked...").
    system_prompt = (
        f"{code_info} Your task is to provide a comprehensive response based on the code and concepts provided. "
        "if you are asked to provide code questions, the questions should be single choice questions testing understanding of the code and concept. "
        "Before the questions, you should provide the code segment. "
        "if you are asked to provide questions (not specified code questions), the questions should have multiple choice questions testing understanding of the terms. "
        "Finally, you should give the answer"
    )
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": query},
    ]

    try:
        chat_completion = client.chat.completions.create(
            model=model,
            messages=messages,
        )
        if not chat_completion.choices:
            return "No response was returned by GPT."
        # The first choice holds the reply. Its content can be None, so check
        # before stripping instead of letting .strip() raise AttributeError.
        content = chat_completion.choices[0].message.content
        if content is None:
            return "No response was returned by GPT."
        return content.strip()
    except Exception as e:
        # Deliberate best-effort: report the failure as text so the notebook
        # cell still produces readable output instead of a traceback.
        return f"Error querying GPT with course info: {e}"

In [75]:
import os
# Create the module-level client that query_gpt_with_code_info looks up when it is called.
# The API key is read from the OPENAI_API environment variable; os.getenv returns
# None when that variable is unset.
# NOTE(review): the OpenAI SDK's conventional variable name is OPENAI_API_KEY —
# confirm that OPENAI_API is the intended name here.
client = OpenAI(api_key=os.getenv("OPENAI_API"))

In [76]:
# Example request; the course summary built earlier is supplied as GPT's context.
query = "Please provide some code questions for Week 5."
response = query_gpt_with_code_info(query, course_info_summary)
# The helper always returns a string (the reply text or an error message), so it is printed as-is.
print(response)

### Code Segment Presentations and Questions from Week 5 - "Evaluation Metrics and Word Embeddings"

**Code Segment 1:**
```python
metrics.roc_auc_score(y_true, y_scores)
```
**Question 1:**
What does the `roc_auc_score` function compute in the context of evaluation metrics?
- A) The relation between True Negative Rate and False Positive Rate.
- B) The area under the Receiver Operating Characteristic curve.
- C) The maximum score for precision.
- D) The total number of correct predictions.

**Answer:** B) The area under the Receiver Operating Characteristic curve.

---

**Code Segment 2:**
```python
nltk.translate.bleu_score.sentence_bleu(reference, candidate, smoothing_function=nltk.translate.bleu_score.SmoothingFunction().method1)
```
**Question 2:**
What is the purpose of the `sentence_bleu` function from the `nltk` library?
- A) It predicts the next word in a sentence based on context.
- B) It calculates the BLEU score to assess the quality of machine-translated text against a refe