<a href="https://colab.research.google.com/github/Gangaraju1411/Databricks/blob/main/Student%20Questions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
%pip install  pyngrok


Collecting pyngrok
  Downloading pyngrok-7.2.0-py3-none-any.whl.metadata (7.4 kB)
Downloading pyngrok-7.2.0-py3-none-any.whl (22 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.2.0


In [None]:
from faker import Faker
import pandas as pd

# Initialize Faker instance and seed its shared random generator so the
# generated student records (and the CSV written from them) are the same on
# every Restart & Run All
fake = Faker()
Faker.seed(42)

# Function to generate student data
def generate_student_data(num_records):
    """Build ``num_records`` fake student records with the module-level ``fake``.

    Each record is a dict with the keys 'Name', 'Class', 'Read Text' and
    'Phone Number'. The records are returned as a list, in generation order.
    """
    class_levels = ('10th', '11th', '12th', 'Undergraduate', 'Postgraduate')
    return [
        {
            'Name': fake.name(),
            'Class': fake.random_element(elements=class_levels),
            # Reading passage; its length is bounded through max_nb_chars
            'Read Text': fake.text(max_nb_chars=300),
            'Phone Number': fake.phone_number(),
        }
        for _record in range(num_records)
    ]

# Generate 50 student records
student_data = generate_student_data(50)

# Create a DataFrame with one row per generated record
df = pd.DataFrame(student_data)

# Save to CSV (index=False keeps the row index out of the file)
df.to_csv('student_data.csv', index=False)

# Display the first few records. Left as the cell's last expression so the
# notebook renders the frame as a table instead of printing plain text.
df.head()


In [None]:
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification, Trainer, TrainingArguments
import torch
from sklearn.model_selection import train_test_split

# Load pre-trained DistilBERT model and tokenizer.
# Both are loaded from the same checkpoint name; no label count or other
# configuration is passed, so the classification head uses the library defaults.
model_name = 'distilbert-base-uncased'
tokenizer = DistilBertTokenizer.from_pretrained(model_name)
model = DistilBertForSequenceClassification.from_pretrained(model_name)

# Prepare dataset
class QuizDataset(torch.utils.data.Dataset):
    """Torch dataset that tokenizes question records lazily, on item access.

    Args:
        questions: Indexable collection of dicts, each holding a 'text' entry
            (handed to the tokenizer) and a 'label' entry (wrapped in a tensor).
        tokenizer: Callable invoked as ``tokenizer(text, max_length=...,
            truncation=True, padding='max_length', return_tensors='pt')`` whose
            result exposes 'input_ids' and 'attention_mask' tensors.
        max_len: Length passed to the tokenizer as ``max_length``.
    """

    def __init__(self, questions, tokenizer, max_len):
        self.questions = questions
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        """Return the number of question records."""
        return len(self.questions)

    def __getitem__(self, idx):
        """Tokenize record ``idx`` and return its tensors.

        Returns:
            Dict with 'input_ids', 'attention_mask' and 'labels' tensors.
        """
        question = self.questions[idx]
        inputs = self.tokenizer(question['text'], max_length=self.max_len, truncation=True, padding='max_length', return_tensors='pt')
        # Drop only the leading axis (assumes the tokenizer returns tensors
        # shaped (1, max_len) for a single text). A bare squeeze() would also
        # collapse the sequence axis whenever max_len == 1.
        return {
            'input_ids': inputs['input_ids'].squeeze(0),
            'attention_mask': inputs['attention_mask'].squeeze(0),
            'labels': torch.tensor(question['label'])
        }

# Example dataset (use actual data for real training)
# A single placeholder record; each record needs the 'text' and 'label' keys
# that QuizDataset.__getitem__ reads.
questions = [{'text': 'Sample question?', 'label': 1}]
train_data = QuizDataset(questions, tokenizer, max_len=128)
# NOTE(review): train_loader is not passed to the Trainer below and is not
# referenced again in the visible notebook; the Trainer receives train_data directly.
train_loader = torch.utils.data.DataLoader(train_data, batch_size=8, shuffle=True)

# Define training arguments
training_args = TrainingArguments(
    output_dir='./results',  # directory handed to the Trainer for its outputs
    num_train_epochs=3,
    per_device_train_batch_size=8,
    logging_dir='./logs',
)

# Train the model
# Only a training dataset is supplied; no evaluation dataset or metrics are configured.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_data,
)

trainer.train()

# Save the model
# NOTE(review): the object saved here is the DistilBertForSequenceClassification
# instance, even though the target directory is named 'question_generation_model'.
# Model and tokenizer are written to the same directory.
model.save_pretrained('question_generation_model')
tokenizer.save_pretrained('question_generation_model')


In [None]:
import pandas as pd
import json

# Example DataFrame for demonstration
# Five hard-coded passages under the 'Read Text' column, which is the column
# read when the quiz questions are generated further down in this cell.
# NOTE(review): this rebinds `df`, which an earlier cell assigns to the
# Faker-generated student frame; that frame is no longer reachable by name after this.
data = {
    'Read Text': [
        "Alice was beginning to get very tired of sitting by her sister on the bank, and of having nothing to do: once or twice she had peeped into the book her sister was reading, but it had no pictures or conversations in it, 'and what is the use of a book,' thought Alice, 'without pictures or conversations?'",
        "The sun was setting behind the hills, casting a golden glow over the landscape. The birds sang their evening songs as the breeze gently rustled through the trees, creating a serene and peaceful atmosphere.",
        "In the heart of the bustling city, a small café provided a sanctuary for those seeking refuge from the chaos of urban life. The aroma of freshly brewed coffee and the soft hum of conversation created a comforting and inviting space.",
        "The detective examined the crime scene carefully, noting every detail with precision. The clues were scattered around, and each one had to be analyzed thoroughly to piece together the mystery.",
        "The voyage across the ocean was long and arduous, but the sight of land on the horizon filled the crew with renewed hope and excitement. They had braved the storms and challenges of the sea, and their destination was finally within reach."
    ]
}
df = pd.DataFrame(data)

# Generate quiz questions
from transformers import GPT2Tokenizer, GPT2LMHeadModel, pipeline

# Load pre-trained GPT-2 model and tokenizer
# NOTE(review): rebinds `tokenizer` and `model`, names the DistilBERT training
# cell also assigns; after this cell they refer to the GPT-2 objects.
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')

# Initialize the text generation pipeline
# generate_quiz_questions calls this module-level `text_generator` once per passage.
text_generator = pipeline('text-generation', model=model, tokenizer=tokenizer)

def generate_quiz_questions(texts, num_questions=5):
    """Generate GPT-2 continuations for each passage and wrap them as quiz entries.

    Args:
        texts: Iterable of prompt strings; each is passed on its own to the
            module-level ``text_generator`` pipeline.
        num_questions: Number of sequences requested from the pipeline per prompt.

    Returns:
        A flat list of dicts with the keys 'question', 'choices' and 'answer'.
        'question' and 'answer' hold the same stripped generated text, and
        'choices' is always an empty list.
    """
    questions = []
    for text in texts:
        # Generate questions using GPT-2.
        # Only max_new_tokens bounds the output length: when max_length is
        # passed as well, transformers warns that max_new_tokens takes
        # precedence (the warning is visible in this notebook's recorded
        # output), so the redundant max_length argument is not passed.
        generated_texts = text_generator(
            text,
            max_new_tokens=100,  # Number of tokens to generate
            num_return_sequences=num_questions,
            truncation=True
        )
        for generated_text in generated_texts:
            # Strip once and reuse the value for both fields
            content = generated_text['generated_text'].strip()
            questions.append({'question': content, 'choices': [], 'answer': content})
    return questions

# Turn every passage in the 'Read Text' column into generated quiz entries
passages = df['Read Text'].tolist()
quiz_questions = generate_quiz_questions(passages, num_questions=5)

# Save quiz questions to JSON (serialized with a 4-space indent)
with open('quiz_questions.json', 'w') as json_file:
    json_file.write(json.dumps(quiz_questions, indent=4))

print("Quiz questions generated and saved to quiz_questions.json")


In [None]:
from flask import Flask, request, jsonify
from transformers import GPT2Tokenizer, GPT2LMHeadModel, pipeline
import json

# Flask application object; the /generate-quiz route below is registered on it
app = Flask(__name__)

# Load pre-trained GPT-2 model and tokenizer
# Done once at module level, so request handlers reuse the same objects.
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')

# Initialize the text generation pipeline
# generate_quiz_questions calls this module-level `text_generator` once per passage.
text_generator = pipeline('text-generation', model=model, tokenizer=tokenizer)

def generate_quiz_questions(texts, num_questions=5):
    """Generate GPT-2 continuations for each passage and wrap them as quiz entries.

    Args:
        texts: Iterable of prompt strings; each is passed on its own to the
            module-level ``text_generator`` pipeline.
        num_questions: Number of sequences requested from the pipeline per prompt.

    Returns:
        A flat list of dicts with the keys 'question', 'choices' and 'answer'.
        'question' and 'answer' hold the same stripped generated text, and
        'choices' is always an empty list.
    """
    questions = []
    for text in texts:
        # Generate questions using GPT-2.
        # Only max_new_tokens bounds the output length: when max_length is
        # passed as well, transformers warns that max_new_tokens takes
        # precedence (the warning is visible in this notebook's recorded
        # output), so the redundant max_length argument is not passed.
        generated_texts = text_generator(
            text,
            max_new_tokens=100,  # Number of tokens to generate
            num_return_sequences=num_questions,
            truncation=True
        )
        for generated_text in generated_texts:
            # Strip once and reuse the value for both fields
            content = generated_text['generated_text'].strip()
            questions.append({'question': content, 'choices': [], 'answer': content})
    return questions

@app.route('/generate-quiz', methods=['POST'])
def generate_quiz():
    """Handle POST /generate-quiz and return the generated quiz entries as JSON.

    Request body (JSON object):
        texts: A list of strings, or a single string (treated as a one-item list).
        num_questions: Optional positive integer; defaults to 5.

    Responses:
        200 with the list produced by ``generate_quiz_questions``.
        400 with ``{'error': ...}`` when the body is missing, is not a JSON
            object, lacks 'texts', or carries values of the wrong type.
        500 with ``{'error': ...}`` when generation raises.
    """
    # silent=True returns None for a missing or unparsable JSON body instead of
    # raising, so those requests get this endpoint's own JSON 400 response.
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or 'texts' not in data:
        return jsonify({'error': 'No texts provided'}), 400

    texts = data['texts']
    # A bare string would otherwise be iterated character by character.
    if isinstance(texts, str):
        texts = [texts]
    if not isinstance(texts, list) or not all(isinstance(item, str) for item in texts):
        return jsonify({'error': "'texts' must be a string or a list of strings"}), 400

    num_questions = data.get('num_questions', 5)  # Default to 5 questions if not specified
    # bool is a subclass of int, so it is rejected explicitly.
    if isinstance(num_questions, bool) or not isinstance(num_questions, int) or num_questions < 1:
        return jsonify({'error': "'num_questions' must be a positive integer"}), 400

    try:
        # Generate quiz questions
        quiz_questions = generate_quiz_questions(texts, num_questions=num_questions)
        return jsonify(quiz_questions)
    except Exception as e:
        # Broad catch is deliberate at the HTTP boundary; the traceback goes to
        # the app log so the failure is not lost behind the JSON response.
        app.logger.exception('Quiz generation failed')
        return jsonify({'error': str(e)}), 500

# Run the Flask app with ngrok tunnel
if __name__ == '__main__':
    import os
    from getpass import getpass

    from pyngrok import ngrok

    port = 5000

    # Set your ngrok auth token.
    # It is read from the NGROK_AUTHTOKEN environment variable, with an
    # interactive prompt as fallback, so the secret never lives in the notebook
    # source. Any token that was ever committed should be revoked and replaced.
    auth_token = os.environ.get('NGROK_AUTHTOKEN') or getpass('ngrok auth token: ')
    ngrok.set_auth_token(auth_token)

    # Start ngrok and open a tunnel to the Flask app
    tunnel = ngrok.connect(port)
    # Print the tunnel's URL string rather than the tunnel object itself, whose
    # text form embeds its own quoted "url -> target" pair inside the message.
    public_url = tunnel.public_url
    print(f" * Ngrok tunnel \"{public_url}\" -> \"http://127.0.0.1:{port}\"")

    # Start the Flask app (blocks until the server is stopped)
    app.run(port=port)


 * Ngrok tunnel "NgrokTunnel: "https://3c8a-34-105-100-3.ngrok-free.app" -> "http://localhost:5000"" -> "http://127.0.0.1:5000"
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug:127.0.0.1 - - [12/Sep/2024 07:03:09] "[31m[1mPOST /generate-quiz HTTP/1.1[0m" 400 -
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Both `max_new_tokens` (=100) and `max_length`(=3000000000000) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
INFO:werkzeug:127.0.0.1 - - [12/Sep/2024 07:11:03] "POST /generate-quiz HTTP/1.1" 200 -
