In [5]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [1]:
!git clone https://github.com/SomersInias/AI-PromptInsights

Cloning into 'AI-PromptInsights'...
remote: Enumerating objects: 33, done.[K
remote: Counting objects: 100% (22/22), done.[K
remote: Compressing objects: 100% (19/19), done.[K
remote: Total 33 (delta 5), reused 17 (delta 3), pack-reused 11 (from 1)[K
Receiving objects: 100% (33/33), 157.21 MiB | 12.27 MiB/s, done.
Resolving deltas: 100% (5/5), done.
Updating files: 100% (6/6), done.


In [2]:
!unzip /content/AI-PromptInsights/data/RandomUsers_prompts.zip -d /content/extracted_data

Archive:  /content/AI-PromptInsights/data/RandomUsers_prompts.zip
  inflating: /content/extracted_data/RandomUsers_prompts.csv  


In [6]:
# Install necessary libraries
!pip install flask pyngrok

import os
import threading
import time
import pandas as pd
from flask import Flask, request, jsonify, Response
from pyngrok import ngrok
import matplotlib.pyplot as plt
from wordcloud import WordCloud
from collections import Counter
import spacy
from io import BytesIO
from requests_toolbelt.multipart.encoder import MultipartEncoder


# Load spaCy model
nlp = spacy.load("en_core_web_sm")
nlp.max_length = 150_000_000  # Increase as needed

# Load dataset
df = pd.read_csv("/content/extracted_data/RandomUsers_prompts.csv")


# Replace ngrok_auth_token with your actual authtoken from the ngrok dashboard

# Read the ngrok auth token from the txt file
with open('/content/drive/MyDrive/ngrok_token.txt', 'r') as file:
    ngrok_auth_token = file.read().strip()

# Authenticate ngrok with the token read from the file
!ngrok config add-authtoken {ngrok_auth_token}

# Flask app setup
app = Flask(__name__)

# Analysis function (from your code)
def analyze_user_prompts(user_id, df):
    user_prompts = df[df['userId'] == user_id]['prompt'].dropna()
    all_prompts = " ".join(user_prompts)
    doc = nlp(all_prompts)
    tokens = [token.text.lower() for token in doc if not token.is_stop and not token.is_punct and len(token.text) > 2]
    word_freq = Counter(tokens)
    return word_freq

# Plot function (from your code)
def plot_horizontal_word_frequencies(sorted_word_freq, top_n=100):
    words, frequencies = zip(*sorted_word_freq[:top_n])
    plt.figure(figsize=(12, 16))
    plt.barh(words, frequencies, color='skyblue')
    plt.xlabel('Frequency', fontsize=14)
    plt.ylabel('Words', fontsize=14)
    plt.title(f'Top {top_n} Word Frequencies', fontsize=16)
    plt.gca().invert_yaxis()
    plt.tight_layout()
    buffer = BytesIO()
    plt.savefig(buffer, format='png')
    plt.close()
    buffer.seek(0)
    return buffer

# WordCloud function (from your code)
def generate_word_cloud(sorted_word_freq):
    word_freq_dict = dict(sorted_word_freq)
    wordcloud = WordCloud(width=800, height=400, background_color='white', colormap='viridis').generate_from_frequencies(word_freq_dict)
    plt.figure(figsize=(12, 6))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')
    buffer = BytesIO()
    plt.savefig(buffer, format='png')
    plt.close()
    buffer.seek(0)
    return buffer

# API route for generating word frequency plot (PNG)
@app.route('/word_frequencies', methods=['GET'])
def word_frequencies():
    user_id = request.args.get('user_id', type=int)
    x = request.args.get('x', default=20, type=int)

    if user_id is None:
        return jsonify({"status": "error", "message": "user_id is required"}), 400

    try:
        # Analyze data
        word_freq = analyze_user_prompts(user_id, df)
        sorted_word_freq = word_freq.most_common()

        # Generate word frequency plot
        plot_buffer = plot_horizontal_word_frequencies(sorted_word_freq, top_n=x)

        # Return the plot as PNG
        return Response(plot_buffer, mimetype='image/png')

    except Exception as e:
        return jsonify({"status": "error", "message": str(e)}), 500

# API route for generating word cloud (PNG)
@app.route('/word_cloud', methods=['GET'])
def word_cloud():
    user_id = request.args.get('user_id', type=int)
    x = request.args.get('x', default=20, type=int)

    if user_id is None:
        return jsonify({"status": "error", "message": "user_id is required"}), 400

    try:
        # Analyze data
        word_freq = analyze_user_prompts(user_id, df)
        sorted_word_freq = word_freq.most_common()

        # Generate word cloud
        wordcloud_buffer = generate_word_cloud(sorted_word_freq[:x])

        # Return the word cloud as PNG
        return Response(wordcloud_buffer, mimetype='image/png')

    except Exception as e:
        return jsonify({"status": "error", "message": str(e)}), 500

# Run Flask app and ngrok
def run_flask():
    app.run(port=5000)

flask_thread = threading.Thread(target=run_flask)
flask_thread.start()

time.sleep(5)
public_url = ngrok.connect(5000)
print(f" * ngrok tunnel \"{public_url}\" -> \"http://127.0.0.1:5000\"")


Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m


 * ngrok tunnel "NgrokTunnel: "https://0bc9-35-196-231-166.ngrok-free.app" -> "http://localhost:5000"" -> "http://127.0.0.1:5000"
