In [13]:
!pip install flask

Collecting flask
  Downloading flask-3.0.3-py3-none-any.whl.metadata (3.2 kB)
Collecting Werkzeug>=3.0.0 (from flask)
  Downloading werkzeug-3.0.4-py3-none-any.whl.metadata (3.7 kB)
Collecting itsdangerous>=2.1.2 (from flask)
  Downloading itsdangerous-2.2.0-py3-none-any.whl.metadata (1.9 kB)
Collecting click>=8.1.3 (from flask)
  Downloading click-8.1.7-py3-none-any.whl.metadata (3.0 kB)
Collecting blinker>=1.6.2 (from flask)
  Downloading blinker-1.8.2-py3-none-any.whl.metadata (1.6 kB)
Downloading flask-3.0.3-py3-none-any.whl (101 kB)
Downloading blinker-1.8.2-py3-none-any.whl (9.5 kB)
Downloading click-8.1.7-py3-none-any.whl (97 kB)
Downloading itsdangerous-2.2.0-py3-none-any.whl (16 kB)
Downloading werkzeug-3.0.4-py3-none-any.whl (227 kB)
Installing collected packages: Werkzeug, itsdangerous, click, blinker, flask
Successfully installed Werkzeug-3.0.4 blinker-1.8.2 click-8.1.7 flask-3.0.3 itsdangerous-2.2.0


In [4]:
import re

import pandas as pd
from pymongo import MongoClient

In [7]:
# Client for the MongoDB server at localhost:27017, plus handles on the
# `twitter_db` database and its `tweets` collection used by the cells below.
client = MongoClient(host='localhost', port=27017)
db = client.get_database('twitter_db')
collection = db.get_collection('tweets')

In [None]:
file_path = 'correct_twitter_202102.tsv'
chunk_size = 10000  # rows per batch; tune to the memory you have available

# Stream the tab-separated file in fixed-size batches rather than loading it
# whole, and bulk-insert every batch as one document per row.
# NOTE(review): each run of this cell inserts the rows again — confirm the
# collection is empty before re-running.
tsv_reader = pd.read_csv(file_path, sep='\t', chunksize=chunk_size)
for chunk in tsv_reader:
    records = chunk.to_dict(orient='records')
    collection.insert_many(records)

In [9]:
from datetime import datetime

def query_tweets(term, coll=None, *, is_regex=False):
    """Summarise the tweets whose ``text`` field contains ``term``.

    The match is case-insensitive. By default ``term`` is treated as literal
    text and regex metacharacters are escaped, so input coming from an HTTP
    request cannot inject an arbitrary (or pathologically slow) pattern.

    Args:
        term: Text to search for. Must be a ``str``.
        coll: Collection-like object exposing ``aggregate(pipeline)`` and
            ``distinct(key, filter)``. Defaults to the module-level
            ``collection``.
        is_regex: When true, ``term`` is passed to MongoDB unchanged as a
            regular expression instead of being escaped.

    Returns:
        dict with these keys:
            daily_counts: ``[{'_id': 'YYYY-MM-DD', 'count': n}, ...]``,
                ordered by day.
            unique_users: number of distinct ``user_id`` values.
            average_likes: result of averaging ``likes`` over the matches
                (``[{'_id': None, 'averageLikes': x}]``, or ``[]`` when
                nothing matches).
            location_counts: ``[{'_id': place_id, 'count': n}, ...]``.
            time_of_day: ``[{'_id': hour, 'count': n}, ...]``, ordered by hour.
            most_active_user: at most one ``{'_id': user_id, 'count': n}``.

    Raises:
        TypeError: If ``term`` is not a string (e.g. a missing query
            parameter arriving as ``None``).
    """
    if not isinstance(term, str):
        raise TypeError(f"term must be a string, got {type(term).__name__}")
    if coll is None:
        # Compare with None explicitly: pymongo collections do not support
        # truth-value testing.
        coll = collection

    pattern = term if is_regex else re.escape(term)
    query = {'text': {'$regex': pattern, '$options': 'i'}}
    match_stage = {'$match': query}

    # $dateToString / $hour require a date value. $toDate leaves real dates
    # untouched and additionally accepts date strings.
    # NOTE(review): the ingest cell stores whatever pd.read_csv produced for
    # `created_at` — confirm its format is one $toDate can parse.
    created_at = {'$toDate': '$created_at'}

    def _count_by(group_key, *extra_stages):
        # Number of matching tweets per distinct value of `group_key`.
        pipeline = [
            match_stage,
            {'$group': {'_id': group_key, 'count': {'$sum': 1}}},
            *extra_stages,
        ]
        return list(coll.aggregate(pipeline))

    ascending_by_key = {'$sort': {'_id': 1}}

    # Daily Tweet Counts
    daily_counts = _count_by(
        {'$dateToString': {'format': "%Y-%m-%d", 'date': created_at}},
        ascending_by_key,
    )

    # Unique Users
    unique_users = coll.distinct('user_id', query)

    # Average Likes
    average_likes = list(coll.aggregate([
        match_stage,
        {'$group': {'_id': None, 'averageLikes': {'$avg': '$likes'}}},
    ]))

    # Location Information
    location_counts = _count_by('$place_id')

    # Times of Day
    time_of_day = _count_by({'$hour': created_at}, ascending_by_key)

    # Most Active User
    most_active_user = _count_by(
        '$user_id',
        {'$sort': {'count': -1}},
        {'$limit': 1},
    )

    return {
        "daily_counts": daily_counts,
        "unique_users": len(unique_users),
        "average_likes": average_likes,
        "location_counts": location_counts,
        "time_of_day": time_of_day,
        "most_active_user": most_active_user,
    }


In [None]:
# Shell command, so it needs the `!` prefix to run from a code cell.
# Starts a detached MongoDB container published on localhost:27017; run it
# before the cells that read from or write to the database.
!docker run --name mongodb -d -p 27017:27017 mongo

In [None]:
from flask import Flask, request, jsonify

# Flask application exposing the tweet statistics over HTTP.
app = Flask(__name__)


@app.route('/query', methods=['GET'])
def query_api():
    """Handle ``GET /query?term=<text>``.

    Returns the statistics computed by ``query_tweets`` as JSON, or a
    400 response with an ``error`` message when ``term`` is missing or blank.
    """
    term = request.args.get('term')
    if term is None or not term.strip():
        # Without this guard a missing parameter reaches the database layer
        # as None and surfaces as an opaque server error.
        return jsonify({'error': "query parameter 'term' is required"}), 400
    results = query_tweets(term)
    return jsonify(results)

if __name__ == '__main__':
    # Debug mode turns on Werkzeug's interactive debugger (which lets anyone
    # who can reach the server execute code) and the auto-reloader, which
    # restarts the host process and is not suited to a notebook kernel.
    # Keep both off here.
    app.run(debug=False, use_reloader=False)
