In [3]:
import datetime
import sqlite3 as sql 
import requests
import re
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
import json 
import networkx
from config_new import config
import time
import random as rnd


In [7]:
def init_users(con, headers):
    """Seed ``twitter_base_users`` with the base accounts and label their category.

    Looks up a fixed set of handles through the Twitter v2 ``users/by``
    endpoint, inserts one row per returned user, then sets ``category`` to
    ``'v'`` (verified), ``'d'`` or ``'r'`` according to the handle.

    Parameters
    ----------
    con : sqlite3.Connection
        Open database connection; the table is created if it does not exist.
    headers : dict
        HTTP headers sent with the request (must carry the bearer token).

    Raises
    ------
    requests.HTTPError
        If the API answers with a non-success status code.
    """
    base_handels = {'NRSC':'NRSC',
        'DSCC':'DSCC',
        'DCCC':'DCCC',
        'NRCC':'NRCC',
        'RNC' :'GOP',
        'DNC':'DNC',
        'Twitter Verified':'verified'}
    usernames = ','.join(base_handels.values())
    BASE_URL_QUERY = f'https://api.twitter.com/2/users/by?usernames={usernames}&user.fields=created_at&expansions=pinned_tweet_id&tweet.fields=author_id,created_at'
    response = requests.get(BASE_URL_QUERY, headers=headers)
    # Fail loudly on auth/rate-limit errors instead of a KeyError on 'data' below.
    response.raise_for_status()
    payload = json.loads(response.text)

    cur = con.cursor()
    cur.execute("""CREATE TABLE IF NOT EXISTS twitter_base_users (user_id INTEGER, user_name TEXT, category TEXT)
    """)

    # Bound parameters: usernames come from the API and must never be spliced
    # into the SQL text.
    user_rows = [(int(user['id']), user['username']) for user in payload.get('data') or []]
    cur.executemany("INSERT INTO twitter_base_users (user_id, user_name) VALUES (?, ?)", user_rows)

    groups = {'v':'verified', 'd':['DNC', 'dccc', 'dscc'], 'r':['NRSC', 'NRCC', 'GOP']}
    for category, handles in groups.items():
        if not isinstance(handles, list):
            handles = [handles]
        placeholders = ','.join('?' for _ in handles)
        # Twitter handles are case-insensitive, while the API returns them in
        # the account's own casing; compare lower-cased so every handle matches.
        cur.execute(
            f"UPDATE twitter_base_users SET category = ? WHERE lower(user_name) IN ({placeholders})",
            [category] + [handle.lower() for handle in handles],
        )
    con.commit()

def get_follows(base_id, con, headers, next_token=None, rate_limit_wait=900):
    """Store every account that ``base_id`` follows in ``twitter_users``.

    Pages through the Twitter v2 ``users/:id/following`` endpoint (1000 users
    per page) and inserts one ``(user_id, user_name, parent_id)`` row per
    followed account, with ``parent_id`` set to ``base_id``.

    Parameters
    ----------
    base_id : int or str
        Twitter user id whose follow list is fetched.
    con : sqlite3.Connection
        Open database connection; ``twitter_users`` is created if missing.
    headers : dict
        HTTP headers sent with each request (must carry the bearer token).
    next_token : str, optional
        Pagination token to resume from; ``None`` starts at the first page.
    rate_limit_wait : int or float, optional
        Seconds to sleep after an HTTP 429 before retrying the same page.

    Raises
    ------
    requests.HTTPError
        If a page request fails with a status other than 429.
    """
    cur = con.cursor()
    # The rows below go into twitter_users, so that is the table that must exist
    # (the original created twitter_base_users here by mistake).
    cur.execute("""CREATE TABLE IF NOT EXISTS twitter_users (user_id INTEGER, user_name TEXT, parent_id INTEGER)
    """)
    con.commit()

    first_page_url = f'https://api.twitter.com/2/users/{base_id}/following?user.fields=created_at&expansions=pinned_tweet_id&tweet.fields=created_at&max_results=1000'
    while True:
        if next_token:
            page_url = f'{first_page_url}&pagination_token={next_token}'
        else:
            page_url = first_page_url

        response = requests.get(page_url, headers=headers)
        # Keep waiting while rate limited; a single retry can hit 429 again.
        while response.status_code == 429:
            time.sleep(rate_limit_wait)
            response = requests.get(page_url, headers=headers)
        response.raise_for_status()
        payload = json.loads(response.text)

        # 'data' is absent when the account follows nobody (or on the last page).
        follow_rows = [
            (int(user['id']), user['username'], base_id)
            for user in payload.get('data') or []
        ]
        if follow_rows:
            cur.executemany(
                "INSERT INTO twitter_users (user_id, user_name, parent_id) VALUES (?, ?, ?)",
                follow_rows,
            )
            con.commit()

        # Iterate instead of recursing: long follow lists no longer grow the stack.
        next_token = (payload.get('meta') or {}).get('next_token')
        if not next_token:
            break



#### Connection

In [5]:
# Authorization header shared by every Twitter API request in this notebook.
bearertoken = config['bear_token']
headers = {
    'Authorization': 'Bearer ' + bearertoken,
    'Content-Type': 'application/json',
}

# Open the SQLite database named in the config and make sure every table the
# later cells write to exists. Table and column names are part of the stored
# schema and are kept exactly as spelled.
con = sql.connect(config['database'])
cur = con.cursor()
for ddl in (
    '''CREATE TABLE IF NOT EXISTS twitter_forgien_lang_user (lang TEXT, user_id INT)''',
    '''CREATE TABLE IF NOT EXISTS twitter_tweet_to_user (tweet_id INT, user_id INT)''',
    '''CREATE TABLE IF NOT EXISTS twitter_tweet (tweet_id INT, conersation_id INT, text TEXT, likes INT, retweets INT, replay INT, quote INT, created TEXT)''',
    '''CREATE TABLE IF NOT EXISTS twitter_nea (tweet_id INT, text INT, entity TEXT)''',
    '''CREATE TABLE IF NOT EXISTS twitter_tweet_elements (tweet_id INT, mention_id INT, url TEXT)''',
    '''CREATE TABLE IF NOT EXISTS twitter_tweet_conText_entity (tweet_id INT, entity_id INT)''',
    '''CREATE TABLE IF NOT EXISTS twitter_tweet_conText_domain (tweet_id INT, domain_id INT)''',
    '''CREATE TABLE IF NOT EXISTS twitter_tweet_conText_def (id INT, name TEXT, description TEXT, type TEXT, PRIMARY KEY(id,type)) ''',
):
    cur.execute(ddl)
con.commit()


## Random Sampling
### Stratified

In [8]:
# Stratified sampling: for each base-user category, draw up to `limit` random
# followed accounts and request their tweets from the last day, stopping at the
# first response whose first tweet carries context annotations. That response
# is left in `auth_response_RESPONSE` for the next cell.
categories = pd.read_sql_query("SELECT category FROM twitter_base_users",con)
groups = list(set(categories['category'].tolist()))

# The timestamp is labelled "Z" (UTC), so compute "yesterday" in UTC as well.
date = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=1)
date = date.strftime("%Y-%m-%dT00:00:00Z")

limit = 300
found_annotated = False
# Safe default so the next cell's result_count check works when nothing is sampled.
auth_response_RESPONSE = {'meta': {'result_count': 0}}

for group in groups:
    user_ids_list = pd.read_sql_query("""SELECT a.user_id
                                         FROM twitter_users a
                                         JOIN twitter_base_users b ON a.parent_id = b.user_id
                                         WHERE b.category = ?
                                         ORDER BY RANDOM()
                                         LIMIT ?
                                         """, con, params=(group, limit))
    sample_set = list(set(user_ids_list['user_id'].tolist()))
    try:
        for s_id in sample_set:
            BASE_URL = f'https://api.twitter.com/2/users/{s_id}/tweets?max_results=100&start_time={date}&tweet.fields=id,author_id,context_annotations,conversation_id,created_at,entities,lang,public_metrics,text'
            auth_response_QUERY = requests.get(BASE_URL,  headers=headers)
            auth_response_RESPONSE = json.loads(auth_response_QUERY.text)
            if auth_response_RESPONSE['meta']['result_count'] > 0:
                if 'context_annotations' in auth_response_RESPONSE['data'][0]:
                    found_annotated = True
                    break
    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError) as exc:
        # Best effort: a failed request or an error payload (e.g. rate limit,
        # no 'meta' key) abandons this group and moves on to the next one.
        print(f"Skipping remaining users in group {group!r}: {exc!r}")
        continue
    # Decide on the flag rather than re-reading the response: the last response
    # may have no 'data' key at all when the user posted nothing.
    if found_annotated:
        break

In [23]:
# Flatten the first tweet of the sampled response into `input_dict` and store
# it, its author link and its context annotations in the database.
if auth_response_RESPONSE['meta']['result_count'] > 0:
    tweet = auth_response_RESPONSE['data'][0]
    # 'entities' is omitted by the API for tweets without hashtags/urls/mentions.
    entities = tweet.get('entities', {})

    input_dict = {
    'lang' : tweet.get('lang'),
    'hashtags' : [tag['tag'] for tag in entities.get('hashtags', [])],
    'url' : [],
    'title' : [],
    'tweet_domain' : [],
    'tweet_entity' : [],
    # Copy without the character offsets instead of popping from the response.
    'tweet_mentions' : [
        {k: v for k, v in mention.items() if k not in ('start', 'end')}
        for mention in entities.get('mentions', [])
    ],
    'conversation_id' : tweet.get('conversation_id'),
    'tweet_id' : tweet['id'],
    # Raw text: values are bound as SQL parameters below, so no quote escaping.
    'text' : tweet['text'],
    'author_id' : tweet['author_id'],
    'created_at' : tweet['created_at'],
    'public_metrics': tweet['public_metrics'],
    'nea' : [
        {k: v for k, v in annotation.items() if k not in ('start', 'end', 'probability')}
        for annotation in entities.get('annotations', [])
    ]
    }

    # Authors tweeting in another language are moved out of the sampling pool.
    # Processing of this tweet still continues afterwards, as before.
    if input_dict['lang'] is not None and input_dict['lang'] != 'en':
        cur.execute("DELETE FROM twitter_users WHERE user_id = ?", (input_dict['author_id'],))
        cur.execute("INSERT INTO twitter_forgien_lang_user (lang, user_id) VALUES (?, ?)",
                    (input_dict['lang'], input_dict['author_id']))
        con.commit()

    # The API key is 'urls' (plural); only expanded links are kept.
    for link in entities.get('urls', []):
        if 'unwound_url' in link:
            input_dict['url'].append(link['unwound_url'])
            input_dict['title'].append(link.get('title'))

    for annotation in tweet.get('context_annotations', []):
        input_dict['tweet_domain'].append(annotation['domain'])
        input_dict['tweet_entity'].append(annotation['entity'])

    metrics = input_dict['public_metrics']
    # A missing conversation_id is bound as NULL rather than the text "None".
    tweet_query = "INSERT INTO twitter_tweet VALUES (?, ?, ?, ?, ?, ?, ?, ?)"
    cur.execute(tweet_query, (
        input_dict['tweet_id'],
        input_dict['conversation_id'],
        input_dict['text'],
        metrics['like_count'],
        metrics['retweet_count'],
        metrics['reply_count'],
        metrics['quote_count'],
        input_dict['created_at'],
    ))
    user_tweet_join_query = "INSERT INTO twitter_tweet_to_user VALUES (?, ?)"
    cur.execute(user_tweet_join_query, (input_dict['tweet_id'], input_dict['author_id']))

    # Entities and domains share one definition table, keyed by (id, type).
    # Every annotation is written; previously each loop pass overwrote the
    # statement text and nothing was executed.
    for kind, link_table, definitions in (
        ('entity', 'twitter_tweet_conText_entity', input_dict['tweet_entity']),
        ('domain', 'twitter_tweet_conText_domain', input_dict['tweet_domain']),
    ):
        for definition in definitions:
            cur.execute(f"INSERT INTO {link_table} VALUES (?, ?)",
                        (input_dict['tweet_id'], definition['id']))
            # 'description' is optional in the API payload.
            cur.execute(
                "INSERT INTO twitter_tweet_conText_def VALUES (?, ?, ?, ?) ON CONFLICT(id,type) DO NOTHING",
                (definition['id'], definition.get('name'), definition.get('description'), kind),
            )
    con.commit()
    