# API testing

In [2]:
from dotenv import load_dotenv
import os
import tweepy
import time
import json
import sys

def get_client():
    # load API keys
    load_dotenv()
    CONSUMER_KEY = os.getenv('CONSUMER_KEY')
    CONSUMER_SECRET = os.getenv('CONSUMER_SECRET')
    ACCESS_TOKEN= os.getenv('ACCESS_TOKEN')
    ACCESS_TOKEN_SECRET = os.getenv('ACCESS_TOKEN_SECRET')
    BEARER_TOKEN=os.getenv('BEARER_TOKEN')

    client = tweepy.Client(
        bearer_token=BEARER_TOKEN,
        consumer_key=CONSUMER_KEY,
        consumer_secret=CONSUMER_SECRET,
        access_token=ACCESS_TOKEN,
        access_token_secret=ACCESS_TOKEN_SECRET)
    user = client.get_me()
    if not user.data:
        raise Exception('[ERROR] unable to make requests with Bearer Token, check your .env file and API keys!')
    
    print(f'[INFO] authenticated using account @{user.data.username}')

    return client

def get_streaming_client():
    """
    overload methods from inherited class to customize processing of incoming stream data
    """
    class CustomSC(tweepy.StreamingClient):

        def on_tweet(self, tweet):
            print(tweet.data)
            with open('test_data/tweets.txt', 'a') as f:
                f.write(json.dumps(tweet.data)+'\n')
    
    load_dotenv()
    BEARER_TOKEN=os.getenv('BEARER_TOKEN')
    return CustomSC(bearer_token=BEARER_TOKEN)

def check_rule(streaming_client: tweepy.StreamingClient, rule: str):
    rules_response = streaming_client.get_rules()
    rules= [tweepy.StreamRule(value=rule, tag='default')]

    if not rules_response.data:
        print('[INFO] no rules found, adding rule')
        return streaming_client.add_rules(add=rules)

    elif rules_response.data and rules_response.data[0].value != rule:
        print('[INFO] updating old rule')
        streaming_client.delete_rules(rules_response.data[0].id)
        return streaming_client.add_rules(add=rules)

    elif rules_response.data[0].value == rule:
        print('[INFO] current rule matches')
        return streaming_client.get_rules()


def main(streaming_client: tweepy.StreamingClient):

    streaming_client.filter(threaded=True)
    
    i = 0
    time_to_listen = 10
    print(f'[INFO] listening for {time_to_listen} seconds...')
    for i in range(1,time_to_listen):
        time.sleep(1)
        i += 1
    print("[INFO] done streaming")
    
    streaming_client.disconnect()

    return 0




In [11]:


streaming_client = get_streaming_client()
client=get_client()

[INFO] authenticated using account @detectatron3000


In [26]:
streaming_client.filter(tweet_fields='id,text,referenced_tweets')

{'edit_history_tweet_ids': ['1598859249496444930'], 'id': '1598859249496444930', 'referenced_tweets': [{'type': 'replied_to', 'id': '1598858635698114562'}], 'text': '@sneakygollum @KomodoLord @KantoChris @TeacherTempest @D_Boi408 @JennTansley @TeejMills @shunyLucario @Rachel1991Burke @wingedaero @firemasterMT @SteemyBonBon Oh trust me I know. I caught all of mine at full health with luxury balls. I was to scared I might kill it. Even though I save I’m always hesitant to trust just closing the game and trying again. So I try not to take risks lol.'}
{'edit_history_tweet_ids': ['1598859249857093634'], 'id': '1598859249857093634', 'referenced_tweets': [{'type': 'replied_to', 'id': '1598852152327213056'}], 'text': '@amolinguas @BrassVon @BumpstockBarbie @travisdon1981 And also, the "Publican v Pharisee" parable'}
{'edit_history_tweet_ids': ['1598859252743176195'], 'id': '1598859252743176195', 'referenced_tweets': [{'type': 'replied_to', 'id': '1598776016096448514'}], 'text': '@KamalaHarris

KeyboardInterrupt: 

In [10]:
t = '{"edit_history_tweet_ids": ["1598851902548025346"], "id": "1598851902548025346", "referenced_tweets": [{"type": "retweeted", "id": "1598831435288563712"}], "text": "RT @mtaibbi: 18. Twitter took extraordinary steps to suppress the story, removing links and posting warnings that it may be \u201cunsafe.\u201d They\u2026"}'
import json 
d = json.loads(t)
d

{'edit_history_tweet_ids': ['1598851902548025346'],
 'id': '1598851902548025346',
 'referenced_tweets': [{'type': 'retweeted', 'id': '1598831435288563712'}],

# data set wrangling

In [1]:
import pandas as pd

In [6]:
df_gpt = pd.read_csv('data/gpt.tsv', sep='\t', header=0, on_bad_lines='skip')
df_gpt['human'] = 0
df_gpt['reply_text'] = df_gpt['reply_text'].str.replace('\n', '')
df_gpt 

Unnamed: 0,tweet_id,tweet_text,reply_id,reply_text,human
0,1602798018587234304,@SettTheBoss_ Ohh I won't I promise you I be g...,cmpl-6N8UUuAbdW1HuboWiLpIe4seLzoiw,"Well, I'm a 6'7"" giant, so I'm a bit tall, hah...",0
1,1602797717105029120,lmao good thing I overslept,cmpl-6N8UWaBmuF83cgk94wNTZtz708ocE,Glad you got the extra rest! It's always nice ...,0
2,1602782118303580161,@Tohabisha @LotteSara This tweet be going craz...,cmpl-6N8UXStT4zA7Karfa09VWMGPUUoN9,"Glad to see everyone's having fun, but let's k...",0
3,1602796335954108417,@ImJohnEli Yeah it used to be Nuclear fusion i...,cmpl-6N8UYzuMsqLjw20pF5wUBGXXg43xL,It certainly looks like a breakthrough! It's s...,0
4,1601624202041491456,@sjynxs This did numbers!!!,cmpl-6N8UZQ0q6TKHpROluqV0JmE36pcJA,Nice! Glad to hear it did well. Congrats!,0
...,...,...,...,...,...
6312,1602798346267156481,@catrasdeans i’ll never understand why netflix...,cmpl-6NCUGSWVNuXzMkqFAF2VsrwCDHJ2U,It's time for Netflix to stop marginalizing LG...,0
6313,1602795608275881985,@TeslaSynopsis Straight facts. Diamond hands t...,cmpl-6NCUHnghIHzFQmpNwBVc5RL4FWCfs,"Let's do it! 🤩 With #Tesla leading the way, it...",0
6314,1602798815035244544,@bparispoker why aggressively bro lol. also i...,cmpl-6NCUIM0FMdwunrzFQdMdvIRSp3Evc,It totally depends on the situation! But good ...,0
6315,1602764499202805761,So I still want to build a site where sellers ...,cmpl-6NCUJcAJegNCbvc7mD7uBPzXCSKw0,"It can be done, but the budget may be too low,...",0


In [28]:
df_human = pd.read_csv('data/tweets.tsv', sep='\t', header=0, on_bad_lines='skip')
df_human = df_human[['tweet_text']].rename(columns={'tweet_text': 'reply_text'}).dropna()

import re

def remove_usernames(text: str):
    return re.sub(r'@\w+(\s)', r'\1', text)


df_human['reply_text'] = df_human['reply_text'].apply(lambda val: remove_usernames(val))
df_human.head(50)

Unnamed: 0,reply_text
0,“Yeah! Not quite a human you see. That’s why....
1,yeah 😕
2,ngl at this point you should give them a re...
3,Buzz kill. 🤪
4,it actually didn't
5,What's the correct score for the Final then?
6,Like Apple? Create a phone already Elon! I’m...
7,And also world best
8,Sure!!!!😂
9,Zero. But I've got a few 4.5s and a lot of 4s


In [18]:
import re
def remove_usernames(text: str):
    return re.sub(r'(@\w+)', r'\1', text)

remove_usernames('@TaigaKagami89 “Yeah! Not quite a human you see. That’s why. And you better be, don’t want me to be mad don’t ya?”   The taller male grunted with a little smirk and winking back.')

'@TaigaKagami89 “Yeah! Not quite a human you see. That’s why. And you better be, don’t want me to be mad don’t ya?”   The taller male grunted with a little smirk and winking back.'

In [50]:
def _read_from_tsv(machine_replies_fn: str, human_replies_fn: str):
    # read data from .tsv files
    df_gpt = pd.read_csv(machine_replies_fn, sep='\t', header=0, on_bad_lines='skip')
    df_human = pd.read_csv(human_replies_fn, sep='\t', header=0, on_bad_lines='skip')
    
    df_gpt['reply_text'] = df_gpt['reply_text'].str.replace('\n', '') # fix read-in bug/errors
    df_gpt['human'] = 0
    df_human['human'] = 1

    return df_gpt, df_human

df_gpt, df_human = _read_from_tsv('data/gpt.tsv', 'data/tweets.tsv')

# fix column names and join
df_gpt = df_gpt.rename(columns={'reply_text': 'gpt3'})[['tweet_id', 'gpt3']]
df_human = df_human.rename(columns={'parent_id': 'tweet_id', 'tweet_id': 'human_reply_id'})[['tweet_id', 'human_reply_id']]

df = df_human.join(df_gpt.set_index('tweet_id'), on='tweet_id')

df = df.drop_duplicates('tweet_id')
df



Unnamed: 0,tweet_id,human_reply_id,gpt3
0,1602798018587234304,1.602798e+18,"Well, I'm a 6'7"" giant, so I'm a bit tall, hah..."
1,1602797717105029120,1.602798e+18,Glad you got the extra rest! It's always nice ...
2,1602782118303580161,1.602798e+18,"Glad to see everyone's having fun, but let's k..."
3,1602796335954108417,1.602798e+18,It certainly looks like a breakthrough! It's s...
4,1601624202041491456,1.602798e+18,Nice! Glad to hear it did well. Congrats!
...,...,...,...
6314,1602798346267156481,1.602799e+18,It's time for Netflix to stop marginalizing LG...
6315,1602795608275881985,1.602799e+18,"Let's do it! 🤩 With #Tesla leading the way, it..."
6316,1602798815035244544,1.602799e+18,It totally depends on the situation! But good ...
6317,1602764499202805761,1.602799e+18,"It can be done, but the budget may be too low,..."


In [52]:
df = df.dropna()
df.astype({'human_reply_id': 'int64'})

Unnamed: 0,tweet_id,human_reply_id,gpt3
0,1602798018587234304,1602798438986518528,"Well, I'm a 6'7"" giant, so I'm a bit tall, hah..."
1,1602797717105029120,1602798436478222336,Glad you got the extra rest! It's always nice ...
2,1602782118303580161,1602798438210494464,"Glad to see everyone's having fun, but let's k..."
3,1602796335954108417,1602798439737311232,It certainly looks like a breakthrough! It's s...
4,1601624202041491456,1602798438827102208,Nice! Glad to hear it did well. Congrats!
...,...,...,...
6314,1602798346267156481,1602799058879655936,It's time for Netflix to stop marginalizing LG...
6315,1602795608275881985,1602799057109671936,"Let's do it! 🤩 With #Tesla leading the way, it..."
6316,1602798815035244544,1602799060263784448,It totally depends on the situation! But good ...
6317,1602764499202805761,1602799060569755648,"It can be done, but the budget may be too low,..."


In [None]:
df_human_r = df_human.rename(columns={'parent_id': 'tweet_id', 'tweet_id': 'human_reply_id'})[['tweet_id', 'human_reply_id']]


df_gpt_r = df_gpt.rename(columns={'reply_text': 'gpt3'})[['tweet_id', 'gpt3']]


df_i = df_human_r.join(df_gpt_r.set_index('tweet_id'), on='tweet_id', how='outer')
df_i.dropna()
df_i = df_i.drop_duplicates('tweet_id') # keep only one human-machine pair per original tweet
df_i

Unnamed: 0,tweet_id,human_reply_id,gpt3
0,1601625024422907909,1601626951768473600,We'll have to look at the data to see if this ...
1,1601576051330588672,1601626952921587713,"Yes, Brazil has been super successful in Inter..."
2,1601626895811870720,1601626953920163840,"Nah, he just got a really nice refund—no funny..."
3,1601588982047203328,1601626952825438208,I'm glad you raised this concern! We want to b...
4,1601626515468582915,1601626953572052992,Sorry you feel that way! We can all appreciate...
...,...,...,...
1185,1601626681818894337,1601627055917256704,Ronaldo is a professional soccer (football) pl...
1186,1601626349524746241,1601627051584208896,That’s too bad! There are lots of other ways t...
1187,1600924065614958592,1601627055569145856,Love is a complex emotion and cannot be reduce...
1188,1601578319035916288,1601627059372969984,That sounds rough! Let me know if you find out...


In [None]:
df_i.to_csv('test_data/test.tsv', sep='\t', index=False)

In [12]:
import pandas as pd
import re


def _read_from_tsv(machine_replies_fn: str, human_replies_fn: str):
    # read data from .tsv files
    df_gpt = pd.read_csv(machine_replies_fn, sep='\t',
                         header=0, on_bad_lines='skip')
    df_human = pd.read_csv(human_replies_fn, sep='\t',
                           header=0, on_bad_lines='skip')

    df_gpt['reply_text'] = df_gpt['reply_text'].str.replace(
        '\n', '')  # fix read-in bug/errors
    df_gpt['human'] = 0
    df_human['human'] = 1

    return df_gpt, df_human


df_gpt, df_human = _read_from_tsv('data/gpt.tsv', 'data/tweets.tsv')


df_gpt_c = df_gpt[['tweet_id', 'reply_text', 'human']]
df_human_c = df_human[['parent_id', 'tweet_text', 'human']].rename(
    columns={'parent_id': 'tweet_id', 'tweet_text': 'reply_text'})

# process data here


def remove_usernames(text: str):
    return re.sub(r'@\w+(\s)', r'\1', text)


df_human_c['reply_text'] = df_human_c['reply_text'].dropna().apply(
    lambda val: remove_usernames(val))

df_combined = pd.concat([df_gpt_c, df_human_c], axis=0)
df_combined


Unnamed: 0,tweet_id,reply_text,human
0,1602798018587234304,"Well, I'm a 6'7"" giant, so I'm a bit tall, hah...",0
1,1602797717105029120,Glad you got the extra rest! It's always nice ...,0
2,1602782118303580161,"Glad to see everyone's having fun, but let's k...",0
3,1602796335954108417,It certainly looks like a breakthrough! It's s...,0
4,1601624202041491456,Nice! Glad to hear it did well. Congrats!,0
...,...,...,...
6314,1602798346267156481,Same here. They just don’t like when wlw/nblw...,1
6315,1602795608275881985,Let's goooo,1
6316,1602798815035244544,Yeah tweet didn't seem to get much traction t...,1
6317,1602764499202805761,You can build that for way less bro,1
