In [1]:
!pip install -U datasets
!pip install wandb
!pip install torch
!pip install Cython

!pip install torch torchvision torchaudio
!pip install -U transformers
!pip install peft
!pip install -U bitsandbytes
!pip install tensorboard
!pip install accelerate -U
!pip install pandas
!pip install tqdm
!pip install paramiko scp

Collecting datasets
  Downloading datasets-2.17.1-py3-none-any.whl.metadata (20 kB)
Collecting pyarrow>=12.0.0 (from datasets)
  Downloading pyarrow-15.0.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (3.0 kB)
Collecting pyarrow-hotfix (from datasets)
  Downloading pyarrow_hotfix-0.6-py3-none-any.whl.metadata (3.6 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting pandas (from datasets)
  Downloading pandas-2.2.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (19 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2023.10.0,>=2023.1.0 (from fsspec[http]<=2023.10.0,>=2023.1.0->datasets)
  Downloading fsspec-2023.10.0-py3-none-any.whl.metadata (6.8 kB)
Collecting aiohttp (from data

In [2]:
from datetime import datetime
from tqdm import tqdm
import os
import sys
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig, AutoTokenizer

from peft import (
    PeftModel,
    LoraConfig,
    get_peft_model,
    get_peft_model_state_dict,
    prepare_model_for_int8_training,
    set_peft_model_state_dict,
)

base_model = "mistralai/Mistral-7B-v0.1"
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    load_in_8bit=True,
    torch_dtype=torch.float16,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")

model = PeftModel.from_pretrained(model, "dohonba/mistral_7b_fingpt")
model.eval()

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/967 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

adapter_config.json:   0%|          | 0.00/654 [00:00<?, ?B/s]

adapter_model.bin:   0%|          | 0.00/336M [00:00<?, ?B/s]

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): MistralForCausalLM(
      (model): MistralModel(
        (embed_tokens): Embedding(32000, 4096)
        (layers): ModuleList(
          (0-31): 32 x MistralDecoderLayer(
            (self_attn): MistralSdpaAttention(
              (q_proj): lora.Linear8bitLt(
                (base_layer): Linear8bitLt(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=32, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=32, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
              )
              (k_proj): lora.Linear8bitLt(
                (base_l

In [3]:
# eval_prompt = """
# Is this sentence self-promotional? Answer with {no/yes}? "Building brick by brick, our analysts motto! Pay a visit to our Community".
# """

# model_input = tokenizer(eval_prompt, return_tensors="pt").to("cuda")

# with torch.no_grad():
#     output = model.generate(**model_input, max_new_tokens=150)[0]
#     decoded_output = tokenizer.decode(output, skip_special_tokens=True)

# print(decoded_output)

In [3]:
# Function to classify emotion of a sentence
def classify_sentiment(sentence):
    eval_prompt = f"""Context: {sentence}

Question: 'What is the sentiment of this sentence? Please choose an answer from {{strong negative/moderately negative/mildly negative/neutral/mildly positive/moderately positive/strong positive}}.'
"""
    model_input = tokenizer(eval_prompt, return_tensors="pt").to("cuda")

    with torch.no_grad():
        output = model.generate(**model_input, max_new_tokens=150)[0]
        decoded_output = tokenizer.decode(output, skip_special_tokens=True)

    answer = decoded_output.split("Answer: ", 1)[1]
    return answer  # You might need to further process this to extract the emotion

def classify_emotion(sentence):
    eval_prompt = f"""Context: {sentence}

Question: 'What is the emotion shown in this text? Please choose an answer from {{anger/fear/joy/love/sadness/surprise/neutral}}'.
"""
    model_input = tokenizer(eval_prompt, return_tensors="pt").to("cuda")

    with torch.no_grad():
        output = model.generate(**model_input, max_new_tokens=150)[0]
        decoded_output = tokenizer.decode(output, skip_special_tokens=True)

    answer = decoded_output.split("Answer: ", 1)[1]
    return answer  # You might need to further process this to extract the emotion

In [4]:
# classify_emotion("I love it. Thanks.")

In [5]:
import paramiko
from scp import SCPClient

def create_ssh_client(server, port, user, password):
    client = paramiko.SSHClient()
    client.load_system_host_keys()
    client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    client.connect(server, port, user, password, compress=True)
    return client

def upload_files(ssh_client, local_path, remote_path):
    with SCPClient(ssh_client.get_transport()) as scp:
        scp.put(local_path, remote_path)  # Use put for uploading

def append_file(ssh_client, local_path, remote_path, temp_path):
    # Step 1: Transfer the file to a temporary location
    upload_files(ssh_client, local_path, temp_path)

    temp_path = temp_path.replace("/", "\\")
    remote_path = remote_path.replace("/", "\\")

    # Step 2: Append the content of the temporary file to the target file
    command = f'type {temp_path} >> {remote_path} & del {temp_path}'
    stdin, stdout, stderr = ssh_client.exec_command(command)
    exit_status = stdout.channel.recv_exit_status()  # Wait for the command to complete
    
    # Reading the output of the command
    output = stdout.read().decode('utf-8')
    error = stderr.read().decode('utf-8')

    # Check if command was successful
    if exit_status == 0:
        print("Command executed successfully")
    else:
        print(f"Command failed with exit status {exit_status}")

    # Optional: Print the outputs for debugging or logging
    if output:
        print("Output:", output)
        
    if error:
        print("Error:", error)

    return not exit_status

def download_files(ssh_client, remote_path, local_path):
    with SCPClient(ssh_client.get_transport()) as scp:
        scp.get(remote_path, local_path)

# Optional: Execute a command or run a script on the remote machine
# stdin, stdout, stderr = ssh_client.exec_command('python /path/to/remote/script.py')
# print(stdout.read().decode())  # Assuming the script has output

def close_client(ssh_client):
    # Close the SSH connection
    ssh_client.close()

In [None]:
import json
from datetime import datetime, timedelta
import pandas as pd
import pytz
from tqdm import tqdm
import time
from dateutil import parser

server = 'sshhop.hopto.org'
port = 22
user = 'mum'
password = '1234'

ticker = "TSLA"

def download_tweets(file_path):
    ssh_client = create_ssh_client(server, port, user, password)
    download_files(ssh_client, f'/C:/Users/Mum/Documents/news_aggregation_ipynb/{file_path}', file_path)
    close_client(ssh_client)

def upload_processed_tweets(localprocessed_file_name, final_tweets_file_name):
    ssh_client = create_ssh_client(server, port, user, password)
    final_tweets_file_path = f"C:/Users/Mum/Documents/news_aggregation_ipynb/{final_tweets_file_name}"
    temp_final_tweets_file_path = f"C:/Users/Mum/Documents/news_aggregation_ipynb/temp_{final_tweets_file_name}"
    print(final_tweets_file_path)
    print(temp_final_tweets_file_path)
    result = append_file(ssh_client, f'./{localprocessed_file_name}', final_tweets_file_path, temp_final_tweets_file_path)
    # Delete the file
    close_client(ssh_client)
    return result

def compare_tweets_and_return_new(tweets_file_path, final_tweets_file_path):
    # Download recent tweets
    download_tweets(tweets_file_path)
    download_tweets(final_tweets_file_path)

    # Load IDs from the final tweets file
    final_tweet_ids = set()
    with open(final_tweets_file_path, 'r', encoding='utf-8') as file:
        for line in file:
            tweet = json.loads(line)
            final_tweet_ids.add(tweet['id'])

    # Load tweets from the initial file and filter out those that exist in the final file
    new_tweets = []
    with open(tweets_file_path, 'r', encoding='utf-8') as file:
        for line in file:
            tweet = json.loads(line)
            if tweet['id'] not in final_tweet_ids:
                # Aggregate created_at and date, then convert to datetime
                post_time = tweet.get('created_at') or tweet.get('date')
                tweet['post_time'] = parser.parse(post_time)
                new_tweets.append(tweet)

    new_tweets.sort(key=lambda x: x['post_time'], reverse=True)
    return new_tweets

def save_processed_tweets(localprocessed_file_path, tweets):
    with open(localprocessed_file_path, 'w', encoding='utf-8') as file:
        for tweet in tweets:
            # Create a new dictionary with only the required fields
            filtered_tweet = {
                'id': tweet['id'],  # Assuming each tweet has a unique 'id'
                'date': tweet.get('date', tweet.get('created_at', '')),
                'cleanContent': tweet.get('cleanContent', ''),
                'rawContent': tweet.get('rawContent', ''),
                'url': tweet.get('url', ''),
                'emotion': tweet.get('emotion', ''),
                'sentiment': tweet.get('sentiment', '')
            }
            json.dump(filtered_tweet, file)
            file.write('\n')

# Calculate the cutoff time for the last 30 minutes
# cutoff_time = datetime.utcnow().replace(tzinfo=pytz.utc) - timedelta(minutes=30)

twitter_file_path = f'{ticker}_tweets.jsonl'
final_twitter_file_path = f'final_{twitter_file_path}'
stocktweets_file_path = f'{ticker}_stocktweets.jsonl'
final_stocktweets_file_path = f'final_{stocktweets_file_path}'

def process_and_save_tweets(tweets, ticker, final_tweets_file_path, localprocessed_file_path):
    sentiment_results = []
    emotion_results = []
    counter = 0
    to_save = []

    for i, tweet in enumerate(tqdm(tweets, desc="Analyzing Tweets")):
        clean_content = tweet.get('cleanContent', '')
        
        sentiment = classify_sentiment(clean_content)
        emotion = classify_emotion(clean_content)
        
        sentiment_results.append(f"{i}: " + sentiment)
        emotion_results.append(f"{i}: " + emotion)
        
        tweet['sentiment'] = sentiment
        tweet['emotion'] = emotion
        to_save.append(tweet)
    
        counter += 1
        # Save every 10 tweets or on the last tweet
        if counter % 20 == 0 or i == len(tweets) - 1:
            save_processed_tweets(localprocessed_file_path, to_save)
            result = upload_processed_tweets(localprocessed_file_path, final_tweets_file_path)    
            if result:
                print(f"Saved up to tweet {i+1}")
                to_save = []  # Reset the list for the next batch

    # Print results
    print(sentiment_results)
    print(emotion_results)
    for i, tweet in enumerate(tweets):
        print(f"{i}: " + tweet.get('cleanContent', ''))
    
    for i, tweet in enumerate(stocktweets):
        print(f"{i}: " + tweet.get('cleanContent', ''))

counter = 0
while True:
    print("downloading")
    tweets = compare_tweets_and_return_new(twitter_file_path, final_twitter_file_path)
    print("downloaded")
    print("downloading")
    stocktweets = compare_tweets_and_return_new(stocktweets_file_path, final_stocktweets_file_path)
    print("downloaded")

    print("Twitter:", ticker)
    process_and_save_tweets(tweets, ticker, final_twitter_file_path, f'processed_{twitter_file_path}')
    print("Stocktwits:", ticker)
    process_and_save_tweets(stocktweets, ticker, final_stocktweets_file_path, f'processed_{twitter_file_path}')

    counter += 1
    print(f"+-+-+-+-+-+-+-+-+-+-Cycles Completed: {counter}+-+-+-+-+-+-+-+-+-+-")
    for i in range(60):
        time.sleep(1)



downloading
downloaded
downloading
downloaded
Twitter: TSLA


Analyzing Tweets:   0%|          | 0/1332 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:   0%|          | 1/1332 [00:06<2:22:01,  6.40s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:   0%|          | 2/1332 [00:10<1:48:10,  4.88s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:   0%|          | 3/1332 [00:14<1:37:29,  4.40s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:   0%|          | 4/1332 [00:17<1:26:43,  3.92s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end ge

C:/Users/Mum/Documents/news_aggregation_ipynb/final_TSLA_tweets.jsonl
C:/Users/Mum/Documents/news_aggregation_ipynb/temp_final_TSLA_tweets.jsonl


Analyzing Tweets:   2%|▏         | 20/1332 [01:20<1:38:57,  4.53s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Command executed successfully
Saved up to tweet 20


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:   2%|▏         | 21/1332 [01:24<1:35:12,  4.36s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:   2%|▏         | 22/1332 [01:28<1:32:19,  4.23s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:   2%|▏         | 23/1332 [01:32<1:27:33,  4.01s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:   2%|▏         | 24/1332 [01:36<1:26:45,  3.98s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:   2%|▏         | 25/1332 [01:40<1:26:35,  3.98s/it]Setting `pad_token_id` to `eos_token_id`:2 

C:/Users/Mum/Documents/news_aggregation_ipynb/final_TSLA_tweets.jsonl
C:/Users/Mum/Documents/news_aggregation_ipynb/temp_final_TSLA_tweets.jsonl


Analyzing Tweets:   3%|▎         | 40/1332 [02:39<1:33:57,  4.36s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Command executed successfully
Saved up to tweet 40


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:   3%|▎         | 41/1332 [02:43<1:29:53,  4.18s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:   3%|▎         | 42/1332 [02:47<1:25:10,  3.96s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:   3%|▎         | 43/1332 [02:51<1:25:28,  3.98s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:   3%|▎         | 44/1332 [02:54<1:21:36,  3.80s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:   3%|▎         | 45/1332 [02:58<1:23:18,  3.88s/it]Setting `pad_token_id` to `eos_token_id`:2 

C:/Users/Mum/Documents/news_aggregation_ipynb/final_TSLA_tweets.jsonl
C:/Users/Mum/Documents/news_aggregation_ipynb/temp_final_TSLA_tweets.jsonl


Analyzing Tweets:   5%|▍         | 60/1332 [03:58<1:38:36,  4.65s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Command executed successfully
Saved up to tweet 60


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:   5%|▍         | 61/1332 [04:02<1:32:44,  4.38s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:   5%|▍         | 62/1332 [04:05<1:26:29,  4.09s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:   5%|▍         | 63/1332 [04:09<1:26:23,  4.08s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:   5%|▍         | 64/1332 [04:13<1:23:57,  3.97s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:   5%|▍         | 65/1332 [04:17<1:24:06,  3.98s/it]Setting `pad_token_id` to `eos_token_id`:2 

C:/Users/Mum/Documents/news_aggregation_ipynb/final_TSLA_tweets.jsonl
C:/Users/Mum/Documents/news_aggregation_ipynb/temp_final_TSLA_tweets.jsonl


Analyzing Tweets:   6%|▌         | 80/1332 [05:18<1:37:37,  4.68s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Command executed successfully
Saved up to tweet 80


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:   6%|▌         | 81/1332 [05:22<1:34:21,  4.53s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:   6%|▌         | 82/1332 [05:26<1:26:42,  4.16s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:   6%|▌         | 83/1332 [05:30<1:25:54,  4.13s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:   6%|▋         | 84/1332 [05:34<1:25:04,  4.09s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:   6%|▋         | 85/1332 [05:38<1:25:08,  4.10s/it]Setting `pad_token_id` to `eos_token_id`:2 

C:/Users/Mum/Documents/news_aggregation_ipynb/final_TSLA_tweets.jsonl
C:/Users/Mum/Documents/news_aggregation_ipynb/temp_final_TSLA_tweets.jsonl


Analyzing Tweets:   8%|▊         | 100/1332 [06:40<1:36:34,  4.70s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Command executed successfully
Saved up to tweet 100


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:   8%|▊         | 101/1332 [06:43<1:31:38,  4.47s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:   8%|▊         | 102/1332 [06:48<1:29:21,  4.36s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:   8%|▊         | 103/1332 [06:52<1:28:27,  4.32s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:   8%|▊         | 104/1332 [06:56<1:25:41,  4.19s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:   8%|▊         | 105/1332 [07:00<1:23:15,  4.07s/it]Setting `pad_token_id` to `eos_token_i

C:/Users/Mum/Documents/news_aggregation_ipynb/final_TSLA_tweets.jsonl
C:/Users/Mum/Documents/news_aggregation_ipynb/temp_final_TSLA_tweets.jsonl


Analyzing Tweets:   9%|▉         | 120/1332 [08:01<1:33:25,  4.62s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Command executed successfully
Saved up to tweet 120


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:   9%|▉         | 121/1332 [08:05<1:30:23,  4.48s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:   9%|▉         | 122/1332 [08:09<1:28:05,  4.37s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:   9%|▉         | 123/1332 [08:13<1:24:18,  4.18s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:   9%|▉         | 124/1332 [08:17<1:23:59,  4.17s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:   9%|▉         | 125/1332 [08:21<1:24:03,  4.18s/it]Setting `pad_token_id` to `eos_token_i

C:/Users/Mum/Documents/news_aggregation_ipynb/final_TSLA_tweets.jsonl
C:/Users/Mum/Documents/news_aggregation_ipynb/temp_final_TSLA_tweets.jsonl


Analyzing Tweets:  11%|█         | 140/1332 [09:26<1:39:15,  5.00s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Command executed successfully
Saved up to tweet 140


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  11%|█         | 141/1332 [09:30<1:32:13,  4.65s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  11%|█         | 142/1332 [09:34<1:25:00,  4.29s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  11%|█         | 143/1332 [09:37<1:20:02,  4.04s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  11%|█         | 144/1332 [09:40<1:16:34,  3.87s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  11%|█         | 145/1332 [09:45<1:18:14,  3.96s/it]Setting `pad_token_id` to `eos_token_i

C:/Users/Mum/Documents/news_aggregation_ipynb/final_TSLA_tweets.jsonl
C:/Users/Mum/Documents/news_aggregation_ipynb/temp_final_TSLA_tweets.jsonl


Analyzing Tweets:  12%|█▏        | 160/1332 [10:46<1:25:53,  4.40s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Command executed successfully
Saved up to tweet 160


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  12%|█▏        | 161/1332 [10:50<1:23:32,  4.28s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  12%|█▏        | 162/1332 [10:53<1:18:13,  4.01s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  12%|█▏        | 163/1332 [10:57<1:19:36,  4.09s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  12%|█▏        | 164/1332 [11:02<1:19:59,  4.11s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  12%|█▏        | 165/1332 [11:05<1:19:10,  4.07s/it]Setting `pad_token_id` to `eos_token_i

C:/Users/Mum/Documents/news_aggregation_ipynb/final_TSLA_tweets.jsonl
C:/Users/Mum/Documents/news_aggregation_ipynb/temp_final_TSLA_tweets.jsonl


Analyzing Tweets:  14%|█▎        | 180/1332 [12:06<1:25:35,  4.46s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Command executed successfully
Saved up to tweet 180


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  14%|█▎        | 181/1332 [12:09<1:19:37,  4.15s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  14%|█▎        | 182/1332 [12:13<1:18:47,  4.11s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  14%|█▎        | 183/1332 [12:18<1:18:41,  4.11s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  14%|█▍        | 184/1332 [12:22<1:18:43,  4.11s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  14%|█▍        | 185/1332 [12:26<1:18:56,  4.13s/it]Setting `pad_token_id` to `eos_token_i

C:/Users/Mum/Documents/news_aggregation_ipynb/final_TSLA_tweets.jsonl
C:/Users/Mum/Documents/news_aggregation_ipynb/temp_final_TSLA_tweets.jsonl


Analyzing Tweets:  15%|█▌        | 200/1332 [13:26<1:21:37,  4.33s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Command executed successfully
Saved up to tweet 200


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  15%|█▌        | 201/1332 [13:30<1:19:54,  4.24s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  15%|█▌        | 202/1332 [13:34<1:18:59,  4.19s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  15%|█▌        | 203/1332 [13:38<1:14:13,  3.95s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  15%|█▌        | 204/1332 [13:42<1:14:28,  3.96s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  15%|█▌        | 205/1332 [13:46<1:15:25,  4.02s/it]Setting `pad_token_id` to `eos_token_i

C:/Users/Mum/Documents/news_aggregation_ipynb/final_TSLA_tweets.jsonl
C:/Users/Mum/Documents/news_aggregation_ipynb/temp_final_TSLA_tweets.jsonl


Analyzing Tweets:  17%|█▋        | 220/1332 [14:47<1:27:26,  4.72s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Command executed successfully
Saved up to tweet 220


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  17%|█▋        | 221/1332 [14:51<1:24:31,  4.57s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  17%|█▋        | 222/1332 [14:55<1:17:41,  4.20s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  17%|█▋        | 223/1332 [14:59<1:17:15,  4.18s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  17%|█▋        | 224/1332 [15:03<1:14:58,  4.06s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  17%|█▋        | 225/1332 [15:07<1:15:32,  4.09s/it]Setting `pad_token_id` to `eos_token_i

C:/Users/Mum/Documents/news_aggregation_ipynb/final_TSLA_tweets.jsonl
C:/Users/Mum/Documents/news_aggregation_ipynb/temp_final_TSLA_tweets.jsonl


Analyzing Tweets:  18%|█▊        | 240/1332 [16:09<1:26:51,  4.77s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Command executed successfully
Saved up to tweet 240


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  18%|█▊        | 241/1332 [16:13<1:23:14,  4.58s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  18%|█▊        | 242/1332 [16:16<1:16:36,  4.22s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  18%|█▊        | 243/1332 [16:20<1:16:03,  4.19s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  18%|█▊        | 244/1332 [16:24<1:14:20,  4.10s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  18%|█▊        | 245/1332 [16:28<1:14:23,  4.11s/it]Setting `pad_token_id` to `eos_token_i

C:/Users/Mum/Documents/news_aggregation_ipynb/final_TSLA_tweets.jsonl
C:/Users/Mum/Documents/news_aggregation_ipynb/temp_final_TSLA_tweets.jsonl


Analyzing Tweets:  20%|█▉        | 260/1332 [17:30<1:16:54,  4.30s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Command executed successfully
Saved up to tweet 260


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  20%|█▉        | 261/1332 [17:34<1:15:26,  4.23s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  20%|█▉        | 262/1332 [17:38<1:14:50,  4.20s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  20%|█▉        | 263/1332 [17:42<1:14:52,  4.20s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  20%|█▉        | 264/1332 [17:47<1:15:34,  4.25s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  20%|█▉        | 265/1332 [17:50<1:13:27,  4.13s/it]Setting `pad_token_id` to `eos_token_i

C:/Users/Mum/Documents/news_aggregation_ipynb/final_TSLA_tweets.jsonl
C:/Users/Mum/Documents/news_aggregation_ipynb/temp_final_TSLA_tweets.jsonl


Analyzing Tweets:  21%|██        | 280/1332 [18:55<1:28:58,  5.07s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Command executed successfully
Saved up to tweet 280


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  21%|██        | 281/1332 [18:59<1:23:42,  4.78s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  21%|██        | 282/1332 [19:03<1:18:34,  4.49s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  21%|██        | 283/1332 [19:07<1:16:50,  4.40s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  21%|██▏       | 284/1332 [19:10<1:13:23,  4.20s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  21%|██▏       | 285/1332 [19:14<1:11:26,  4.09s/it]Setting `pad_token_id` to `eos_token_i

C:/Users/Mum/Documents/news_aggregation_ipynb/final_TSLA_tweets.jsonl
C:/Users/Mum/Documents/news_aggregation_ipynb/temp_final_TSLA_tweets.jsonl


Analyzing Tweets:  23%|██▎       | 300/1332 [20:17<1:23:29,  4.85s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Command executed successfully
Saved up to tweet 300


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  23%|██▎       | 301/1332 [20:21<1:18:11,  4.55s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  23%|██▎       | 302/1332 [20:25<1:15:52,  4.42s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  23%|██▎       | 303/1332 [20:29<1:13:40,  4.30s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  23%|██▎       | 304/1332 [20:33<1:12:30,  4.23s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  23%|██▎       | 305/1332 [20:37<1:12:17,  4.22s/it]Setting `pad_token_id` to `eos_token_i

C:/Users/Mum/Documents/news_aggregation_ipynb/final_TSLA_tweets.jsonl
C:/Users/Mum/Documents/news_aggregation_ipynb/temp_final_TSLA_tweets.jsonl


Analyzing Tweets:  24%|██▍       | 320/1332 [21:38<1:18:20,  4.65s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Command executed successfully
Saved up to tweet 320


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  24%|██▍       | 321/1332 [21:42<1:16:27,  4.54s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  24%|██▍       | 322/1332 [21:46<1:14:14,  4.41s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  24%|██▍       | 323/1332 [21:51<1:13:33,  4.37s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  24%|██▍       | 324/1332 [21:54<1:08:44,  4.09s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  24%|██▍       | 325/1332 [21:58<1:07:29,  4.02s/it]Setting `pad_token_id` to `eos_token_i

C:/Users/Mum/Documents/news_aggregation_ipynb/final_TSLA_tweets.jsonl
C:/Users/Mum/Documents/news_aggregation_ipynb/temp_final_TSLA_tweets.jsonl


Analyzing Tweets:  26%|██▌       | 340/1332 [23:01<1:19:05,  4.78s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Command executed successfully
Saved up to tweet 340


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  26%|██▌       | 341/1332 [23:05<1:16:35,  4.64s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  26%|██▌       | 342/1332 [23:09<1:13:32,  4.46s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  26%|██▌       | 343/1332 [23:13<1:11:53,  4.36s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  26%|██▌       | 344/1332 [23:17<1:09:06,  4.20s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  26%|██▌       | 345/1332 [23:21<1:09:06,  4.20s/it]Setting `pad_token_id` to `eos_token_i

C:/Users/Mum/Documents/news_aggregation_ipynb/final_TSLA_tweets.jsonl
C:/Users/Mum/Documents/news_aggregation_ipynb/temp_final_TSLA_tweets.jsonl


Analyzing Tweets:  27%|██▋       | 360/1332 [24:22<1:14:06,  4.58s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Command executed successfully
Saved up to tweet 360


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  27%|██▋       | 361/1332 [24:26<1:09:53,  4.32s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  27%|██▋       | 362/1332 [24:30<1:09:05,  4.27s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  27%|██▋       | 363/1332 [24:33<1:04:31,  4.00s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  27%|██▋       | 364/1332 [24:38<1:05:08,  4.04s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  27%|██▋       | 365/1332 [24:41<1:04:08,  3.98s/it]Setting `pad_token_id` to `eos_token_i

C:/Users/Mum/Documents/news_aggregation_ipynb/final_TSLA_tweets.jsonl
C:/Users/Mum/Documents/news_aggregation_ipynb/temp_final_TSLA_tweets.jsonl


Analyzing Tweets:  29%|██▊       | 380/1332 [25:43<1:11:15,  4.49s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Command executed successfully
Saved up to tweet 380


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  29%|██▊       | 381/1332 [25:47<1:08:33,  4.33s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  29%|██▊       | 382/1332 [25:51<1:04:19,  4.06s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  29%|██▉       | 383/1332 [25:55<1:03:18,  4.00s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  29%|██▉       | 384/1332 [25:58<1:02:02,  3.93s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  29%|██▉       | 385/1332 [26:03<1:03:26,  4.02s/it]Setting `pad_token_id` to `eos_token_i

In [None]:
!ip link show eth0

Debug

In [None]:
article = NewsPlease.from_url('https://finance.yahoo.com/news/alaska-airlines-begun-flying-boeing-150009733.html')

# Split the article text into sentences
sentences = sent_tokenize(article.maintext)
print("Sentences in the article: ", len(sentences))

# Classify emotion for each sentence with a progress bar
emotion_results = []
for i, sentence in enumerate(tqdm(sentences, desc="Processing Sentences")):
    emotion = classify_sentiment(sentence)
    emotion_results.append(emotion)

# Do something with the results
print(emotion_results)

# print(article.maintext)