In [1]:
!pip install -U datasets
!pip install wandb
!pip install torch
!pip install Cython

!pip install torch torchvision torchaudio
!pip install -U transformers
!pip install peft
!pip install -U bitsandbytes
!pip install tensorboard
!pip install accelerate -U
!pip install pandas
!pip install tqdm
from tqdm import tqdm
!pip install paramiko scp

[0m

In [29]:
from datetime import datetime
import os
import sys
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig, AutoTokenizer

from peft import (
    PeftModel,
    LoraConfig,
    get_peft_model,
    get_peft_model_state_dict,
    prepare_model_for_int8_training,
    set_peft_model_state_dict,
)

base_model = "mistralai/Mistral-7B-v0.1"
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    load_in_8bit=True,
    torch_dtype=torch.float16,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")

model = PeftModel.from_pretrained(model, "dohonba/mistral_7b_fingpt")
model.eval()

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): MistralForCausalLM(
      (model): MistralModel(
        (embed_tokens): Embedding(32000, 4096)
        (layers): ModuleList(
          (0-31): 32 x MistralDecoderLayer(
            (self_attn): MistralAttention(
              (q_proj): lora.Linear8bitLt(
                (base_layer): Linear8bitLt(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=32, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=32, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
              )
              (k_proj): lora.Linear8bitLt(
                (base_layer

In [None]:
# eval_prompt = """
# Is this sentence self-promotional? Answer with {no/yes}? "Building brick by brick, our analysts motto! Pay a visit to our Community".
# """

# model_input = tokenizer(eval_prompt, return_tensors="pt").to("cuda")

# with torch.no_grad():
#     output = model.generate(**model_input, max_new_tokens=150)[0]
#     decoded_output = tokenizer.decode(output, skip_special_tokens=True)

# print(decoded_output)

In [30]:
# Function to classify emotion of a sentence
def classify_sentiment(sentence):
    eval_prompt = f"""Context: {sentence}

Question: 'What is the sentiment of this sentence? Please choose an answer from {{strong negative/moderately negative/mildly negative/neutral/mildly positive/moderately positive/strong positive}}.'
"""
    model_input = tokenizer(eval_prompt, return_tensors="pt").to("cuda")

    with torch.no_grad():
        output = model.generate(**model_input, max_new_tokens=150)[0]
        decoded_output = tokenizer.decode(output, skip_special_tokens=True)

    answer = decoded_output.split("Answer: ", 1)[1]
    return answer  # You might need to further process this to extract the emotion

def classify_emotion(sentence):
    eval_prompt = f"""Context: {sentence}

Question: 'What is the emotion shown in this text? Please choose an answer from {{anger/fear/joy/love/sadness/surprise/neutral}}'.
"""
    model_input = tokenizer(eval_prompt, return_tensors="pt").to("cuda")

    with torch.no_grad():
        output = model.generate(**model_input, max_new_tokens=150)[0]
        decoded_output = tokenizer.decode(output, skip_special_tokens=True)

    answer = decoded_output.split("Answer: ", 1)[1]
    return answer  # You might need to further process this to extract the emotion

In [31]:
# classify_emotion("I love it. Thanks.")

In [39]:
import paramiko
from scp import SCPClient

def create_ssh_client(server, port, user, password):
    client = paramiko.SSHClient()
    client.load_system_host_keys()
    client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    client.connect(server, port, user, password)
    return client

def upload_files(ssh_client, local_path, remote_path):
    with SCPClient(ssh_client.get_transport()) as scp:
        scp.put(local_path, remote_path)  # Use put for uploading

def append_file(ssh_client, local_path, remote_path, temp_path):
    # Step 1: Transfer the file to a temporary location
    with SCPClient(ssh_client.get_transport()) as scp:
        scp.put(local_path, temp_path)
    
    # Step 2: Append the content of the temporary file to the target file
    command = f'cat {temp_path} >> {remote_path}; rm {temp_path}'
    ssh_client.exec_command(command)

def download_files(ssh_client, remote_path, local_path):
    with SCPClient(ssh_client.get_transport()) as scp:
        scp.get(remote_path, local_path)

# Optional: Execute a command or run a script on the remote machine
# stdin, stdout, stderr = ssh_client.exec_command('python /path/to/remote/script.py')
# print(stdout.read().decode())  # Assuming the script has output

def close_client():
    # Close the SSH connection
    ssh_client.close()

# # SSH Connection Info
# server = 'sshhop.hopto.org'
# port = 22  # Default SSH port
# user = 'bow33'
# password = 'wee.com123'
# remote_path = '/C:/Users/bow33/Documents/GitHub/stocktwits-sentiment/processed_english_tweets3.jsonl'
# local_path = './processed_english_tweets.jsonl'

# # Create SSH client and connect
# ssh_client = create_ssh_client(server, port, user, password)
# upload_files(ssh_client, './processed_english_tweets.jsonl', '/C:/Users/bow33/Documents/GitHub/stocktwits-sentiment/processed_english_tweets.jsonl')
# download_files(ssh_client, '/C:/Users/bow33/Documents/GitHub/stocktwits-sentiment/english_tweets.jsonl', './english_tweets.jsonl')

In [40]:
import json
from datetime import datetime, timedelta
import pandas as pd
import pytz
from tqdm import tqdm

server = 'sshhop.hopto.org'
port = 22  # Default SSH port
user = 'bow33'
password = 'wee.com123'

def download_recent_tweets():
    ssh_client = create_ssh_client(server, port, user, password)
    download_files(ssh_client, '/C:/Users/bow33/Documents/GitHub/stocktwits-sentiment/english_tweets.jsonl', './english_tweets.jsonl')
    close_client()

def upload_processed_tweets():
    ssh_client = create_ssh_client(server, port, user, password)
    append_file(ssh_client, './processed_english_tweets.jsonl', '/C:/Users/bow33/Documents/GitHub/stocktwits-sentiment/processed_english_tweets.jsonl', '/C:/Users/bow33/Documents/GitHub/stocktwits-sentiment/processed_english_tweets_temp.jsonl')
    close_client()

def load_recent_tweets(file_path, cutoff_time):
    download_recent_tweets()
    recent_tweets = []
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            tweet = json.loads(line)
            tweet_time = datetime.fromisoformat(tweet['date'])  # Assuming UTC and removing 'Z'
            if tweet_time > cutoff_time:
                recent_tweets.append(tweet)  # Assuming text is under 'rawContent'
    return recent_tweets

def save_processed_tweets(file_path, tweets):
    with open(file_path, 'w', encoding='utf-8') as file:
        for tweet in tweets:
            # Create a new dictionary with only the required fields
            filtered_tweet = {
                'id': tweet['id'],  # Assuming each tweet has a unique 'id'
                'date': tweet['date'],
                'cleanContent': tweet.get('cleanContent', ''),
                'url': tweet.get('url', ''),
                'emotion': tweet.get('emotion', ''),
                'sentiment': tweet.get('sentiment', '')
            }
            json.dump(filtered_tweet, file)
            file.write('\n')
    upload_processed_tweets()

# Calculate the cutoff time for the last 30 minutes
cutoff_time = datetime.utcnow().replace(tzinfo=pytz.utc) - timedelta(minutes=30)
tweets = load_recent_tweets('./english_tweets.jsonl', cutoff_time)

limit = 10
tweets = tweets[:limit]

sentiment_results = []
emotion_results = []
for i, tweet in enumerate(tqdm(tweets, desc="Analyzing Tweets")):
    clean_content = tweet.get('cleanContent', '')
    
    sentiment = classify_sentiment(clean_content)
    emotion = classify_emotion(clean_content)
    
    sentiment_results.append(f"{i}: " + sentiment)
    emotion_results.append(f"{i}: " + emotion)
    
    tweet['sentiment'] = sentiment
    tweet['emotion'] = emotion

#Save inference
save_processed_tweets('./processed_english_tweets.jsonl', tweets)

# Print results
print(sentiment_results)
print(emotion_results)
for i, tweet in enumerate(tweets):
    print(f"{i}: " + tweet.get('cleanContent', ''))

Analyzing Tweets:   0%|          | 0/10 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  10%|█         | 1/10 [00:03<00:29,  3.25s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  20%|██        | 2/10 [00:06<00:25,  3.24s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  30%|███       | 3/10 [00:10<00:23,  3.38s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzing Tweets:  40%|████      | 4/10 [00:13<00:20,  3.46s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Analyzin

['0: strong negative', '1: strong positive', '2: mildly negative', '3: mildly positive', '4: mildly positive', '5: mildly negative', '6: neutral', '7: neutral', '8: mildly positive', '9: neutral']
['0: anger', '1: love', '2: neutral', '3: neutral', '4: neutral', '5: neutral', '6: neutral', '7: neutral', '8: neutral', '9: neutral']
0: Tesla is crap , buy German cars!
1: damn that is one sexy.... Tesla.
2: Well, we all hope so, , but Tesla stock price doesn't prove it.
3: I really would like to have a Referral program like Tesla. I’ve sold my 6th and I’m not even working for you. new lease program is not well known and I called three families who didn’t want to wait - said “now you can get one in 1-6 weeks”. Walked them through it all.
4: Yes exactly but prices have corrected at least with Tesla. Buying new vs used is crazy at this time.
5: I applaud for the heads up on a planned price change. The out-of-the blue price changes are a sales tactic that can cause a negative experience for c

In [19]:
article = NewsPlease.from_url('https://finance.yahoo.com/news/alaska-airlines-begun-flying-boeing-150009733.html')

# Split the article text into sentences
sentences = sent_tokenize(article.maintext)
print("Sentences in the article: ", len(sentences))

# Classify emotion for each sentence with a progress bar
emotion_results = []
for i, sentence in enumerate(tqdm(sentences, desc="Processing Sentences")):
    emotion = classify_sentiment(sentence)
    emotion_results.append(emotion)

# Do something with the results
print(emotion_results)

# print(article.maintext)