In [2]:
# Install required packages
!pip install streamlit transformers torch accelerate peft bitsandbytes -q

In [3]:
!pip install matplotlib wordcloud



In [None]:
# Mount Google Drive: the Streamlit app reads its Thai font file for the
# word clouds from /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
#GPT 4.o mini
%%writefile app.py
import streamlit as st
import pandas as pd
import re
import ast
import torch
import openai
import os
import matplotlib.pyplot as plt
from wordcloud import WordCloud
from transformers import pipeline
from collections import Counter


torch.cuda.empty_cache()

# Read the OpenAI key from the OPENAI_API_KEY environment variable so the secret
# does not have to be written into the notebook; the placeholder is only a fallback.
openai.api_key = os.environ.get("OPENAI_API_KEY", "your API Key")


# System prompt for the chat model: states the extraction task, the required
# list-of-lists output format, and few-shot examples written in Thai.
# NOTE(review): the closing 'Input: "Your sentence here"' line is sent verbatim;
# the sentence being analysed is supplied separately as the user message.
system_prompt = """You are a text analysis model. Your task is to extract aspects, sentiments, and opinion words from a given sentence that is an opinion about a newly launched car.
Task:
For each aspect in the sentence, you need to identify:
The aspect terms which are related to car (if explicitly present or inferred from context. If not explicitly mentioned,provide the most likely one based on the context).
The sentiment associated with the aspect ('pos' for positive, 'neg' for negative).
The opinion words related to each aspect. (if explicitly present or inferred from context. If not explicitly mentioned,provide the most likely one).
Then return the output in a format of a list of lists format without additional explanation in front or at the end of the list:
[["aspect1", "sentiment1", "opinion_word1"], ["aspect2", "sentiment2", ["opinion_word2.1", "opinion_word2.2",...]], ...]
If a sentence has multiple aspects or opinions, list each aspect separately in the output.
The output should be a list of lists, where each inner list contains: The aspect term, the sentiment and the opinion words
Output only in the format without extra explanation or characters.

Example Outputs:
Input: "ดีไซน์สวยมาก แต่ราคาแพงเกิน"
Output: [["ดีไซน์", "pos", "สวย"], ["ราคา", "neg", "แพงเกิน"]]

Input: "สวย ดุดัน แรง ออปชั่นดี ติดอย่างเดียวเรื่องแบรนด์"
Output: [["ดีไซน์", "pos", ["สวย", "ดุดัน"]], ["เครื่องยนต์", "pos", "แรง"], ["ออปชั่น", "pos", "ดี"], ["แบรนด์", "neg", "ไม่ดี"]]

Input: "ทั้งวัสดุทั้งงานประกอบแย่มาก"
Output: [["วัสดุ", "neg", "แย่"], ["งานประกอบ", "neg", "แย่"]]

Input: "ดูแล้วเหมือนหัวจะชนหลังคาดูอึดอัด"
Output: [["พื้นที่ห้องโดยสาร", "neg", ["เหมือนหัวจะชนหลังคา","ดูอึดอัด"]]]

Input: "ถ้าเตี้ยแบบนี้ ลุยน้ำท่วมไม่รอดแน่"
Output: [["รถ", "neg", ["เตี้ย","ลุยน้ำท่วมไม่รอด"]]]

Input: "รถก็หล่อ พี่นิธิก็เท่ห์ วันนี้ใส่เสื้อฮาวายซะด้วย"
Output: [["รถ", "pos", "หล่อ"]]

Input: "Your sentence here"
Output:
"""

def complete(user_prompt, system_prompt):
    """Send one chat request to gpt-4o-mini and return the reply text.

    Args:
        user_prompt: Content of the user message.
        system_prompt: Content of the system message.

    Returns:
        The message content of the first choice in the completion.
    """
    chat_messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    # Sampling settings shared by every request made through this helper.
    sampling_options = {"temperature": 0.4, "max_tokens": 256, "top_p": 0.9}
    reply = openai.chat.completions.create(
        model='gpt-4o-mini',
        messages=chat_messages,
        **sampling_options,
    )
    first_choice = reply.choices[0]
    return first_choice.message.content

def process_GPT4o(sentence):
    """Run the aspect/sentiment/opinion extraction on one sentence.

    Args:
        sentence: The comment text; it is sent as the user message.

    Returns:
        The raw text reply produced by `complete` for this sentence.
    """
    # Reading the module-level `system_prompt` needs no `global` declaration,
    # and re-assigning it to itself was a no-op.
    return complete(sentence, system_prompt)



# Streamlit app layout
st.title("Sentiment Analysis for Car Comments")
st.write("Upload a CSV file with comments about a car. The app will analyze aspects, sentiments, and opinion words using an LLM.")

# Bar colour per sentiment label; labels other than the prompt's 'pos'/'neg' fall back to gray.
SENTIMENT_COLORS = {'pos': 'blue', 'neg': 'red'}


def generate_wordcloud(words, title, font_path):
    """Draw a word cloud of `words` in the current Streamlit container.

    Args:
        words: Iterable of terms; each one is converted to text and the terms
            are joined with spaces before being handed to WordCloud.
        title: Title shown above the image.
        font_path: Path of the font file used to render the words.

    Only runs of Thai characters are kept (see `regexp`). When nothing is
    left to plot, an info message is shown instead of raising.
    """
    text = ' '.join(str(word) for word in words)
    try:
        wordcloud = WordCloud(
            font_path=font_path,
            regexp='[ก-๙]+',
            width=600, height=300,
            background_color='white').generate(text)
    except ValueError:
        # WordCloud raises ValueError when there is not a single word to plot.
        st.info(f"No words available for: {title}")
        return
    fig, ax = plt.subplots(figsize=(6, 3))
    ax.imshow(wordcloud, interpolation='bilinear')
    ax.axis('off')
    ax.set_title(title)
    st.pyplot(fig)
    # Streamlit re-runs the script on every interaction; close the figure so they do not pile up.
    plt.close(fig)


# File uploader
uploaded_file = st.file_uploader("Upload your CSV file", type="csv")

if uploaded_file is not None:
    # Load the CSV file
    df = pd.read_csv(uploaded_file)

    # Ask for the column name with comments
    comment_column = st.text_input("Enter the column name that contains the comments")

    if comment_column and comment_column in df.columns:
        st.write("Processing...")

        # Process each comment with the LLM
        aspects_list, sentiments_list, opinions_list = [], [], []

        # Empty CSV cells are read as NaN (a float); drop them and coerce the rest to text.
        comments = df[comment_column].dropna().astype(str)

        for comment in comments:
            if not comment.strip():
                continue

            # Generate prompt and get LLM response (the API may return no content)
            response = process_GPT4o(comment) or ""
            st.write("### Generated Response:")
            st.write(response)

            # Extract the outermost [...] span; DOTALL lets it cover line breaks.
            output_cleaned = re.search(r'(\[.*\])', response, re.DOTALL)
            if not output_cleaned:
                st.warning(f"Skipping response without a list: {response}")
                continue

            try:
                # Parse the list literal without executing arbitrary code
                data = ast.literal_eval(output_cleaned.group(1))
            except (ValueError, SyntaxError):
                st.warning(f"Skipping malformed response: {response}")
                continue

            for item in data:
                # Each item must be an [aspect, sentiment, opinion] triple
                if isinstance(item, (list, tuple)) and len(item) == 3:
                    aspect, sentiment, opinion = item
                    aspects_list.append(str(aspect))
                    sentiments_list.append(str(sentiment))
                    if isinstance(opinion, (list, tuple)):  # Several opinion words for one aspect
                        opinions_list.extend(str(word) for word in opinion)
                    else:
                        opinions_list.append(str(opinion))

        # Generate Word Clouds
        font_path = '/content/drive/MyDrive/TH Sarabun New Regular.ttf'

        col1, col2 = st.columns([1, 1])  # Equal width for both columns

        # Display Word Cloud for Aspects in Column 1
        with col1:
            st.subheader("Word Cloud of Aspects")
            generate_wordcloud(aspects_list, "Aspects Word Cloud", font_path)

        # Display Word Cloud for Opinion Words in Column 2
        with col2:
            st.subheader("Word Cloud of Opinions")
            generate_wordcloud(opinions_list, "Opinion Words Word Cloud", font_path)

        # Sentiment frequency bar chart, displayed below the word clouds
        st.subheader("Sentiment Frequency")
        sentiment_counts = Counter(sentiments_list)
        if sentiment_counts:
            labels = list(sentiment_counts.keys())
            fig, ax = plt.subplots(figsize=(8, 4))
            # Colour by label so 'pos' is always blue and 'neg' always red,
            # regardless of which label happened to be seen first.
            ax.bar(
                labels,
                [sentiment_counts[label] for label in labels],
                color=[SENTIMENT_COLORS.get(label, 'gray') for label in labels],
            )
            ax.set_xlabel("Sentiment")
            ax.set_ylabel("Frequency")
            ax.set_title("Frequency of Positive and Negative Sentiments")
            st.pyplot(fig)
            plt.close(fig)
        else:
            st.info("No sentiments were extracted from the comments.")
    elif comment_column:
        # Show the error only if a column name was entered but not found
        st.error(f"Column '{comment_column}' not found in the uploaded file.")
else:
    st.write("Please upload a CSV file to analyze.")

Overwriting app.py


In [None]:
# Expose the Streamlit app to the web through a localtunnel URL
#! pip install streamlit -q
# Print this machine's public IPv4 address (it appears on the first output line)
!wget -q -O - ipv4.icanhazip.com
# Start the app in the background and tunnel port 8501
! streamlit run app.py & npx localtunnel --port 8501
# The numbers on the first line of the output are the tunnel password

34.142.238.230

Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.142.238.230:8501[0m
[0m
your url is: https://shaky-ties-wait.loca.lt
2024-11-16 03:00:56.639262: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-11-16 03:00:56.655961: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-11-16 03:00:56.684011: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unab

# Using Typhoon with Quantization

In [None]:
#Typhoon
%%writefile app.py
import streamlit as st
import pandas as pd
import re
import ast
import torch
import openai
import os
import matplotlib.pyplot as plt
import transformers
from transformers import AutoModelForCausalLM,AutoTokenizer,BitsAndBytesConfig
from wordcloud import WordCloud
from transformers import pipeline
from collections import Counter

torch.cuda.empty_cache()

# Hugging Face access token: an HF_TOKEN already present in the environment wins;
# the placeholder is only used when none is set. (The original line ended with a
# stray quote and did not parse.)
os.environ.setdefault('HF_TOKEN', "your API Key")
hf_token               = os.environ['HF_TOKEN']


# 4-bit NF4 quantization with float16 compute and no double quantization
compute_dtype = torch.float16
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=False,
)

@st.cache_resource
def load_model_and_tokenizer():
    """Load the quantized Typhoon checkpoint together with its tokenizer.

    Wrapped in `st.cache_resource`; the model is loaded with the module-level
    `bnb_config` and `hf_token`.

    Returns:
        A `(model, tokenizer)` tuple.
    """
    checkpoint = "scb10x/llama-3-typhoon-v1.5-8b-instruct"
    loaded_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    loaded_model = AutoModelForCausalLM.from_pretrained(
        checkpoint,
        quantization_config=bnb_config,
        device_map="auto",
        token=hf_token,
    )
    return loaded_model, loaded_tokenizer

model, tokenizer = load_model_and_tokenizer()

# System prompt for the chat model: states the extraction task, the required
# list-of-lists output format, and few-shot examples written in Thai.
# NOTE(review): the closing 'Input: "Your sentence here"' line is sent verbatim;
# the sentence being analysed is supplied separately as the user message.
system_prompt = """You are a text analysis model. Your task is to extract aspects, sentiments, and opinion words from a given sentence that is an opinion about a newly launched car.
Task:
For each aspect in the sentence, you need to identify:
The aspect terms which are related to car (if explicitly present or inferred from context. If not explicitly mentioned,provide the most likely one based on the context).
The sentiment associated with the aspect ('pos' for positive, 'neg' for negative).
The opinion words related to each aspect. (if explicitly present or inferred from context. If not explicitly mentioned,provide the most likely one).
Then return the output in a format of a list of lists format without additional explanation in front or at the end of the list:
[["aspect1", "sentiment1", "opinion_word1"], ["aspect2", "sentiment2", ["opinion_word2.1", "opinion_word2.2",...]], ...]
If a sentence has multiple aspects or opinions, list each aspect separately in the output.
The output should be a list of lists, where each inner list contains: The aspect term, the sentiment and the opinion words
Output only in the format without extra explanation or characters.

Example Outputs:
Input: "ดีไซน์สวยมาก แต่ราคาแพงเกิน"
Output: [["ดีไซน์", "pos", "สวย"], ["ราคา", "neg", "แพงเกิน"]]

Input: "สวย ดุดัน แรง ออปชั่นดี ติดอย่างเดียวเรื่องแบรนด์"
Output: [["ดีไซน์", "pos", ["สวย", "ดุดัน"]], ["เครื่องยนต์", "pos", "แรง"], ["ออปชั่น", "pos", "ดี"], ["แบรนด์", "neg", "ไม่ดี"]]

Input: "ทั้งวัสดุทั้งงานประกอบแย่มาก"
Output: [["วัสดุ", "neg", "แย่"], ["งานประกอบ", "neg", "แย่"]]

Input: "ดูแล้วเหมือนหัวจะชนหลังคาดูอึดอัด"
Output: [["พื้นที่ห้องโดยสาร", "neg", ["เหมือนหัวจะชนหลังคา","ดูอึดอัด"]]]

Input: "ถ้าเตี้ยแบบนี้ ลุยน้ำท่วมไม่รอดแน่"
Output: [["รถ", "neg", ["เตี้ย","ลุยน้ำท่วมไม่รอด"]]]

Input: "รถก็หล่อ พี่นิธิก็เท่ห์ วันนี้ใส่เสื้อฮาวายซะด้วย"
Output: [["รถ", "pos", "หล่อ"]]

Input: "Your sentence here"
Output:
"""

def generate_model(prompt):
    """Generate the model's reply to `prompt` under the module-level system prompt.

    Args:
        prompt: The sentence to analyse; it is sent as the user message.

    Returns:
        The decoded text of the newly generated tokens only (the prompt
        tokens are sliced off and special tokens are skipped).
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": str(prompt)},
    ]

    # Make sure a pad token exists BEFORE tokenizing: padding=True needs one,
    # and generate() is given pad_token_id below.
    if tokenizer.pad_token_id is None:
        tokenizer.pad_token_id = tokenizer.eos_token_id

    encoded = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        padding=True,
        truncation=True,
        return_tensors="pt"
    )

    if isinstance(encoded, torch.Tensor):
        input_ids = encoded.to(model.device)
        # A single sequence is never padded, so every position is a real token.
        # Comparing against pad_token_id would wrongly mask end-of-turn tokens
        # inside the prompt whenever the pad token equals the eos token.
        attention_mask = torch.ones_like(input_ids)
    else:
        # Mapping-style result (e.g. a BatchEncoding): use the tokenizer's own mask
        input_ids = encoded["input_ids"].to(model.device)
        attention_mask = encoded["attention_mask"].to(model.device)

    # Generate the output
    outputs = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        max_new_tokens=256,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id,
        do_sample=True,
        temperature=0.4,
        top_p=0.9,
    )

    # Decode the response, excluding the prompt tokens
    response = outputs[0][input_ids.shape[-1]:]
    return tokenizer.decode(response, skip_special_tokens=True)

def process_typhoon(sentence):
    """Return the raw reply of `generate_model` for one sentence."""
    # Experiment note: repeat this step for temperature 0.2 - 0.5
    return generate_model(sentence)



# Streamlit app layout
st.title("LLM-powered Sentiment Analysis for Car Comments")
st.write("Upload a CSV file with comments about a car. The app will analyze aspects, sentiments, and opinion words using an LLM.")

# Bar colour per sentiment label; labels other than the prompt's 'pos'/'neg' fall back to gray.
SENTIMENT_COLORS = {'pos': 'blue', 'neg': 'red'}


def generate_wordcloud(words, title, font_path):
    """Draw a word cloud of `words` in the current Streamlit container.

    Args:
        words: Iterable of terms; each one is converted to text and the terms
            are joined with spaces before being handed to WordCloud.
        title: Title shown above the image.
        font_path: Path of the font file used to render the words.

    Only runs of Thai characters are kept (see `regexp`). When nothing is
    left to plot, an info message is shown instead of raising.
    """
    text = ' '.join(str(word) for word in words)
    try:
        wordcloud = WordCloud(
            font_path=font_path,
            regexp='[ก-๙]+',
            width=600, height=300,
            background_color='white').generate(text)
    except ValueError:
        # WordCloud raises ValueError when there is not a single word to plot.
        st.info(f"No words available for: {title}")
        return
    fig, ax = plt.subplots(figsize=(6, 3))
    ax.imshow(wordcloud, interpolation='bilinear')
    ax.axis('off')
    ax.set_title(title)
    st.pyplot(fig)
    # Streamlit re-runs the script on every interaction; close the figure so they do not pile up.
    plt.close(fig)


# File uploader
uploaded_file = st.file_uploader("Upload your CSV file", type="csv")

if uploaded_file is not None:
    # Load the CSV file
    df = pd.read_csv(uploaded_file)

    # Ask for the column name with comments
    comment_column = st.text_input("Enter the column name that contains the comments")

    if comment_column and comment_column in df.columns:
        st.write("Processing...")

        # Process each comment with the LLM
        aspects_list, sentiments_list, opinions_list = [], [], []

        # Empty CSV cells are read as NaN (a float); drop them and coerce the rest to text.
        comments = df[comment_column].dropna().astype(str)

        for comment in comments:
            if not comment.strip():
                continue

            # Generate prompt and get LLM response
            response = process_typhoon(comment) or ""
            st.write("### Generated Response:")
            st.write(response)

            # Extract the outermost [...] span; DOTALL lets it cover line breaks.
            output_cleaned = re.search(r'(\[.*\])', response, re.DOTALL)
            if not output_cleaned:
                st.warning(f"Skipping response without a list: {response}")
                continue

            try:
                # Parse the list literal without executing arbitrary code
                data = ast.literal_eval(output_cleaned.group(1))
            except (ValueError, SyntaxError):
                st.warning(f"Skipping malformed response: {response}")
                continue

            for item in data:
                # Each item must be an [aspect, sentiment, opinion] triple
                if isinstance(item, (list, tuple)) and len(item) == 3:
                    aspect, sentiment, opinion = item
                    aspects_list.append(str(aspect))
                    sentiments_list.append(str(sentiment))
                    if isinstance(opinion, (list, tuple)):  # Several opinion words for one aspect
                        opinions_list.extend(str(word) for word in opinion)
                    else:
                        opinions_list.append(str(opinion))

        # Generate Word Clouds
        font_path = '/content/drive/MyDrive/TH Sarabun New Regular.ttf'

        col1, col2 = st.columns([1, 1])  # Equal width for both columns

        # Display Word Cloud for Aspects in Column 1
        with col1:
            st.subheader("Word Cloud of Aspects")
            generate_wordcloud(aspects_list, "Aspects Word Cloud", font_path)

        # Display Word Cloud for Opinion Words in Column 2
        with col2:
            st.subheader("Word Cloud of Opinions")
            generate_wordcloud(opinions_list, "Opinion Words Word Cloud", font_path)

        # Sentiment frequency bar chart, displayed below the word clouds
        st.subheader("Sentiment Frequency")
        sentiment_counts = Counter(sentiments_list)
        if sentiment_counts:
            labels = list(sentiment_counts.keys())
            fig, ax = plt.subplots(figsize=(8, 4))
            # Colour by label so 'pos' is always blue and 'neg' always red,
            # regardless of which label happened to be seen first.
            ax.bar(
                labels,
                [sentiment_counts[label] for label in labels],
                color=[SENTIMENT_COLORS.get(label, 'gray') for label in labels],
            )
            ax.set_xlabel("Sentiment")
            ax.set_ylabel("Frequency")
            ax.set_title("Frequency of Positive and Negative Sentiments")
            st.pyplot(fig)
            plt.close(fig)
        else:
            st.info("No sentiments were extracted from the comments.")
    elif comment_column:
        # Show the error only if a column name was entered but not found
        st.error(f"Column '{comment_column}' not found in the uploaded file.")
else:
    st.write("Please upload a CSV file to analyze.")

Writing app.py


In [None]:
# Print this machine's public IPv4 address (it appears on the first output line)
!wget -q -O - ipv4.icanhazip.com
# Start the Streamlit app in the background and tunnel port 8501 with localtunnel
! streamlit run app.py & npx localtunnel --port 8501

35.187.238.229

Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://35.187.238.229:8501[0m
[0m
[1G[0JNeed to install the following packages:
  localtunnel@2.0.2
Ok to proceed? (y) [20Gy
[K[?25hyour url is: https://calm-teams-notice.loca.lt
2024-11-17 14:59:56.882211: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-11-17 14:59:56.898874: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been re

# Typhoon with LoRA

In [None]:
#Typhoon
%%writefile app.py
import streamlit as st
import pandas as pd
import re
import ast
import torch
import openai
import os
import matplotlib.pyplot as plt
import transformers
from transformers import AutoModelForCausalLM,AutoTokenizer,BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from wordcloud import WordCloud
from transformers import pipeline
from collections import Counter
from peft import TaskType

torch.cuda.empty_cache()


@st.cache_resource
def load_model_and_tokenizer():
    """Load the Typhoon checkpoint, wrap it with a LoRA adapter, and load its tokenizer.

    With CUDA available the model is loaded in 4-bit; if that fails it falls
    back to a full-precision load. Without CUDA it is loaded on the CPU.
    The LoRA configuration targets the attention projections.

    NOTE(review): no trained adapter weights are loaded here, so the adapter
    is freshly initialised — confirm whether trained weights should be loaded.

    Returns:
        A `(model, tokenizer)` tuple; `model` is the PEFT-wrapped model.
    """
    # An HF_TOKEN already present in the environment wins; the placeholder is
    # only used when none is set. (The original line ended with a stray quote
    # and did not parse.)
    os.environ.setdefault('HF_TOKEN', "your API Key")
    hf_token = os.environ['HF_TOKEN']
    model_name = "scb10x/llama-3-typhoon-v1.5-8b-instruct"

    if torch.cuda.is_available():
        try:
            # quantization_config replaces the deprecated load_in_4bit= keyword
            model = AutoModelForCausalLM.from_pretrained(
                model_name,
                device_map="auto",
                quantization_config=BitsAndBytesConfig(load_in_4bit=True),
                token=hf_token,
            )
        except Exception as e:
            # Deliberate best-effort: report the failure and retry without quantization
            print(f"Failed to load in 4-bit mode: {e}")
            print("Falling back to full precision.")
            model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")
    else:
        print("CUDA not available. Loading model in CPU mode.")
        model = AutoModelForCausalLM.from_pretrained(model_name, device_map=None).to("cpu")
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # The LoRA setup used to sit after `return` and never ran; it now runs
    # before the model is handed back.
    lora_config = LoraConfig(
        r=16,
        lora_alpha=32,  # Scaling factor
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],  # Apply LoRA to specific attention modules
        lora_dropout=0.1,  # Dropout probability for LoRA
        bias="none",
        task_type="CAUSAL_LM",  # Specify the task type as causal language modeling
    )
    # k-bit preparation only applies when the 4-bit load succeeded
    if getattr(model, "is_loaded_in_4bit", False):
        model = prepare_model_for_kbit_training(model)
    model = get_peft_model(model, lora_config)

    # Print the number of trainable parameters
    model.print_trainable_parameters()

    # The app only runs inference: switch off dropout in the newly added layers
    model.eval()
    return model, tokenizer


model, tokenizer = load_model_and_tokenizer()

# System prompt for the chat model: states the extraction task, the required
# list-of-lists output format, and few-shot examples written in Thai.
# NOTE(review): the closing 'Input: "Your sentence here"' line is sent verbatim;
# the sentence being analysed is supplied separately as the user message.
system_prompt = """You are a text analysis model. Your task is to extract aspects, sentiments, and opinion words from a given sentence that is an opinion about a newly launched car.
Task:
For each aspect in the sentence, you need to identify:
The aspect terms which are related to car (if explicitly present or inferred from context. If not explicitly mentioned,provide the most likely one based on the context).
The sentiment associated with the aspect ('pos' for positive, 'neg' for negative).
The opinion words related to each aspect. (if explicitly present or inferred from context. If not explicitly mentioned,provide the most likely one).
Then return the output in a format of a list of lists format without additional explanation in front or at the end of the list:
[["aspect1", "sentiment1", "opinion_word1"], ["aspect2", "sentiment2", ["opinion_word2.1", "opinion_word2.2",...]], ...]
If a sentence has multiple aspects or opinions, list each aspect separately in the output.
The output should be a list of lists, where each inner list contains: The aspect term, the sentiment and the opinion words
Output only in the format without extra explanation or characters.

Example Outputs:
Input: "ดีไซน์สวยมาก แต่ราคาแพงเกิน"
Output: [["ดีไซน์", "pos", "สวย"], ["ราคา", "neg", "แพงเกิน"]]

Input: "สวย ดุดัน แรง ออปชั่นดี ติดอย่างเดียวเรื่องแบรนด์"
Output: [["ดีไซน์", "pos", ["สวย", "ดุดัน"]], ["เครื่องยนต์", "pos", "แรง"], ["ออปชั่น", "pos", "ดี"], ["แบรนด์", "neg", "ไม่ดี"]]

Input: "ทั้งวัสดุทั้งงานประกอบแย่มาก"
Output: [["วัสดุ", "neg", "แย่"], ["งานประกอบ", "neg", "แย่"]]

Input: "ดูแล้วเหมือนหัวจะชนหลังคาดูอึดอัด"
Output: [["พื้นที่ห้องโดยสาร", "neg", ["เหมือนหัวจะชนหลังคา","ดูอึดอัด"]]]

Input: "ถ้าเตี้ยแบบนี้ ลุยน้ำท่วมไม่รอดแน่"
Output: [["รถ", "neg", ["เตี้ย","ลุยน้ำท่วมไม่รอด"]]]

Input: "รถก็หล่อ พี่นิธิก็เท่ห์ วันนี้ใส่เสื้อฮาวายซะด้วย"
Output: [["รถ", "pos", "หล่อ"]]

Input: "Your sentence here"
Output:
"""

def generate_model(prompt):
    """Generate the model's reply to `prompt` under the module-level system prompt.

    Args:
        prompt: The sentence to analyse; it is sent as the user message.

    Returns:
        The decoded text of the newly generated tokens only (the prompt
        tokens are sliced off and special tokens are skipped).
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": str(prompt)},
    ]

    # Make sure a pad token exists BEFORE tokenizing: padding=True needs one,
    # and generate() is given pad_token_id below.
    if tokenizer.pad_token_id is None:
        tokenizer.pad_token_id = tokenizer.eos_token_id

    encoded = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        padding=True,
        truncation=True,
        return_tensors="pt"
    )

    if isinstance(encoded, torch.Tensor):
        input_ids = encoded.to(model.device)
        # A single sequence is never padded, so every position is a real token.
        # Comparing against pad_token_id would wrongly mask end-of-turn tokens
        # inside the prompt whenever the pad token equals the eos token.
        attention_mask = torch.ones_like(input_ids)
    else:
        # Mapping-style result (e.g. a BatchEncoding): use the tokenizer's own mask
        input_ids = encoded["input_ids"].to(model.device)
        attention_mask = encoded["attention_mask"].to(model.device)

    # Generate the output
    outputs = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        max_new_tokens=256,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id,
        do_sample=True,
        temperature=0.4,
        top_p=0.9,
    )

    # Decode the response, excluding the prompt tokens
    response = outputs[0][input_ids.shape[-1]:]
    return tokenizer.decode(response, skip_special_tokens=True)

def process_typhoon(sentence):
    """Return the raw reply of `generate_model` for one sentence."""
    # Experiment note: repeat this step for temperature 0.2 - 0.5
    return generate_model(sentence)



# Streamlit app layout
st.title("LLM-powered Sentiment Analysis for Car Comments")
st.write("Upload a CSV file with comments about a car. The app will analyze aspects, sentiments, and opinion words using an LLM.")

# Bar colour per sentiment label; labels other than the prompt's 'pos'/'neg' fall back to gray.
SENTIMENT_COLORS = {'pos': 'blue', 'neg': 'red'}


def generate_wordcloud(words, title, font_path):
    """Draw a word cloud of `words` in the current Streamlit container.

    Args:
        words: Iterable of terms; each one is converted to text and the terms
            are joined with spaces before being handed to WordCloud.
        title: Title shown above the image.
        font_path: Path of the font file used to render the words.

    Only runs of Thai characters are kept (see `regexp`). When nothing is
    left to plot, an info message is shown instead of raising.
    """
    text = ' '.join(str(word) for word in words)
    try:
        wordcloud = WordCloud(
            font_path=font_path,
            regexp='[ก-๙]+',
            width=600, height=300,
            background_color='white').generate(text)
    except ValueError:
        # WordCloud raises ValueError when there is not a single word to plot.
        st.info(f"No words available for: {title}")
        return
    fig, ax = plt.subplots(figsize=(6, 3))
    ax.imshow(wordcloud, interpolation='bilinear')
    ax.axis('off')
    ax.set_title(title)
    st.pyplot(fig)
    # Streamlit re-runs the script on every interaction; close the figure so they do not pile up.
    plt.close(fig)


# File uploader
uploaded_file = st.file_uploader("Upload your CSV file", type="csv")

if uploaded_file is not None:
    # Load the CSV file
    df = pd.read_csv(uploaded_file)

    # Ask for the column name with comments
    comment_column = st.text_input("Enter the column name that contains the comments")

    if comment_column and comment_column in df.columns:
        st.write("Processing...")

        # Process each comment with the LLM
        aspects_list, sentiments_list, opinions_list = [], [], []

        # Empty CSV cells are read as NaN (a float); drop them and coerce the rest to text.
        comments = df[comment_column].dropna().astype(str)

        for comment in comments:
            if not comment.strip():
                continue

            # Generate prompt and get LLM response
            response = process_typhoon(comment) or ""
            st.write("### Generated Response:")
            st.write(response)

            # Extract the outermost [...] span; DOTALL lets it cover line breaks.
            output_cleaned = re.search(r'(\[.*\])', response, re.DOTALL)
            if not output_cleaned:
                st.warning(f"Skipping response without a list: {response}")
                continue

            try:
                # Parse the list literal without executing arbitrary code
                data = ast.literal_eval(output_cleaned.group(1))
            except (ValueError, SyntaxError):
                st.warning(f"Skipping malformed response: {response}")
                continue

            for item in data:
                # Each item must be an [aspect, sentiment, opinion] triple
                if isinstance(item, (list, tuple)) and len(item) == 3:
                    aspect, sentiment, opinion = item
                    aspects_list.append(str(aspect))
                    sentiments_list.append(str(sentiment))
                    if isinstance(opinion, (list, tuple)):  # Several opinion words for one aspect
                        opinions_list.extend(str(word) for word in opinion)
                    else:
                        opinions_list.append(str(opinion))

        # Generate Word Clouds
        font_path = '/content/drive/MyDrive/TH Sarabun New Regular.ttf'

        col1, col2 = st.columns([1, 1])  # Equal width for both columns

        # Display Word Cloud for Aspects in Column 1
        with col1:
            st.subheader("Word Cloud of Aspects")
            generate_wordcloud(aspects_list, "Aspects Word Cloud", font_path)

        # Display Word Cloud for Opinion Words in Column 2
        with col2:
            st.subheader("Word Cloud of Opinions")
            generate_wordcloud(opinions_list, "Opinion Words Word Cloud", font_path)

        # Sentiment frequency bar chart, displayed below the word clouds
        st.subheader("Sentiment Frequency")
        sentiment_counts = Counter(sentiments_list)
        if sentiment_counts:
            labels = list(sentiment_counts.keys())
            fig, ax = plt.subplots(figsize=(8, 4))
            # Colour by label so 'pos' is always blue and 'neg' always red,
            # regardless of which label happened to be seen first.
            ax.bar(
                labels,
                [sentiment_counts[label] for label in labels],
                color=[SENTIMENT_COLORS.get(label, 'gray') for label in labels],
            )
            ax.set_xlabel("Sentiment")
            ax.set_ylabel("Frequency")
            ax.set_title("Frequency of Positive and Negative Sentiments")
            st.pyplot(fig)
            plt.close(fig)
        else:
            st.info("No sentiments were extracted from the comments.")
    elif comment_column:
        # Show the error only if a column name was entered but not found
        st.error(f"Column '{comment_column}' not found in the uploaded file.")
else:
    st.write("Please upload a CSV file to analyze.")

Overwriting app.py


In [12]:
# Print this machine's public IPv4 address (it appears on the first output line)
!wget -q -O - ipv4.icanhazip.com
# Start the Streamlit app in the background and tunnel port 8501 with localtunnel
! streamlit run app.py & npx localtunnel --port 8501

104.155.196.181

Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://104.155.196.181:8501[0m
[0m
your url is: https://true-lines-taste.loca.lt
2024-11-18 17:14:36.531401: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-11-18 17:14:36.573477: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-11-18 17:14:36.586599: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS