In [None]:
import pandas as pd
import nltk

# Download the stopwords dataset
nltk.download('stopwords')
# Download the punkt resource
nltk.download('punkt')

from textblob import TextBlob
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
import re

# Location of the CSV file that holds the comments to analyse.
dataset_path = '/content/google.csv'

# Try a strict parse first. If a row has an unexpected number of fields,
# report the parser error and re-read the file, dropping the malformed rows.
# NOTE(review): the recorded `df.info()` output shows a MultiIndex and only a
# few non-null 'komentar' values, which suggests unquoted commas inside the
# comments -- confirm the file's quoting/delimiter.
try:
    df = pd.read_csv(dataset_path)
except pd.errors.ParserError as e:
    print(f"ParserError: {e}")
    # `error_bad_lines=False` was deprecated in pandas 1.3 and removed in 2.0.
    # `on_bad_lines='warn'` is the replacement that still reports every
    # skipped line, matching the old default of `warn_bad_lines=True`.
    df = pd.read_csv(dataset_path, on_bad_lines='warn')

# Matches every character that is neither an ASCII letter nor whitespace.
_NON_LETTER_RE = re.compile(r'[^a-zA-Z\s]')

# NLTK resources shared by all calls to preprocess_text(); built on first use
# so they are not re-created for every row the function is applied to.
_STOP_WORDS = None
_STEMMER = None


def _english_stop_words():
    """Return the English stopword set, loading it from NLTK only once."""
    global _STOP_WORDS
    if _STOP_WORDS is None:
        _STOP_WORDS = frozenset(stopwords.words('english'))
    return _STOP_WORDS


def _porter_stemmer():
    """Return a single shared PorterStemmer instance."""
    global _STEMMER
    if _STEMMER is None:
        _STEMMER = PorterStemmer()
    return _STEMMER


def preprocess_text(text):
    """Clean, lowercase, stopword-filter and stem *text*.

    Steps, in order:
      1. drop every character that is not an ASCII letter or whitespace
         (this also removes commas and digits);
      2. lowercase the text;
      3. tokenize with NLTK's ``word_tokenize``;
      4. drop English stopwords;
      5. stem the remaining tokens with the Porter stemmer.

    Args:
        text: The value to preprocess. Anything that is not a ``str``
            (for example NaN from a missing CSV cell) is accepted.

    Returns:
        The processed tokens joined by single spaces, or an empty string
        when *text* is not a string.
    """
    # Non-string values (e.g. NaN) have nothing to clean.
    if not isinstance(text, str):
        return ''

    # Clean the text and normalise case before tokenizing.
    cleaned = _NON_LETTER_RE.sub('', text).lower()

    stop_words = _english_stop_words()
    stemmer = _porter_stemmer()

    # Tokens are already lowercase and free of punctuation, so one pass over
    # them is enough to both filter stopwords and stem.
    return ' '.join(
        stemmer.stem(token)
        for token in word_tokenize(cleaned)
        if token not in stop_words
    )


# Run the preprocessing over every entry of the 'komentar' column and store
# the cleaned text in a new column.
df['Preprocessed_Text'] = df.loc[:, 'komentar'].apply(preprocess_text)

# Sentiment analysis helper built on TextBlob
def analyze_sentiment(text):
    """Label *text* by the sign of its TextBlob polarity score.

    Returns 'positive' for a polarity above zero, 'negative' for a polarity
    below zero, and 'neutral' otherwise.
    """
    polarity = TextBlob(text).sentiment.polarity
    if polarity > 0:
        return 'positive'
    if polarity < 0:
        return 'negative'
    # Neither above nor below zero.
    return 'neutral'

# Classify the sentiment of every preprocessed comment
df['Predicted_Sentiment'] = df.loc[:, 'Preprocessed_Text'].apply(analyze_sentiment)

# Show the heading followed by each original comment with its predicted label
print(
    "Hasil Analisis Sentimen:",
    df[['komentar', 'Predicted_Sentiment']],
    sep="\n",
)


ParserError: Error tokenizing data. C error: Expected 4 fields in line 13, saw 6

Hasil Analisis Sentimen:
                                                                                                                                                                                                   komentar  \
Berbagi pengalaman aja yah                         RSUD sangat baik pelayanan nya dari mulai satpa... satpam nya sangat respon membantu keluarga pasi...  poko nya the best buat SATPAM nya pertahankan ...   
Ikut berbagi pengalaman aja\nNi ayah saya udh s... NaN                                                NaN                                                                                               NaN   
semoga dibacasih ulasannya, tolonglah SATPAM di... NaN                                                NaN                                                                                               NaN   
Pelayanan nya tidak manusiawi mungkin karna pak... NaN           

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


  df = pd.read_csv(dataset_path, error_bad_lines=False)
Skipping line 13: expected 4 fields, saw 6
Skipping line 22: expected 4 fields, saw 7
Skipping line 24: expected 4 fields, saw 5
Skipping line 28: expected 4 fields, saw 5
Skipping line 33: expected 4 fields, saw 5
Skipping line 41: expected 4 fields, saw 5
Skipping line 42: expected 4 fields, saw 5
Skipping line 43: expected 4 fields, saw 11
Skipping line 72: expected 4 fields, saw 8
Skipping line 89: expected 4 fields, saw 9
Skipping line 92: expected 4 fields, saw 5
Skipping line 97: expected 4 fields, saw 5
Skipping line 108: expected 4 fields, saw 9
Skipping line 117: expected 4 fields, saw 5
Skipping line 124: expected 4 fields, saw 6
Skipping line 136: expected 4 fields, saw 5
Skipping line 149: 

In [None]:
# Evaluate the DataFrame as the cell's last expression so the notebook renders it.
df

Unnamed: 0,posts,predicted,intensity,Preprocessed_Text,Predicted_Sentiment
0,I know as parent of child with down syndrome t...,negative,-1,know parent child syndrom hear child high risk...,negative
1,but in my heart I know this is the future prom...,neutral,0,heart know futur promis articl regardless http...,negative
2,I have mylefibrosis which turn to leukemia the...,negative,-1,mylefibrosi turn leukemia want stem cell trans...,negative
3,from one of my health group subject wayne dyer...,neutral,0,one health group subject wayn dyer leukemia ca...,negative
4,gmos now link to leukemia http nsnbc I 2013 07...,neutral,0,gmo link leukemia http nsnbc gmo link leukemia...,neutral
...,...,...,...,...,...
10387,hey everyone I am a 25 year old male I work ou...,negative,-1,hey everyon year old male work eat pretti heal...,positive
10388,have surgery for stage 1 colon cancer 1 year a...,very negative,-2,surgeri stage colon cancer year ago cea level ...,positive
10389,the doctor advise we he could not remove the a...,neutral,0,doctor advis could remov abnorm polyp christ s...,negative
10390,my 66 year old father have been through so muc...,neutral,0,year old father much past year realli need tri...,negative


In [None]:
# Print a summary of the DataFrame: index type, columns, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 162 entries, ('Berbagi pengalaman aja yah', 'RSUD sangat baik pelayanan nya dari mulai satpam dan staf2 yg lain nya', 'satpam nya sangat respon membantu keluarga pasien contoh nya keluarga saya yg pernah di rawat di RSUD KOTA BANDUNG') to ('Pelayanan sangat tdk ramah. Sangat2 buruk apalagi thd keluarga pasien. Bayar dulu periksa blum masa dah dikasih resep. 🙄🙄 …', nan, nan)
Data columns (total 3 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   komentar             9 non-null      object
 1   Preprocessed_Text    162 non-null    object
 2   Predicted_Sentiment  162 non-null    object
dtypes: object(3)
memory usage: 13.1+ KB


In [None]:
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# Location of the CSV file that holds the comments to classify.
dataset_path = '/content/Mental Health Dataset.csv'

# Read the CSV file, dropping rows with an unexpected number of fields.
# `error_bad_lines=False` was deprecated in pandas 1.3 and removed in 2.0.
# `on_bad_lines='warn'` is the replacement that still reports every skipped
# line, matching the old default of `warn_bad_lines=True`.
df = pd.read_csv(dataset_path, on_bad_lines='warn')

# Load the tokenizer that matches the checkpoint.
tokenizer = AutoTokenizer.from_pretrained("sarahlintang/IndoBERT")

# Load the model with a sequence-classification head.
# NOTE(review): the recorded output warns that some
# BertForSequenceClassification weights were not initialized from this
# checkpoint. If that is the classification head, its predictions are not
# meaningful until it is fine-tuned -- confirm before trusting the labels.
model = AutoModelForSequenceClassification.from_pretrained("sarahlintang/IndoBERT")

def analyze_sentiment(text):
    """Classify *text* with the module-level ``tokenizer`` and ``model``.

    Args:
        text: The value to classify. Non-string values (for example NaN
            from a missing CSV cell) are accepted.

    Returns:
        'negative', 'neutral' or 'positive' for class index 0, 1 or any
        other index respectively, or 'unknown' when *text* is not a string.
    """
    # Non-string values cannot be tokenized.
    if not isinstance(text, str):
        return 'unknown'

    # Truncate to the model's position-embedding limit so that very long
    # comments are clipped instead of raising inside the model.
    inputs = tokenizer(
        text,
        return_tensors='pt',
        truncation=True,
        max_length=model.config.max_position_embeddings,
    )

    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        logits = model(**inputs).logits

    # Softmax preserves ordering, so the arg-max can be taken on the raw
    # logits directly.
    sentiment = logits.argmax(dim=-1).item()

    # Map the class index to a label.
    # NOTE(review): this index order is assumed -- confirm it against
    # `model.config.id2label` for the checkpoint in use.
    if sentiment == 0:
        return 'negative'
    if sentiment == 1:
        return 'neutral'
    return 'positive'


# Label every entry of the 'komentar' column with the classifier above
df['Predicted_Sentiment'] = df.loc[:, 'komentar'].apply(analyze_sentiment)

# Show each comment next to its predicted label
print(df.loc[:, ['komentar', 'Predicted_Sentiment']])




  df = pd.read_csv(dataset_path, error_bad_lines=False)
Skipping line 13: expected 4 fields, saw 6
Skipping line 22: expected 4 fields, saw 7
Skipping line 24: expected 4 fields, saw 5
Skipping line 28: expected 4 fields, saw 5
Skipping line 33: expected 4 fields, saw 5
Skipping line 41: expected 4 fields, saw 5
Skipping line 42: expected 4 fields, saw 5
Skipping line 43: expected 4 fields, saw 11
Skipping line 72: expected 4 fields, saw 8
Skipping line 89: expected 4 fields, saw 9
Skipping line 92: expected 4 fields, saw 5
Skipping line 97: expected 4 fields, saw 5
Skipping line 108: expected 4 fields, saw 9
Skipping line 117: expected 4 fields, saw 5
Skipping line 124: expected 4 fields, saw 6
Skipping line 136: expected 4 fields, saw 5
Skipping line 149: expected 4 fields, saw 6
Skipping line 163: expected 4 fields, saw 7
Skipping line 176: expected 4 fields, saw 5

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at sarahlintang/IndoBER

                                                                                                                                                                                                   komentar  \
Berbagi pengalaman aja yah                         RSUD sangat baik pelayanan nya dari mulai satpa... satpam nya sangat respon membantu keluarga pasi...  poko nya the best buat SATPAM nya pertahankan ...   
Ikut berbagi pengalaman aja\nNi ayah saya udh s... NaN                                                NaN                                                                                               NaN   
semoga dibacasih ulasannya, tolonglah SATPAM di... NaN                                                NaN                                                                                               NaN   
Pelayanan nya tidak manusiawi mungkin karna pak... NaN                                                NaN                                                                   