In [13]:
import tweetnlp
import pandas as pd

In [None]:
df = print(pd.read_csv("C:\\Users\\User\\Sentiment\\tweetnlp\\question_8.csv"))

UnicodeDecodeError: 'utf-8' codec can't decode byte 0x93 in position 1099: invalid start byte

In [None]:
# Robust CSV reading function
def read_csv_with_encoding_detection(file_path):
    """
    Attempts to read a CSV file by trying different encodings.
    
    Args:
        file_path (str): Path to the CSV file
        
    Returns:
        pd.DataFrame or None: The loaded DataFrame or None if failed
    """
    import os
    
    # Check if file exists
    if not os.path.exists(file_path):
        print(f"❌ File not found: {file_path}")
        # List available CSV files in the directory
        directory = os.path.dirname(file_path)
        if os.path.exists(directory):
            csv_files = [f for f in os.listdir(directory) if f.endswith('.csv')]
            if csv_files:
                print("Available CSV files in the directory:")
                for file in csv_files:
                    print(f"  - {file}")
        return None
    
    # Common encodings to try
    encodings = [
        'utf-8',           # Most common modern encoding
        'latin-1',         # Common for Western European languages
        'iso-8859-1',      # Similar to latin-1
        'cp1252',          # Windows encoding
        'utf-16',          # Unicode 16-bit
        'utf-8-sig',       # UTF-8 with BOM
        'cp850',           # DOS encoding
        'ascii'            # Basic ASCII
    ]
    
    for encoding in encodings:
        try:
            print(f"🔍 Trying encoding: {encoding}")
            df = pd.read_csv(file_path, encoding=encoding)
            print(f"✅ Success! File loaded with encoding: {encoding}")
            print(f"   Shape: {df.shape}")
            return df
            
        except UnicodeDecodeError as e:
            print(f"❌ UnicodeDecodeError with {encoding}")
            continue
        except Exception as e:
            print(f"❌ Other error with {encoding}: {str(e)[:60]}...")
            continue
    
    # If all encodings failed, try with error handling
    print("\n🔧 Trying with error handling (replacing problematic characters)...")
    try:
        df = pd.read_csv(file_path, encoding='utf-8', errors='replace')
        print("✅ Loaded with character replacement (some characters may appear as �)")
        return df
    except:
        try:
            df = pd.read_csv(file_path, encoding='latin-1', errors='ignore')
            print("✅ Loaded with character ignoring (some characters may be missing)")
            return df
        except Exception as e:
            print(f"❌ Final attempt failed: {e}")
            return None

# Now try to load your CSV file
print("Loading CSV file with encoding detection...")
print("="*50)
df = read_csv_with_encoding_detection(r"C:\Users\User\Sentiment\tweetnlp\question1.csv")

if df is not None:
    print("\n" + "="*50)
    print("FILE LOADED SUCCESSFULLY!")
    print("="*50)
    print(f"Shape: {df.shape}")
    print(f"Columns: {df.columns.tolist()}")
    print("\nFirst few rows:")
    print(df.head())
else:
    print("\n❌ Unable to load the file. Please check the file path and format.")

In [None]:
# Let's examine the structure of the CSV file
print("DataFrame shape:", df.shape)
print("\nColumn names:", df.columns.tolist())
print("\nFirst few rows:")
print(df.head())

DataFrame shape: (12, 1)

Column names: ['Q9: Do Brands openly share their policies and guidelines regarding the use of AI?']

First few rows:
  Q9: Do Brands openly share their policies and guidelines regarding the use of AI?
0  Some do, some do not, due to legal grey areas ...                               
1  In the current environment, I don't see brands...                               
2                     Some brands do but not fully.                                
3  No..not at the moment but this will change soo...                               
4  Yes but I have not in a way that is easy or di...                               


In [None]:
# Load the sentiment analysis model
print("Loading tweetnlp sentiment model...")
model = tweetnlp.load_model("sentiment")
print("Model loaded successfully!")

Loading tweetnlp sentiment model...


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model loaded successfully!


In [None]:
# Perform sentiment analysis on all responses
column_name = df.columns[0]  # Get the column name
responses = df[column_name].tolist()

print("Performing sentiment analysis on all responses...")
print("=" * 60)

sentiments = []
for i, response in enumerate(responses):
    if pd.notna(response):  # Check if response is not NaN
        sentiment = model.predict(str(response))
        sentiments.append(sentiment['label'])
        print(f"Response {i+1}: {sentiment['label']}")
        print(f"Text: {response[:100]}...")  # Show first 100 characters
        print("-" * 40)
    else:
        sentiments.append('N/A')
        print(f"Response {i+1}: N/A (empty response)")
        print("-" * 40)

# Add sentiment results to the dataframe
df['sentiment'] = sentiments
print(f"\nSentiment analysis completed for {len(responses)} responses!")

Performing sentiment analysis on all responses...
Response 1: neutral
Text: Some do, some do not, due to legal grey areas but i believe that they should...
----------------------------------------
Response 2: negative
Text: In the current environment, I don't see brands share their policies and guidelines regarding the use...
----------------------------------------
Response 3: neutral
Text: Some brands do but not fully. ...
----------------------------------------
Response 4: neutral
Text: No..not at the moment but this will change soon as mandatory disclosure requirements are impending ...
----------------------------------------
Response 5: neutral
Text: Yes but I have not in a way that is easy or digestible. I would assume it is somewhere in the terms ...
----------------------------------------
Response 6: neutral
Text: no...
----------------------------------------
Response 7: positive
Text: Yes and they should ...
----------------------------------------
Response 8: neutral
Text

In [None]:
# Display the updated dataframe with sentiment analysis
print("Updated DataFrame with Sentiment Analysis:")
print("=" * 50)
print(df)

print("\n" + "=" * 50)
print("SENTIMENT ANALYSIS SUMMARY")
print("=" * 50)

# Calculate sentiment distribution
sentiment_counts = df['sentiment'].value_counts()
print("Sentiment Distribution:")
for sentiment, count in sentiment_counts.items():
    percentage = (count / len(df)) * 100
    print(f"  {sentiment.capitalize()}: {count} responses ({percentage:.1f}%)")

print(f"\nTotal responses analyzed: {len(df)}")

# Show some insights
print("\n" + "=" * 50)
print("KEY INSIGHTS")
print("=" * 50)
most_common = sentiment_counts.index[0]
print(f"• Most common sentiment: {most_common.capitalize()} ({sentiment_counts.iloc[0]} responses)")
print(f"• Response diversity: {len(sentiment_counts)} different sentiment categories detected")

Updated DataFrame with Sentiment Analysis:
   Q9: Do Brands openly share their policies and guidelines regarding the use of AI?  \
0   Some do, some do not, due to legal grey areas ...                                  
1   In the current environment, I don't see brands...                                  
2                      Some brands do but not fully.                                   
3   No..not at the moment but this will change soo...                                  
4   Yes but I have not in a way that is easy or di...                                  
5                                                  no                                  
6                                Yes and they should                                   
7   Not sure, but it would not matter (see answer ...                                  
8   They have it somewhere if and when called upon...                                  
9   Most of the time, however, it is usually in a ...                        