In [39]:
import sys
import subprocess
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score


In [40]:
def install_dependencies():
    """Auto-install required packages"""
    packages = ['pandas', 'numpy', 'scikit-learn', 'nltk']
    for package in packages:
        try:
            __import__(package)
        except ImportError:
            print(f"‚è≥ Installing {package}...")
            subprocess.check_call([sys.executable, "-m", "pip", "install", package])

In [41]:
def setup_nltk():
    """Download NLTK resources"""
    import nltk
    try:
        nltk.data.find('corpora/stopwords')
    except LookupError:
        print("‚è≥ Downloading NLTK datasets...")
        nltk.download('stopwords')
        nltk.download('wordnet')
        nltk.download('omw-1.4')

In [42]:
def run_analysis():
    print("\n" + "="*50)
    print("üöÄ STARTING SENTIMENT ANALYSIS".center(50))
    print("="*50 + "\n")
    

In [43]:
   
  # Sample data (no file needed)
  data = {
      'review': [
          "This product is amazing! Worth every penny.",
          "Terrible quality. Complete waste of money.",
          "It's okay, but the packaging was damaged.",
          "Fast shipping and excellent customer service.",
          "Stopped working after 3 days. Very disappointed."
      ],
      'sentiment': [1, 0, 0, 1, 0]  # 1=Positive, 0=Negative
  }
  df = pd.DataFrame(data)
  
  print("üìä Sample Data Loaded:")
  print(df.head(2))
  
  # Preprocessing
  print("\nüîß Preprocessing text...")
  df['cleaned'] = df['review'].str.lower().str.replace('[^\w\s]', '')

üìä Sample Data Loaded:
                                        review  sentiment
0  This product is amazing! Worth every penny.          1
1   Terrible quality. Complete waste of money.          0

üîß Preprocessing text...


  df['cleaned'] = df['review'].str.lower().str.replace('[^\w\s]', '')


In [44]:
 
    # TF-IDF Vectorization
    print("\nüî¢ Creating numerical features...")
    tfidf = TfidfVectorizer(max_features=50)  # Simplified for demo
    X = tfidf.fit_transform(df['cleaned'])
    y = df['sentiment']


üî¢ Creating numerical features...


In [45]:
  
    # Train model
    print("\nü§ñ Training model...")
    model = LogisticRegression(max_iter=1000)
    model.fit(X, y)


ü§ñ Training model...


In [46]:

    # Evaluate
    predictions = model.predict(X)
    print("\nüìà Model Accuracy:", accuracy_score(y, predictions))
    


üìà Model Accuracy: 1.0


In [47]:
# Live predictions
print("\nüîÆ Test Predictions:")
test_phrases = [
    "I love this!",
    "Worst product ever",
    "It's okay"
]
for phrase in test_phrases:
    vec = tfidf.transform([phrase.lower()])
    pred = model.predict(vec)[0]
    print(f"'{phrase}' ‚Üí {'üëç POSITIVE' if pred == 1 else 'üëé NEGATIVE'}")


üîÆ Test Predictions:
'I love this!' ‚Üí üëé NEGATIVE
'Worst product ever' ‚Üí üëé NEGATIVE
'It's okay' ‚Üí üëé NEGATIVE
