<a href="https://colab.research.google.com/github/MohammadErfanRashidi/sentiment-analysis/blob/main/sentiment_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install numpy pandas scikit-learn matplotlib seaborn
!pip install nltk
!pip install transformers


In [None]:
import pandas as pd

# Read the raw CSV. `header=None` means the file carries no header row,
# so the six column names are supplied explicitly.
# Point `url` at the real dataset (local path or URL) before running.
url = "YourDataSet.csv"
df = pd.read_csv(
    url,
    header=None,
    names=['sentiment', 'id', 'date', 'query', 'user', 'text'],
    encoding='latin-1',
)

# Show the first rows as a quick sanity check
df.head()


In [None]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer

# Fetch the NLTK resources used by the preprocessing step: the tokenizer
# models behind `word_tokenize` and the stop-word lists.
# Recent NLTK releases load the tokenizer from the 'punkt_tab' package
# instead of the pickled 'punkt' one, so both are requested to keep
# `word_tokenize` working regardless of the installed NLTK version.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')

# Lazily-built cache of the English stop-word set (filled on first use).
_STOP_WORDS = None


def _english_stop_words():
    """Return the English stop-word set, reading the NLTK corpus only once."""
    global _STOP_WORDS
    if _STOP_WORDS is None:
        _STOP_WORDS = frozenset(stopwords.words('english'))
    return _STOP_WORDS


def preprocess_text(text):
    """Normalise one piece of text for bag-of-words style vectorisation.

    The text is lower-cased and tokenised with `word_tokenize`; tokens that
    are not purely alphabetic (numbers, punctuation, mixed tokens) and
    English stop words are dropped.

    Args:
        text: The raw text. Non-string values (e.g. ``None`` or the NaN that
            pandas uses for missing cells) are treated as empty text.

    Returns:
        The surviving tokens joined by single spaces; ``''`` when nothing
        survives or when `text` is not a string.
    """
    # Missing cells arrive as float NaN / None; `.lower()` would raise on them.
    if not isinstance(text, str):
        return ''

    # The stop-word set is cached so it is not rebuilt from the corpus for
    # every row when this function is used with `Series.apply`.
    stop_words = _english_stop_words()
    kept = [
        token
        for token in word_tokenize(text.lower())
        if token.isalpha() and token not in stop_words
    ]
    return ' '.join(kept)

# Clean every raw text entry and store the result in a new column
df['processed_text'] = df['text'].apply(preprocess_text)

# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    df['processed_text'],
    df['sentiment'],
    random_state=42,
    test_size=0.3,
)


In [7]:
# Turn the preprocessed text into TF-IDF feature matrices.
# The vectorizer is fitted on the training split only, so the test split
# does not influence the learned vocabulary or weights.
vectorizer = TfidfVectorizer()
X_train_vectorized = vectorizer.fit_transform(X_train)
# Reuse the already-fitted vectorizer for the test split (transform only, no refit)
X_test_vectorized = vectorizer.transform(X_test)


In [None]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

# Fit a multinomial Naive Bayes model on the TF-IDF training features
classifier = MultinomialNB().fit(X_train_vectorized, y_train)

# Predict labels for the held-out test features
y_pred = classifier.predict(X_test_vectorized)

# Score the predictions against the true test labels
report = classification_report(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)


In [None]:
from transformers import pipeline

# Load a pre-trained sentiment-analysis model.
# The checkpoint is named explicitly instead of relying on the library's
# implicit default, so the model in use does not change silently between
# transformers versions (and the "no model was supplied" warning goes away).
# NOTE(review): per its model card this SST-2 checkpoint is a two-class model
# (POSITIVE / NEGATIVE); confirm that is the intended label set.
sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
)

def classify_sentences(input_file, positive_output, negative_output, neutral_output,
                       classifier=None, neutral_threshold=0.0, encoding='utf-8'):
    """Sort the lines of a text file into positive / negative / neutral files.

    Each non-blank line of `input_file` is classified and the original line
    is appended to exactly one of the three output files. The output files
    are created or overwritten.

    Args:
        input_file: Path of the text file to read, one sentence per line.
        positive_output: Path that receives lines labelled ``POSITIVE``.
        negative_output: Path that receives lines labelled ``NEGATIVE``.
        neutral_output: Path that receives every other line (any other label,
            or a prediction whose score is below `neutral_threshold`).
        classifier: Callable invoked as ``classifier(text, truncation=True)``
            that returns a list whose first item is a dict with a ``'label'``
            key and, optionally, a ``'score'`` key. Defaults to the
            module-level `sentiment_pipeline`.
        neutral_threshold: Predictions with a score strictly below this value
            are written to the neutral file. The default ``0.0`` disables
            this, i.e. routing is decided by the label alone. Useful with
            two-label models, which otherwise never produce a neutral line.
        encoding: Text encoding used for the input and all output files.

    Returns:
        None.
    """
    # Resolve the default lazily so the global is only needed when no
    # classifier is injected.
    if classifier is None:
        classifier = sentiment_pipeline

    with open(input_file, 'r', encoding=encoding) as infile, \
         open(positive_output, 'w', encoding=encoding) as pos_file, \
         open(negative_output, 'w', encoding=encoding) as neg_file, \
         open(neutral_output, 'w', encoding=encoding) as neu_file:

        for line in infile:
            text = line.strip()
            # Blank lines carry no sentiment; do not classify or copy them.
            if not text:
                continue

            # truncation=True keeps over-long lines within the model's input limit.
            result = classifier(text, truncation=True)[0]
            # Compare labels case-insensitively ('positive' vs 'POSITIVE').
            label = str(result['label']).upper()
            score = result.get('score', 1.0)

            if score < neutral_threshold:
                target = neu_file
            elif label == 'POSITIVE':
                target = pos_file
            elif label == 'NEGATIVE':
                target = neg_file
            else:
                target = neu_file

            # The original line (including its line ending) is written as-is.
            target.write(line)

# Example run: classify each line of TestFile.txt into one of three output files
input_file = 'TestFile.txt'
positive_output = 'positive_sentences.txt'
negative_output = 'negative_sentences.txt'
neutral_output = 'neutral_sentences.txt'

classify_sentences(
    input_file=input_file,
    positive_output=positive_output,
    negative_output=negative_output,
    neutral_output=neutral_output,
)